├── LICENSE ├── Long-term_Forecasting ├── data_provider │ ├── data_factory.py │ └── data_loader.py ├── exp │ ├── exp_basic.py │ └── exp_long_term_forecasting.py ├── models │ ├── S2IPLLM.py │ └── prompt.py ├── run.py ├── scripts │ ├── electricity.sh │ ├── etth1.sh │ ├── etth2.sh │ ├── ettm1.sh │ ├── ettm2.sh │ ├── traffic.sh │ └── weather.sh └── utils │ ├── losses.py │ ├── m4_summary.py │ ├── metrics.py │ ├── timefeatures.py │ ├── tokenization.py │ └── tools.py ├── README.md ├── Short-term_Forecasting ├── data_provider │ ├── __init__.py │ ├── data_factory.py │ ├── data_loader.py │ └── m4.py ├── m4.sh ├── models │ ├── S2IPLLM.py │ └── prompt.py ├── run_m4.py └── utils │ ├── __init__.py │ ├── losses.py │ ├── m4_summary.py │ ├── masking.py │ ├── metrics.py │ ├── timefeatures.py │ └── tools.py └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 panzijie825 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Long-term_Forecasting/data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_ETT_hour,Dataset_ETT_hour_decomposed, Dataset_ETT_minute,Dataset_ETT_minute_decomposed,Dataset_Custom 2 | 3 | 4 | 5 | from torch.utils.data import DataLoader 6 | 7 | data_dict = { 8 | 'ETTh1': Dataset_ETT_hour, 9 | 'ETTh1_decomposed': Dataset_ETT_hour_decomposed, 10 | 'ETTh2': Dataset_ETT_hour, 11 | 'ETTh2_decomposed': Dataset_ETT_hour_decomposed, 12 | 'ETTm1': Dataset_ETT_minute, 13 | 'ETTm1_decomposed': Dataset_ETT_minute_decomposed, 14 | 'ETTm2': Dataset_ETT_minute, 15 | 'ETTm2_decomposed': Dataset_ETT_minute_decomposed, 16 | 'ECL': Dataset_Custom, 17 | 'traffic': Dataset_Custom, 18 | 'weather': Dataset_Custom 19 | 20 | } 21 | 22 | 23 | def data_provider(args, flag): 24 | Data = data_dict[args.data] 25 | timeenc = 0 if args.embed != 'timeF' else 1 26 | percent = args.percent 27 | 28 | if flag == 'test': 29 | shuffle_flag = False 30 | drop_last = True 31 | if args.task_name == 'anomaly_detection' or args.task_name == 'classification': 32 | batch_size = args.batch_size 33 | else: 34 | batch_size = 1 35 | freq = args.freq 36 | else: 37 | shuffle_flag = True 38 | drop_last = True 39 | batch_size = args.batch_size 40 | freq = args.freq 41 | 42 | if args.task_name == 'anomaly_detection': 43 | drop_last = False 44 | data_set = Data( 45 | root_path=args.root_path, 46 | win_size=args.seq_len, 47 | flag=flag, 48 | ) 49 | print(flag, len(data_set)) 50 | data_loader = DataLoader( 51 | data_set, 52 | batch_size=batch_size, 53 | shuffle=shuffle_flag, 54 | num_workers=args.num_workers, 55 | drop_last=drop_last) 56 | return data_set, data_loader 57 | elif args.task_name == 'classification': 58 | drop_last = False 59 | data_set = Data( 60 | root_path=args.root_path, 61 | flag=flag, 62 | ) 63 | print(flag, len(data_set)) 64 | data_loader = DataLoader( 65 | data_set, 66 | batch_size=batch_size, 67 | shuffle=shuffle_flag, 68 | num_workers=args.num_workers, 69 | drop_last=drop_last, 70 | collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) 71 | ) 72 | return data_set, data_loader 73 | else: 74 | if args.data == 'm4': 75 | drop_last = False 76 | data_set = Data( 77 | root_path=args.root_path, 78 | data_path=args.data_path, 79 | flag=flag, 80 | size=[args.seq_len, args.label_len, args.pred_len], 81 | features=args.features, 82 | target=args.target, 83 | timeenc=timeenc, 84 | freq=freq, 85 | seasonal_patterns=args.seasonal_patterns, 86 | percent=percent 87 | ) 88 | # batch_size = args.batch_size 89 | print(flag, len(data_set)) 90 | data_loader = DataLoader( 91 | data_set, 92 | batch_size=batch_size, 93 | shuffle=shuffle_flag, 94 | num_workers=args.num_workers, 95 | drop_last=drop_last) 96 | return data_set, data_loader 97 | -------------------------------------------------------------------------------- /Long-term_Forecasting/data_provider/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import glob 5 | import re 6 | import torch 7 | from torch.utils.data import Dataset, DataLoader 8 | from sklearn.preprocessing import StandardScaler 9 | from utils.timefeatures import time_features 10 | from statsmodels.tsa.seasonal import STL 11 | import warnings 12 | from tqdm import tqdm 13 | from joblib import Parallel, delayed 14 | 15 | 
warnings.filterwarnings('ignore') 16 | 17 | 18 | class Dataset_ETT_hour(Dataset): 19 | def __init__(self, root_path, flag='train', size=None, 20 | features='S', data_path='ETTh1.csv', 21 | target='OT', scale=True, timeenc=0, freq='h', 22 | seasonal_patterns=None, percent=10): 23 | # size [seq_len, label_len, pred_len] 24 | # info 25 | if size == None: 26 | self.seq_len = 24 * 4 * 4 27 | self.label_len = 24 * 4 28 | self.pred_len = 24 * 4 29 | else: 30 | self.seq_len = size[0] 31 | self.label_len = size[1] 32 | self.pred_len = size[2] 33 | # init 34 | assert flag in ['train', 'test', 'val'] 35 | type_map = {'train': 0, 'val': 1, 'test': 2} 36 | self.set_type = type_map[flag] 37 | 38 | self.features = features 39 | self.target = target 40 | self.scale = scale 41 | self.timeenc = timeenc 42 | self.freq = freq 43 | 44 | self.percent = percent 45 | self.root_path = root_path 46 | self.data_path = data_path 47 | self.__read_data__() 48 | 49 | 50 | 51 | 52 | def __read_data__(self): 53 | self.scaler = StandardScaler() 54 | df_raw = pd.read_csv(os.path.join(self.root_path, 55 | self.data_path)) 56 | 57 | border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] 58 | border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] 59 | 60 | border1 = border1s[self.set_type] 61 | border2 = border2s[self.set_type] 62 | 63 | if self.set_type == 0: 64 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len 65 | 66 | if self.features == 'M' or self.features == 'MS': 67 | cols_data = df_raw.columns[1:] 68 | df_data = df_raw[cols_data] 69 | elif self.features == 'S': 70 | df_data = df_raw[[self.target]] 71 | 72 | if self.scale: 73 | train_data = df_data[border1s[0]:border2s[0]] 74 | self.scaler.fit(train_data.values) 75 | data = self.scaler.transform(df_data.values) 76 | else: 77 | data = df_data.values 78 | 79 | df_stamp = df_raw[['date']][border1:border2] 80 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 81 | if self.timeenc == 0: 82 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 83 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 84 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 85 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 86 | data_stamp = df_stamp.drop(['date'], 1).values 87 | elif self.timeenc == 1: 88 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 89 | data_stamp = data_stamp.transpose(1, 0) 90 | 91 | self.data_x = data[border1:border2] 92 | self.data_y = data[border1:border2] 93 | self.data_stamp = data_stamp 94 | 95 | def __getitem__(self, index): 96 | s_begin = index 97 | s_end = s_begin + self.seq_len 98 | r_begin = s_end - self.label_len 99 | r_end = r_begin + self.label_len + self.pred_len 100 | 101 | seq_x = self.data_x[s_begin:s_end] 102 | seq_y = self.data_y[r_begin:r_end] 103 | seq_x_mark = self.data_stamp[s_begin:s_end] 104 | seq_y_mark = self.data_stamp[r_begin:r_end] 105 | 106 | return seq_x, seq_y, seq_x_mark, seq_y_mark 107 | 108 | def __len__(self): 109 | return len(self.data_x) - self.seq_len - self.pred_len + 1 110 | 111 | def inverse_transform(self, data): 112 | return self.scaler.inverse_transform(data) 113 | 114 | 115 | 116 | 117 | class Dataset_ETT_hour_decomposed(Dataset): 118 | def __init__(self, root_path, flag='train', size=None, 119 | features='S', data_path='ETTh1.csv', 120 | target='OT', scale=True, timeenc=0, freq='h', 121 | seasonal_patterns=None, percent=10): 122 | # size 
[seq_len, label_len, pred_len] 123 | # info 124 | if size == None: 125 | self.seq_len = 24 * 4 * 4 126 | self.label_len = 24 * 4 127 | self.pred_len = 24 * 4 128 | else: 129 | self.seq_len = size[0] 130 | self.label_len = size[1] 131 | self.pred_len = size[2] 132 | # init 133 | assert flag in ['train', 'test', 'val'] 134 | type_map = {'train': 0, 'val': 1, 'test': 2} 135 | self.set_type = type_map[flag] 136 | 137 | self.features = features 138 | self.target = target 139 | self.scale = scale 140 | self.timeenc = timeenc 141 | self.freq = freq 142 | 143 | self.percent = percent 144 | self.root_path = root_path 145 | self.data_path = data_path 146 | self.decomposed_cache = {} 147 | 148 | 149 | self.__read_data__() 150 | 151 | 152 | def stl_decomposition(self, series, period,variate_name, start_timestamp, end_timestamp): 153 | 154 | cache_key = (start_timestamp, end_timestamp, period, variate_name) 155 | if cache_key in self.decomposed_cache: 156 | return self.decomposed_cache[cache_key] 157 | 158 | 159 | stl = STL(series, period=period) 160 | result = stl.fit() 161 | trend = result.trend 162 | seasonal = result.seasonal 163 | resid = result.resid 164 | self.decomposed_cache[cache_key] = (trend, seasonal, resid) 165 | 166 | return trend, seasonal, resid 167 | 168 | 169 | def __read_data__(self): 170 | self.scaler = StandardScaler() 171 | df_raw = pd.read_csv(os.path.join(self.root_path, 172 | self.data_path)) 173 | 174 | 175 | 176 | border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] 177 | border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] 178 | 179 | 180 | border1 = border1s[self.set_type] 181 | border2 = border2s[self.set_type] 182 | 183 | if self.set_type == 0: 184 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len 185 | 186 | 187 | if self.features == 'M' or self.features == 'MS': 188 | cols_data = df_raw.columns[1:] 189 | df_data = df_raw[cols_data] 190 | elif self.features == 'S': 191 | df_data = df_raw[[self.target]] 192 | 193 | if self.scale: 194 | train_data = df_data.iloc[border1s[0]:border2s[0]] 195 | self.scaler.fit(train_data.values) 196 | data = self.scaler.transform(df_data.values) 197 | 198 | else: 199 | 200 | data = df_data.values 201 | 202 | df_stamp = df_raw[['date']][border1:border2] 203 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 204 | if self.timeenc == 0: 205 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 206 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 207 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 208 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 209 | data_stamp = df_stamp.drop(['date'], 1).values 210 | elif self.timeenc == 1: 211 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 212 | data_stamp = data_stamp.transpose(1, 0) 213 | 214 | self.time_mark = df_raw['date'].values[border1:border2] 215 | self.data_x = data[border1:border2] 216 | self.data_y = data[border1:border2] 217 | self.data_stamp = data_stamp 218 | 219 | def __getitem__(self, index): 220 | s_begin = index 221 | s_end = s_begin + self.seq_len 222 | r_begin = s_end - self.label_len 223 | r_end = r_begin + self.label_len + self.pred_len 224 | 225 | seq_x = self.data_x[s_begin:s_end] 226 | seq_y = self.data_y[r_begin:r_end] 227 | seq_x_mark = self.data_stamp[s_begin:s_end] 228 | seq_y_mark = self.data_stamp[r_begin:r_end] 229 | 230 | concatenated_result = np.empty((seq_x.shape[0], 0)) 
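        # The loop below STL-decomposes each variate of the lookback window
        # (period=24 for this hourly dataset) via stl_decomposition(), which caches
        # results keyed on (start_timestamp, end_timestamp, period, variate) so a
        # repeated window is not re-fitted. The components are stacked column-wise,
        # so final_result has shape (seq_len, M + 3*M): the first M columns are the
        # raw seq_x, followed by a [trend, seasonal, resid] triple per variate.
        # E.g. for ETTh1 with features='M' (M=7) and seq_len=512: (512, 28).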
231 | 232 | start_timestamp = self.time_mark[s_begin] 233 | end_timestamp = self.time_mark[s_end] 234 | 235 | for variate in range(seq_x.shape[1]): 236 | series = pd.Series(seq_x[:, variate]) 237 | trend, seasonal, resid = self.stl_decomposition(series, 24, variate, start_timestamp , end_timestamp) 238 | trend_array = trend.to_numpy().reshape(-1, 1) 239 | seasonal_array = seasonal.to_numpy().reshape(-1, 1) 240 | resid_array = resid.to_numpy().reshape(-1, 1) 241 | 242 | variate_components = np.concatenate([trend_array, seasonal_array, resid_array], axis=1) 243 | concatenated_result = np.concatenate([concatenated_result, variate_components], axis=1) 244 | 245 | seq_x_expanded = seq_x if len(seq_x.shape) > 1 else seq_x.reshape(-1, 1) 246 | final_result = np.concatenate([seq_x_expanded, concatenated_result], axis=1) 247 | 248 | 249 | return final_result, seq_y, seq_x_mark, seq_y_mark 250 | 251 | def __len__(self): 252 | return len(self.data_x) - self.seq_len - self.pred_len + 1 253 | 254 | def inverse_transform(self, data): 255 | return self.scaler.inverse_transform(data) 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | class Dataset_ETT_minute(Dataset): 270 | def __init__(self, root_path, flag='train', size=None, 271 | features='S', data_path='ETTm1.csv', 272 | target='OT', scale=True, timeenc=0, freq='t', 273 | seasonal_patterns=None, percent=10): 274 | # size [seq_len, label_len, pred_len] 275 | # info 276 | if size == None: 277 | self.seq_len = 24 * 4 * 4 278 | self.label_len = 24 * 4 279 | self.pred_len = 24 * 4 280 | else: 281 | self.seq_len = size[0] 282 | self.label_len = size[1] 283 | self.pred_len = size[2] 284 | # init 285 | assert flag in ['train', 'test', 'val'] 286 | type_map = {'train': 0, 'val': 1, 'test': 2} 287 | self.set_type = type_map[flag] 288 | 289 | self.features = features 290 | self.target = target 291 | self.scale = scale 292 | self.timeenc = timeenc 293 | self.freq = freq 294 | 295 | self.percent = percent 296 | self.root_path = root_path 297 | self.data_path = data_path 298 | self.__read_data__() 299 | 300 | def __read_data__(self): 301 | self.scaler = StandardScaler() 302 | df_raw = pd.read_csv(os.path.join(self.root_path, 303 | self.data_path)) 304 | 305 | border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] 306 | border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] 307 | border1 = border1s[self.set_type] 308 | border2 = border2s[self.set_type] 309 | 310 | if self.set_type == 0: 311 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len 312 | 313 | if self.features == 'M' or self.features == 'MS': 314 | cols_data = df_raw.columns[1:] 315 | df_data = df_raw[cols_data] 316 | elif self.features == 'S': 317 | df_data = df_raw[[self.target]] 318 | 319 | if self.scale: 320 | train_data = df_data[border1s[0]:border2s[0]] 321 | self.scaler.fit(train_data.values) 322 | data = self.scaler.transform(df_data.values) 323 | else: 324 | data = df_data.values 325 | 326 | df_stamp = df_raw[['date']][border1:border2] 327 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 328 | if self.timeenc == 0: 329 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 330 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 331 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 332 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 333 | df_stamp['minute'] = df_stamp.date.apply(lambda 
row: row.minute, 1) 334 | df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) 335 | data_stamp = df_stamp.drop(['date'], 1).values 336 | elif self.timeenc == 1: 337 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 338 | data_stamp = data_stamp.transpose(1, 0) 339 | 340 | self.data_x = data[border1:border2] 341 | self.data_y = data[border1:border2] 342 | self.data_stamp = data_stamp 343 | 344 | def __getitem__(self, index): 345 | s_begin = index 346 | s_end = s_begin + self.seq_len 347 | r_begin = s_end - self.label_len 348 | r_end = r_begin + self.label_len + self.pred_len 349 | 350 | seq_x = self.data_x[s_begin:s_end] 351 | seq_y = self.data_y[r_begin:r_end] 352 | seq_x_mark = self.data_stamp[s_begin:s_end] 353 | seq_y_mark = self.data_stamp[r_begin:r_end] 354 | 355 | return seq_x, seq_y, seq_x_mark, seq_y_mark 356 | 357 | def __len__(self): 358 | return len(self.data_x) - self.seq_len - self.pred_len + 1 359 | 360 | def inverse_transform(self, data): 361 | return self.scaler.inverse_transform(data) 362 | 363 | 364 | 365 | 366 | 367 | class Dataset_ETT_minute_decomposed(Dataset): 368 | def __init__(self, root_path, flag='train', size=None, 369 | features='S', data_path='ETTm1.csv', 370 | target='OT', scale=True, timeenc=0, freq='t', 371 | seasonal_patterns=None, percent=10): 372 | # size [seq_len, label_len, pred_len] 373 | # info 374 | if size == None: 375 | self.seq_len = 24 * 4 * 4 376 | self.label_len = 24 * 4 377 | self.pred_len = 24 * 4 378 | else: 379 | self.seq_len = size[0] 380 | self.label_len = size[1] 381 | self.pred_len = size[2] 382 | # init 383 | assert flag in ['train', 'test', 'val'] 384 | type_map = {'train': 0, 'val': 1, 'test': 2} 385 | self.set_type = type_map[flag] 386 | 387 | self.features = features 388 | self.target = target 389 | self.scale = scale 390 | self.timeenc = timeenc 391 | self.freq = freq 392 | 393 | self.percent = percent 394 | self.root_path = root_path 395 | self.data_path = data_path 396 | self.decomposed_cache = {} 397 | 398 | 399 | self.__read_data__() 400 | 401 | 402 | def stl_decomposition(self, series, period,variate_name, start_timestamp, end_timestamp): 403 | 404 | cache_key = (start_timestamp, end_timestamp, period, variate_name) 405 | if cache_key in self.decomposed_cache: 406 | return self.decomposed_cache[cache_key] 407 | 408 | 409 | 410 | stl = STL(series, period=period) 411 | result = stl.fit() 412 | trend = result.trend 413 | seasonal = result.seasonal 414 | resid = result.resid 415 | self.decomposed_cache[cache_key] = (trend, seasonal, resid) 416 | 417 | return trend, seasonal, resid 418 | 419 | 420 | def __read_data__(self): 421 | self.scaler = StandardScaler() 422 | df_raw = pd.read_csv(os.path.join(self.root_path, 423 | self.data_path)) 424 | 425 | 426 | border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] 427 | border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] 428 | 429 | 430 | 431 | border1 = border1s[self.set_type] 432 | border2 = border2s[self.set_type] 433 | 434 | if self.set_type == 0: 435 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len 436 | 437 | 438 | if self.features == 'M' or self.features == 'MS': 439 | cols_data = df_raw.columns[1:] 440 | df_data = df_raw[cols_data] 441 | elif self.features == 'S': 442 | df_data = df_raw[[self.target]] 443 | 444 | if self.scale: 445 | train_data = df_data.iloc[border1s[0]:border2s[0]] 446 | 
self.scaler.fit(train_data.values) 447 | data = self.scaler.transform(df_data.values) 448 | 449 | 450 | 451 | else: 452 | 453 | data = df_data.values 454 | 455 | df_stamp = df_raw[['date']][border1:border2] 456 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 457 | if self.timeenc == 0: 458 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 459 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 460 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 461 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 462 | data_stamp = df_stamp.drop(['date'], 1).values 463 | elif self.timeenc == 1: 464 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 465 | data_stamp = data_stamp.transpose(1, 0) 466 | 467 | self.data_x = data[border1:border2] 468 | self.data_y = data[border1:border2] 469 | self.time_mark = df_raw['date'].values[border1:border2] 470 | self.data_stamp = data_stamp 471 | 472 | def __getitem__(self, index): 473 | s_begin = index 474 | s_end = s_begin + self.seq_len 475 | r_begin = s_end - self.label_len 476 | r_end = r_begin + self.label_len + self.pred_len 477 | 478 | seq_x = self.data_x[s_begin:s_end] 479 | seq_y = self.data_y[r_begin:r_end] 480 | seq_x_mark = self.data_stamp[s_begin:s_end] 481 | seq_y_mark = self.data_stamp[r_begin:r_end] 482 | 483 | concatenated_result = np.empty((seq_x.shape[0], 0)) 484 | 485 | start_timestamp = self.time_mark[s_begin] 486 | end_timestamp = self.time_mark[s_end] 487 | 488 | for variate in range(seq_x.shape[1]): 489 | series = pd.Series(seq_x[:, variate]) 490 | trend, seasonal, resid = self.stl_decomposition(series, 96, variate, start_timestamp , end_timestamp) 491 | trend_array = trend.to_numpy().reshape(-1, 1) 492 | seasonal_array = seasonal.to_numpy().reshape(-1, 1) 493 | resid_array = resid.to_numpy().reshape(-1, 1) 494 | 495 | variate_components = np.concatenate([trend_array, seasonal_array, resid_array], axis=1) 496 | concatenated_result = np.concatenate([concatenated_result, variate_components], axis=1) 497 | 498 | seq_x_expanded = seq_x if len(seq_x.shape) > 1 else seq_x.reshape(-1, 1) 499 | final_result = np.concatenate([seq_x_expanded, concatenated_result], axis=1) 500 | 501 | 502 | return final_result, seq_y, seq_x_mark, seq_y_mark 503 | 504 | def __len__(self): 505 | return len(self.data_x) - self.seq_len - self.pred_len + 1 506 | 507 | def inverse_transform(self, data): 508 | return self.scaler.inverse_transform(data) 509 | 510 | 511 | 512 | class Dataset_Custom(Dataset): 513 | def __init__(self, root_path, flag='train', size=None, 514 | features='S', data_path='ETTh1.csv', 515 | target='OT', scale=True, timeenc=0, freq='h', 516 | percent=10, max_len=-1, train_all=False,seasonal_patterns=None): 517 | # size [seq_len, label_len, pred_len] 518 | # info 519 | if size == None: 520 | self.seq_len = 24 * 4 * 4 521 | self.label_len = 24 * 4 522 | self.pred_len = 24 * 4 523 | else: 524 | self.seq_len = size[0] 525 | self.label_len = size[1] 526 | self.pred_len = size[2] 527 | # init 528 | assert flag in ['train', 'test', 'val'] 529 | type_map = {'train': 0, 'val': 1, 'test': 2} 530 | self.set_type = type_map[flag] 531 | 532 | self.features = features 533 | self.target = target 534 | self.scale = scale 535 | self.timeenc = timeenc 536 | self.freq = freq 537 | self.percent = percent 538 | 539 | self.root_path = root_path 540 | self.data_path = data_path 541 | self.__read_data__() 542 | 543 | self.enc_in = self.data_x.shape[-1] 544 | self.tot_len 
= len(self.data_x) - self.seq_len - self.pred_len + 1 545 | 546 | def __read_data__(self): 547 | self.scaler = StandardScaler() 548 | df_raw = pd.read_csv(os.path.join(self.root_path, 549 | self.data_path)) 550 | 551 | ''' 552 | df_raw.columns: ['date', ...(other features), target feature] 553 | ''' 554 | cols = list(df_raw.columns) 555 | cols.remove(self.target) 556 | cols.remove('date') 557 | df_raw = df_raw[['date'] + cols + [self.target]] 558 | # print(cols) 559 | num_train = int(len(df_raw) * 0.7) 560 | num_test = int(len(df_raw) * 0.2) 561 | num_vali = len(df_raw) - num_train - num_test 562 | border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] 563 | border2s = [num_train, num_train + num_vali, len(df_raw)] 564 | border1 = border1s[self.set_type] 565 | border2 = border2s[self.set_type] 566 | 567 | if self.set_type == 0: 568 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len 569 | 570 | if self.features == 'M' or self.features == 'MS': 571 | cols_data = df_raw.columns[1:] 572 | df_data = df_raw[cols_data] 573 | elif self.features == 'S': 574 | df_data = df_raw[[self.target]] 575 | 576 | if self.scale: 577 | train_data = df_data[border1s[0]:border2s[0]] 578 | self.scaler.fit(train_data.values) 579 | data = self.scaler.transform(df_data.values) 580 | else: 581 | data = df_data.values 582 | 583 | df_stamp = df_raw[['date']][border1:border2] 584 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 585 | if self.timeenc == 0: 586 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 587 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 588 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 589 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 590 | data_stamp = df_stamp.drop(['date'], 1).values 591 | elif self.timeenc == 1: 592 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 593 | data_stamp = data_stamp.transpose(1, 0) 594 | 595 | self.data_x = data[border1:border2] 596 | self.data_y = data[border1:border2] 597 | self.data_stamp = data_stamp 598 | 599 | def __getitem__(self, index): 600 | feat_id = index // self.tot_len 601 | s_begin = index % self.tot_len 602 | 603 | s_end = s_begin + self.seq_len 604 | r_begin = s_end - self.label_len 605 | r_end = r_begin + self.label_len + self.pred_len 606 | seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1] 607 | seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1] 608 | seq_x_mark = self.data_stamp[s_begin:s_end] 609 | seq_y_mark = self.data_stamp[r_begin:r_end] 610 | 611 | return seq_x, seq_y, seq_x_mark, seq_y_mark 612 | 613 | def __len__(self): 614 | return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in 615 | 616 | def inverse_transform(self, data): 617 | return self.scaler.inverse_transform(data) 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | -------------------------------------------------------------------------------- /Long-term_Forecasting/exp/exp_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from models import S2IPLLM 4 | from torch.nn.parallel import DistributedDataParallel as DDP 5 | 6 | 7 | class Exp_Basic(object): 8 | def __init__(self, args): 9 | self.args = args 10 | self.model_dict = { 11 | 12 | 'S2IPLLM': S2IPLLM, 13 | 14 | } 15 | self.device = self._acquire_device() 16 | self.model = self._build_model() 17 | 18 | def _build_model(self): 19 | raise 
NotImplementedError 20 | return None 21 | 22 | def _acquire_device(self): 23 | if self.args.use_gpu: 24 | os.environ["CUDA_VISIBLE_DEVICES"] = str( 25 | self.args.gpu) if not self.args.use_multi_gpu else self.args.devices 26 | device = torch.device('cuda:{}'.format(self.args.gpu)) 27 | print('Use GPU: cuda:{}'.format(self.args.gpu)) 28 | else: 29 | device = torch.device('cpu') 30 | print('Use CPU') 31 | return device 32 | 33 | def _get_data(self): 34 | pass 35 | 36 | def vali(self): 37 | pass 38 | 39 | def train(self): 40 | pass 41 | 42 | def test(self): 43 | pass 44 | -------------------------------------------------------------------------------- /Long-term_Forecasting/exp/exp_long_term_forecasting.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_factory import data_provider 2 | from exp.exp_basic import Exp_Basic 3 | from utils.tools import EarlyStopping, adjust_learning_rate, visual,adjust_model 4 | from utils.metrics import metric 5 | import torch 6 | import torch.nn as nn 7 | from models import S2IPLLM 8 | from torch.nn.utils import clip_grad_norm_ 9 | from utils.losses import mape_loss, mase_loss, smape_loss 10 | 11 | 12 | from transformers import AdamW 13 | 14 | 15 | 16 | 17 | 18 | from torch.utils.data import Dataset, DataLoader 19 | from torch import optim 20 | import os 21 | import time 22 | import warnings 23 | import numpy as np 24 | 25 | from tqdm import tqdm 26 | 27 | warnings.filterwarnings('ignore') 28 | 29 | 30 | class Exp_Long_Term_Forecast(object): 31 | def __init__(self, args): 32 | self.args = args 33 | self.model_dict = { 34 | 'S2IPLLM': S2IPLLM, 35 | 36 | } 37 | 38 | self.device = torch.device('cuda:0') 39 | self.model = self._build_model() 40 | 41 | self.train_data, self.train_loader = self._get_data(flag='train') 42 | self.vali_data, self.vali_loader = self._get_data(flag='val') 43 | # self.test_data, self.test_loader = self._get_data(flag='test') 44 | 45 | self.optimizer = self._select_optimizer() 46 | self.criterion = self._select_criterion() 47 | 48 | 49 | 50 | def _build_model(self): 51 | model = self.model_dict[self.args.model].Model(self.args).to(self.device) 52 | 53 | 54 | return model 55 | 56 | def _get_data(self, flag): 57 | data_set, data_loader = data_provider(self.args, flag) 58 | return data_set, data_loader 59 | 60 | def _select_optimizer(self): 61 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 62 | return model_optim 63 | 64 | def _select_criterion(self): 65 | if self.args.loss=='MSE': 66 | criterion = nn.MSELoss() 67 | 68 | elif self.args.loss=='SMAPE': 69 | criterion = smape_loss() 70 | 71 | return criterion 72 | 73 | def vali(self, vali_data, vali_loader, criterion): 74 | total_loss = [] 75 | self.model.eval() 76 | with torch.no_grad(): 77 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)): 78 | batch_x = batch_x.float().to(self.device) 79 | batch_y = batch_y.float().to(self.device) 80 | 81 | batch_x_mark = batch_x_mark.float().to(self.device) 82 | batch_y_mark = batch_y_mark.float().to(self.device) 83 | 84 | # decoder input 85 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() 86 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).to(torch.bfloat16).float().to(self.device) 87 | # encoder - decoder 88 | if self.args.use_amp: 89 | with torch.cuda.amp.autocast(): 90 | if self.args.output_attention: 91 | outputs = self.model(batch_x, batch_x_mark, dec_inp, 
batch_y_mark)[0] 92 | else: 93 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 94 | else: 95 | if self.args.output_attention: 96 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 97 | else: 98 | outputs,res = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 99 | f_dim = -1 if self.args.features == 'MS' else 0 100 | 101 | outputs = outputs[:, -self.args.pred_len:, f_dim:self.args.number_variable].float().to(self.device) 102 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:self.args.number_variable].float().to(self.device) 103 | 104 | 105 | 106 | 107 | 108 | pred = outputs.detach().cpu() 109 | true = batch_y.detach().cpu() 110 | 111 | loss = criterion(pred, true) 112 | 113 | total_loss.append(loss) 114 | 115 | 116 | total_loss = np.average(total_loss) 117 | self.model.train() 118 | return total_loss 119 | 120 | 121 | 122 | 123 | def train(self, setting): 124 | 125 | 126 | path = os.path.join(self.args.checkpoints, setting) 127 | if not os.path.exists(path): 128 | os.makedirs(path) 129 | 130 | time_now = time.time() 131 | 132 | train_steps = len(self.train_loader) 133 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 134 | 135 | if self.args.use_amp: 136 | scaler = torch.cuda.amp.GradScaler() 137 | 138 | for epoch in range(self.args.train_epochs): 139 | iter_count = 0 140 | train_loss = [] 141 | simlarity_losses = [] 142 | 143 | self.model.train() 144 | epoch_time = time.time() 145 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(self.train_loader)): 146 | iter_count += 1 147 | self.optimizer.zero_grad() 148 | batch_x = batch_x.float().to(self.device) 149 | 150 | batch_y = batch_y.float().to(self.device) 151 | batch_x_mark = batch_x_mark.float().to(self.device) 152 | batch_y_mark = batch_y_mark.float().to(self.device) 153 | 154 | # decoder input 155 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float().to(self.device) 156 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) 157 | 158 | # encoder - decoder 159 | if self.args.use_amp: 160 | with torch.cuda.amp.autocast(): 161 | if self.args.output_attention: 162 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 163 | else: 164 | 165 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 166 | 167 | f_dim = -1 if self.args.features == 'MS' else 0 168 | outputs = outputs[:, -self.args.pred_len:, f_dim:self.args.number_variable] 169 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:self.args.number_variable].float().to(self.device) 170 | loss = self.criterion(outputs, batch_y) 171 | 172 | 173 | else: 174 | if self.args.output_attention: 175 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 176 | else: 177 | 178 | outputs,res = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 179 | 180 | 181 | f_dim = -1 if self.args.features == 'MS' else 0 182 | outputs = outputs[:, -self.args.pred_len:, f_dim:self.args.number_variable] 183 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:self.args.number_variable].float().to(self.device) 184 | loss = self.criterion(outputs, batch_y) 185 | 186 | train_loss.append(loss.item()) 187 | simlarity_losses.append(res['simlarity_loss'].item()) 188 | 189 | 190 | loss += self.args.sim_coef*res['simlarity_loss'] 191 | 192 | 193 | 194 | if (i + 1) % 100 == 0: 195 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 196 | speed = (time.time() - time_now) 
/ iter_count 197 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 198 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 199 | iter_count = 0 200 | time_now = time.time() 201 | 202 | if self.args.use_amp: 203 | loss.backward() 204 | self.optimizer.step() 205 | else: 206 | 207 | loss.backward() 208 | self.optimizer.step() 209 | 210 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 211 | train_loss = np.average(train_loss) 212 | sim_loss = np.average(simlarity_losses) 213 | vali_loss = self.vali(self.vali_data, self.vali_loader, self.criterion) 214 | 215 | 216 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Sim Loss: {4:.7f}".format( 217 | epoch + 1, train_steps, train_loss, vali_loss,sim_loss)) 218 | 219 | 220 | early_stopping(vali_loss, self.model, path) 221 | if early_stopping.early_stop: 222 | print("Early stopping") 223 | break 224 | 225 | adjust_learning_rate(self.optimizer, epoch + 1, self.args) 226 | adjust_model(self.model, epoch + 1,self.args) 227 | 228 | 229 | 230 | 231 | def test(self, setting, test=1): 232 | 233 | 234 | test_data, test_loader = self._get_data(flag='test') 235 | 236 | 237 | 238 | preds = [] 239 | trues = [] 240 | folder_path = './test_results/' + setting + '/' 241 | if not os.path.exists(folder_path): 242 | os.makedirs(folder_path) 243 | 244 | sim_matrix = [] 245 | input_embedding = [] 246 | prompted_embedding = [] 247 | last_embedding = [] 248 | 249 | 250 | self.model.eval() 251 | with torch.no_grad(): 252 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(test_loader)): 253 | batch_x = batch_x.float().to(self.device) 254 | batch_y = batch_y.float().to(self.device) 255 | 256 | batch_x_mark = batch_x_mark.float().to(self.device) 257 | batch_y_mark = batch_y_mark.float().to(self.device) 258 | 259 | # decoder input 260 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]) 261 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) 262 | # encoder - decoder 263 | if self.args.use_amp: 264 | with torch.cuda.amp.autocast(): 265 | if self.args.output_attention: 266 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 267 | else: 268 | 269 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 270 | else: 271 | if self.args.output_attention: 272 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 273 | 274 | else: 275 | outputs,res = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 276 | 277 | f_dim = -1 if self.args.features == 'MS' else 0 278 | 279 | 280 | 281 | outputs = outputs[:, -self.args.pred_len:, f_dim:self.args.number_variable].float().detach().cpu().numpy() 282 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:self.args.number_variable].float().detach().cpu().numpy() 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | pred = outputs 291 | true = batch_y 292 | 293 | 294 | preds.append(pred) 295 | trues.append(true) 296 | if i % 20 == 0: 297 | input = batch_x.float().detach().cpu().numpy() 298 | gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) 299 | pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) 300 | visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) 301 | 302 | 303 | 304 | 305 | 306 | preds = np.array(preds) 307 | trues = np.array(trues) 308 | 309 | 310 | preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) 311 | trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) 
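        # preds/trues are collected per batch above; after stacking and reshaping
        # they have shape [num_windows, pred_len, number_variable]. metric() below
        # returns MAE, MSE, RMSE, MAPE and MSPE on these arrays, and the same
        # predictions/targets are written to ./results/<setting>/ as .npy files.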
312 | print('test shape:', preds.shape, trues.shape) 313 | 314 | # result save 315 | folder_path = './results/' + setting + '/' 316 | if not os.path.exists(folder_path): 317 | os.makedirs(folder_path) 318 | 319 | mae, mse, rmse, mape, mspe = metric(preds, trues) 320 | print('mse:{}, mae:{}'.format(mse, mae)) 321 | f = open("result_long_term_forecast.txt", 'a') 322 | f.write(setting + " \n") 323 | f.write('mse:{}, mae:{}'.format(mse, mae)) 324 | f.write('\n') 325 | f.write('\n') 326 | f.close() 327 | 328 | np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) 329 | np.save(folder_path + 'pred.npy', preds) 330 | np.save(folder_path + 'true.npy', trues) 331 | 332 | return mse, mae 333 | 334 | -------------------------------------------------------------------------------- /Long-term_Forecasting/models/S2IPLLM.py: -------------------------------------------------------------------------------- 1 | #!pip install transformers 2 | 3 | import math 4 | from typing import Optional 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch import optim 10 | import pandas as pd 11 | 12 | from transformers.models.gpt2.modeling_gpt2 import GPT2Model 13 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 14 | from einops import rearrange 15 | from transformers import GPT2Tokenizer 16 | from utils.tokenization import SerializerSettings, serialize_arr,serialize_arr 17 | from .prompt import Prompt 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | class Model(nn.Module): 31 | 32 | def __init__(self, configs): 33 | super(Model, self).__init__() 34 | self.configs = configs 35 | self.is_ln = configs.ln 36 | self.task_name = configs.task_name 37 | self.pred_len = configs.pred_len 38 | self.seq_len = configs.seq_len 39 | self.patch_size = configs.patch_size 40 | self.stride = configs.stride 41 | self.d_ff = 768 42 | self.patch_num = (configs.seq_len - self.patch_size) // self.stride + 1 43 | self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride)) 44 | self.patch_num += 1 45 | 46 | 47 | self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 48 | self.tokenizer.add_special_tokens({'pad_token': '[PAD]'}) 49 | 50 | 51 | 52 | 53 | if configs.pretrained == True: 54 | 55 | 56 | self.gpt2 = GPT2Model.from_pretrained('gpt2', output_attentions=True, output_hidden_states=True) 57 | self.gpt2.h = self.gpt2.h[:configs.gpt_layers] 58 | 59 | 60 | else: 61 | print("------------------no pretrain------------------") 62 | self.gpt2 = GPT2Model(GPT2Config()) 63 | 64 | 65 | 66 | 67 | for i, (name, param) in enumerate(self.gpt2.named_parameters()): 68 | if 'ln' in name or 'wpe' in name: #or 'mlp' in name: 69 | param.requires_grad = True 70 | else: 71 | param.requires_grad = False # False 72 | 73 | 74 | 75 | 76 | 77 | 78 | if self.task_name == 'long_term_forecast': 79 | 80 | self.in_layer = nn.Linear(configs.patch_size*3, configs.d_model) 81 | self.out_layer = nn.Linear(int(configs.d_model / 3 * (self.patch_num+configs.prompt_length)) , configs.pred_len) 82 | 83 | self.prompt_pool = Prompt(length=1, embed_dim=768, embedding_key='mean', prompt_init='uniform', prompt_pool=False, 84 | prompt_key=True, pool_size=self.configs.pool_size, top_k=self.configs.prompt_length, batchwise_prompt=False, prompt_key_init=self.configs.prompt_init,wte = self.gpt2.wte.weight) 85 | 86 | 87 | 88 | 89 | 90 | for layer in (self.gpt2, self.in_layer, self.out_layer): 91 | layer.cuda() 92 | layer.train() 93 | 94 | 95 | def forward(self, 
x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): 96 | 97 | 98 | if self.task_name == 'long_term_forecast': 99 | dec_out,res = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) 100 | return dec_out[:, -self.pred_len:, :],res # [B, L, D] 101 | 102 | 103 | return None 104 | 105 | 106 | 107 | def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): 108 | 109 | 110 | 111 | 112 | 113 | B, L, M = x_enc.shape 114 | 115 | means = x_enc.mean(1, keepdim=True).detach() 116 | x_enc = x_enc - means 117 | stdev = torch.sqrt( 118 | torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) 119 | x_enc /= stdev 120 | 121 | x = rearrange(x_enc, 'b l m -> (b m) l') 122 | 123 | 124 | def decompose(x): 125 | df = pd.DataFrame(x) 126 | trend = df.rolling(window=self.configs.trend_length, center=True).mean().fillna(method='bfill').fillna(method='ffill') 127 | detrended = df - trend 128 | seasonal = detrended.groupby(detrended.index % self.configs.seasonal_length).transform('mean').fillna(method='bfill').fillna(method='ffill') 129 | residuals = df - trend - seasonal 130 | combined = np.stack([trend, seasonal, residuals], axis=1) 131 | return combined 132 | 133 | 134 | 135 | decomp_results = np.apply_along_axis(decompose, 1, x.cpu().numpy()) 136 | x = torch.tensor(decomp_results).to(self.gpt2.device) 137 | x = rearrange(x, 'b l c d -> b c (d l)', c = 3) 138 | x = self.padding_patch_layer(x) 139 | x = x.unfold(dimension=-1, size=self.patch_size, step=self.stride) 140 | x = rearrange(x, 'b c n p -> b n (c p)', c = 3) 141 | pre_prompted_embedding = self.in_layer(x.float()) 142 | 143 | 144 | 145 | 146 | 147 | outs = self.prompt_pool(pre_prompted_embedding) 148 | prompted_embedding = outs['prompted_embedding'] 149 | sim = outs['similarity'] 150 | prompt_key = outs['prompt_key'] 151 | simlarity_loss = outs['reduce_sim'] 152 | 153 | 154 | 155 | last_embedding = self.gpt2(inputs_embeds=prompted_embedding).last_hidden_state 156 | outputs = self.out_layer(last_embedding.reshape(B*M*3, -1)) 157 | 158 | 159 | outputs = rearrange(outputs, '(b m c) h -> b m c h', b=B,m=M,c=3) 160 | outputs = outputs.sum(dim=2) 161 | outputs = rearrange(outputs, 'b m l -> b l m') 162 | 163 | res = dict() 164 | res['simlarity_loss'] = simlarity_loss 165 | 166 | 167 | 168 | 169 | 170 | 171 | outputs = outputs * stdev[:,:,:M] 172 | outputs = outputs + means[:,:,:M] 173 | 174 | return outputs,res 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /Long-term_Forecasting/models/prompt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | class Prompt(nn.Module): 19 | def __init__(self, length=2, embed_dim=768, embedding_key='mean', prompt_init='uniform', prompt_pool=False, 20 | prompt_key=False, pool_size=30, top_k=4, batchwise_prompt=False, prompt_key_init='uniform',wte = None): 21 | super().__init__() 22 | 23 | self.length = length 24 | self.embed_dim = embed_dim 25 | self.prompt_pool = prompt_pool 26 | self.embedding_key = embedding_key 27 | self.prompt_init = prompt_init 28 | self.prompt_key = prompt_key 29 | self.prompt_key_init = prompt_key_init 30 | self.pool_size = pool_size 31 | print(self.pool_size) 32 | self.top_k = top_k 33 | self.batchwise_prompt = batchwise_prompt 34 | self.wte = wte 35 | 36 | if self.prompt_pool: 37 | prompt_pool_shape = (pool_size, 
length, embed_dim) 38 | if prompt_init == 'zero': 39 | self.prompt = nn.Parameter(torch.zeros(prompt_pool_shape)) 40 | elif prompt_init == 'uniform': 41 | self.prompt = nn.Parameter(torch.randn(prompt_pool_shape)) 42 | nn.init.uniform_(self.prompt, -1, 1) 43 | 44 | # if using learnable prompt keys 45 | if prompt_key: 46 | key_shape = (pool_size, embed_dim) 47 | if prompt_key_init == 'zero': 48 | self.prompt = nn.Parameter(torch.zeros(key_shape),requires_grad=False) 49 | print('zero initialized key') 50 | 51 | elif prompt_key_init == 'uniform': 52 | self.prompt = nn.Parameter(torch.randn(key_shape),requires_grad=False) 53 | nn.init.uniform_(self.prompt, -5, 5) 54 | print('uniform initialized key') 55 | 56 | elif prompt_key_init == 'gaussian': 57 | self.prompt = nn.Parameter(torch.randn(key_shape),requires_grad=False) 58 | nn.init.normal_(self.prompt, mean=0.0, std=5.0) 59 | print('gaussian initialized key') 60 | 61 | elif prompt_key_init == 'text_prototype': 62 | self.text_prototype_linear = nn.Linear(50257, pool_size) 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | else: 72 | # else use mean of prompt as key 73 | # only compatible with prompt, not prefix 74 | prompt_mean = torch.mean(self.prompt, dim=1) 75 | self.prompt_key = prompt_mean 76 | 77 | def l2_normalize(self, x, dim=None, epsilon=1e-12): 78 | """Normalizes a given vector or matrix.""" 79 | square_sum = torch.sum(x ** 2, dim=dim, keepdim=True) 80 | x_inv_norm = torch.rsqrt(torch.maximum(square_sum, torch.tensor(epsilon, device=x.device))) 81 | return x * x_inv_norm 82 | 83 | def forward(self, x_embed, prompt_mask=None, cls_features=None): 84 | out = dict() 85 | if self.prompt_key: #if self.prompt_pool: 86 | if self.embedding_key == 'mean': 87 | x_embed_mean = torch.mean(x_embed, dim=1) 88 | elif self.embedding_key == 'max': 89 | x_embed_mean = torch.max(x_embed, dim=1)[0] 90 | elif self.embedding_key == 'mean_max': 91 | x_embed_mean = torch.max(x_embed, dim=1)[0] + 2 * torch.mean(x_embed, dim=1) 92 | elif self.embedding_key == 'cls': 93 | if cls_features is None: 94 | x_embed_mean = torch.max(x_embed, dim=1)[0] # B, C 95 | else: 96 | x_embed_mean = cls_features 97 | else: 98 | raise NotImplementedError("Not supported way of calculating embedding keys!") 99 | 100 | 101 | if self.prompt_key_init == 'text_prototype': 102 | prompt_key = self.text_prototype_linear(self.wte.transpose(0, 1)).transpose(0, 1) 103 | 104 | else: 105 | prompt_key = self.prompt 106 | 107 | prompt_norm = self.l2_normalize(prompt_key, dim=1) # Pool_size, C self.prompt_key 108 | x_embed_norm = self.l2_normalize(x_embed_mean, dim=1) # B, C 109 | 110 | similarity = torch.matmul(x_embed_norm, prompt_norm.t()) # B, Pool_size 111 | 112 | if prompt_mask is None: 113 | _, idx = torch.topk(similarity, k=self.top_k, dim=1) # B, top_k 114 | if self.batchwise_prompt: 115 | prompt_id, id_counts = torch.unique(idx, return_counts=True, sorted=True) 116 | # In jnp.unique, when the 'size' is specified and there are fewer than the indicated number of elements, 117 | # the remaining elements will be filled with 'fill_value', the default is the minimum value along the specified dimension. 118 | # Unless dimension is specified, this will be flattend if it is not already 1D. 
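                    # In short: when batchwise_prompt is enabled, the per-sample
                    # top-k indices are replaced by the k prompt ids that occur most
                    # often across the whole batch; the padding below only ensures
                    # id_counts has pool_size entries so torch.topk stays well defined.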
119 | if prompt_id.shape[0] < self.pool_size: 120 | prompt_id = torch.cat([prompt_id, torch.full((self.pool_size - prompt_id.shape[0],), torch.min(idx.flatten()), device=prompt_id.device)]) 121 | id_counts = torch.cat([id_counts, torch.full((self.pool_size - id_counts.shape[0],), 0, device=id_counts.device)]) 122 | _, major_idx = torch.topk(id_counts, k=self.top_k) # top_k 123 | major_prompt_id = prompt_id[major_idx] # top_k 124 | # expand to batch 125 | idx = major_prompt_id.expand(x_embed.shape[0], -1) # B, top_k 126 | else: 127 | idx = prompt_mask # B, top_k 128 | 129 | # batched_prompt_raw = self.prompt[idx] # B, top_k, length, C 130 | 131 | batched_prompt_raw = prompt_key[idx] # B, top_k, length, C 132 | batched_prompt_raw = batched_prompt_raw.unsqueeze(2) # B, top_k, 1, length, C 133 | 134 | batch_size, top_k, length, c = batched_prompt_raw.shape 135 | batched_prompt = batched_prompt_raw.reshape(batch_size, top_k * length, c) # B, top_k * length, C 136 | 137 | out['prompt_idx'] = idx 138 | 139 | # Debugging, return sim as well 140 | out['prompt_norm'] = prompt_norm 141 | out['x_embed_norm'] = x_embed_norm 142 | out['similarity'] = similarity 143 | 144 | # Put pull_constraint loss calculation inside 145 | batched_key_norm = prompt_norm[idx] # B, top_k, C 146 | out['selected_key'] = batched_key_norm 147 | x_embed_norm = x_embed_norm.unsqueeze(1) # B, 1, C 148 | sim = batched_key_norm * x_embed_norm # B, top_k, C 149 | reduce_sim = torch.sum(sim) / x_embed.shape[0] # Scalar 150 | 151 | out['reduce_sim'] = reduce_sim 152 | else: 153 | if self.prompt_init == 'zero': 154 | self.prompt = nn.Parameter(torch.zeros(self.length, self.embed_dim)) 155 | elif self.prompt_init == 'uniform': 156 | self.prompt = nn.Parameter(torch.randn(self.length, self.embed_dim)) 157 | nn.init.uniform_(self.prompt) 158 | batched_prompt = self.prompt.unsqueeze(0).expand(x_embed.shape[0], -1, -1) 159 | 160 | # The input with the prompt concatenated to the front. 
[B, prompt+token, C] 161 | out['total_prompt_len'] = batched_prompt.shape[1] 162 | out['prompted_embedding'] = torch.cat([batched_prompt, x_embed], dim=1) 163 | out['prompt_key'] = prompt_key # prompt_key 164 | 165 | return out -------------------------------------------------------------------------------- /Long-term_Forecasting/run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | 5 | from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast 6 | 7 | 8 | 9 | 10 | import random 11 | import numpy as np 12 | 13 | fix_seed = 2021 14 | random.seed(fix_seed) 15 | torch.manual_seed(fix_seed) 16 | np.random.seed(fix_seed) 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | parser = argparse.ArgumentParser(description='TimesNet') 26 | 27 | # basic config 28 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', 29 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') 30 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status') 31 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') 32 | parser.add_argument('--model', type=str, required=True, default='Autoformer', 33 | help='model name, options: [Autoformer, Transformer, TimesNet]') 34 | 35 | # data loader 36 | parser.add_argument('--data', type=str, required=True, default='ETTh1', help='dataset type') 37 | parser.add_argument('--number_variable', type=int,default=7, help='number of variable') 38 | 39 | parser.add_argument('--root_path', type=str, default='./data/raw_data/ETTh1/', help='root path of the data file') 40 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') 41 | parser.add_argument('--features', type=str, default='M', 42 | help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') 43 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') 44 | parser.add_argument('--freq', type=str, default='h', 45 | help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') 46 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') 47 | 48 | # forecasting task 49 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') 50 | parser.add_argument('--label_len', type=int, default=48, help='start token length') 51 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') 52 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') 53 | 54 | 55 | # model define 56 | parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock') 57 | parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') 58 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') 59 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') 60 | parser.add_argument('--c_out', type=int, default=7, help='output size') 61 | parser.add_argument('--d_model', type=int, default=512, help='dimension of model') 62 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads') 63 | 
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') 64 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') 65 | parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn') 66 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') 67 | parser.add_argument('--factor', type=int, default=1, help='attn factor') 68 | parser.add_argument('--distil', action='store_false', 69 | help='whether to use distilling in encoder, using this argument means not using distilling', 70 | default=True) 71 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout') 72 | parser.add_argument('--embed', type=str, default='timeF', 73 | help='time features encoding, options:[timeF, fixed, learned]') 74 | parser.add_argument('--activation', type=str, default='gelu', help='activation') 75 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder') 76 | 77 | # optimization 78 | parser.add_argument('--num_workers', type=int, default=90, help='data loader num workers') 79 | parser.add_argument('--itr', type=int, default=1, help='experiments times') 80 | parser.add_argument('--train_epochs', type=int, default=100, help='train epochs') 81 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') 82 | parser.add_argument('--patience', type=int, default=3, help='early stopping patience') 83 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') 84 | parser.add_argument('--des', type=str, default='test', help='exp description') 85 | parser.add_argument('--loss', type=str, default='MSE', help='loss function') 86 | parser.add_argument('--lradj', type=str, default='type2', help='adjust learning rate') 87 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) 88 | parser.add_argument('--decay_fac', type=float, default=0.75) 89 | 90 | # GPU 91 | parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') 92 | parser.add_argument('--gpu', type=int, default=0, help='gpu') 93 | parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) 94 | parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus') 95 | 96 | 97 | # de-stationary projector params 98 | parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128], 99 | help='hidden layer dimensions of projector (List)') 100 | parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector') 101 | 102 | # patching 103 | parser.add_argument('--patch_size', type=int, default=1) 104 | parser.add_argument('--stride', type=int, default=1) 105 | parser.add_argument('--gpt_layers', type=int, default=6) 106 | parser.add_argument('--ln', type=int, default=0) 107 | parser.add_argument('--mlp', type=int, default=0) 108 | parser.add_argument('--weight', type=float, default=0) 109 | parser.add_argument('--percent', type=int, default=5) 110 | parser.add_argument('--pretrained', action='store_false',help='use finetuned GPT2',default=True) 111 | 112 | 113 | parser.add_argument('--tokenization', type=str, default='patch', help='tokenization_method') 114 | parser.add_argument('--training_strategy', type=str, default='none', help='training_strategy') 115 | 116 | parser.add_argument('--add_prompt', type=int, default=0) 
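# The prompt-related options in this block configure S2IP-LLM itself:
# --prompt_length is the number of prompts retrieved from the pool (top_k in
# models/prompt.py), --pool_size is the prompt-pool size, --sim_coef weights the
# similarity loss added to the forecasting loss in exp_long_term_forecasting.py,
# and --trend_length / --seasonal_length set the moving-average decomposition
# applied inside the model's forecast().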
117 | parser.add_argument('--add_trainable_prompt', type=int, default=0) 118 | parser.add_argument('--prompt_length', type=int, default=1) 119 | parser.add_argument('--sim_coef', type=float, default=0.0) 120 | parser.add_argument('--pool_size', type=int, default=1000) 121 | parser.add_argument('--period', type=int, default=24) 122 | parser.add_argument('--prompt_init', type=str, default='text_prototype', help='prompt_init_type') 123 | parser.add_argument('--trend_length', type=int, default=24, help='trend_length') 124 | parser.add_argument('--seasonal_length', type=int, default=96, help='seasonal_length') 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | args = parser.parse_args() 135 | args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False 136 | 137 | if args.use_gpu and args.use_multi_gpu: 138 | args.devices = args.devices.replace(' ', '') 139 | device_ids = args.devices.split(',') 140 | args.device_ids = [int(id_) for id_ in device_ids] 141 | args.gpu = args.device_ids[0] 142 | 143 | print('Args in experiment:') 144 | print(args) 145 | 146 | 147 | 148 | if args.task_name == 'long_term_forecast': 149 | Exp = Exp_Long_Term_Forecast 150 | 151 | if args.is_training: 152 | mses = [] 153 | maes = [] 154 | smapes = [] 155 | msaes = [] 156 | owas = [] 157 | mapes = [] 158 | 159 | for ii in range(args.itr): 160 | # setting record of experiments 161 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( 162 | args.task_name, 163 | args.model_id, 164 | args.model, 165 | args.data, 166 | args.features, 167 | args.seq_len, 168 | args.label_len, 169 | args.pred_len, 170 | args.d_model, 171 | args.n_heads, 172 | args.e_layers, 173 | args.d_layers, 174 | args.d_ff, 175 | args.factor, 176 | args.embed, 177 | args.distil, 178 | args.des, ii) 179 | 180 | path = os.path.join(args.checkpoints, setting) 181 | if not os.path.exists(path): 182 | os.makedirs(path) 183 | 184 | 185 | 186 | 187 | exp = Exp(args) # set experiments 188 | 189 | print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) 190 | exp.train(setting) 191 | 192 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) 193 | 194 | best_model_path = path + '/' + 'checkpoint.pth' 195 | exp.model.load_state_dict(torch.load(best_model_path)) 196 | 197 | if args.task_name == 'long_term_forecast': 198 | mse, mae = exp.test(setting) 199 | mses.append(mse) 200 | maes.append(mae) 201 | torch.cuda.empty_cache() 202 | 203 | 204 | 205 | print('mse_means: ', np.array(mses),'mean: ', np.mean(np.array(mses))) 206 | print('mae_means: ', np.array(maes),'mean: ', np.mean(np.array(maes))) 207 | 208 | else: 209 | ii = 0 210 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( 211 | args.task_name, 212 | args.model_id, 213 | args.model, 214 | args.data, 215 | args.features, 216 | args.seq_len, 217 | args.label_len, 218 | args.pred_len, 219 | args.d_model, 220 | args.n_heads, 221 | args.e_layers, 222 | args.d_layers, 223 | args.d_ff, 224 | args.factor, 225 | args.embed, 226 | args.distil, 227 | args.des, ii) 228 | 229 | 230 | exp = Exp(args) # set experiments 231 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) 232 | # exp.test(setting, test=1) 233 | torch.cuda.empty_cache() 234 | -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/electricity.sh:
-------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 2 | --task_name long_term_forecast \ 3 | --is_training 1 \ 4 | --root_path ./data/electricity \ 5 | --data_path electricity.csv \ 6 | --model_id electricity_512_96 \ 7 | --model S2IPLLM \ 8 | --data ECL \ 9 | --features M \ 10 | --seq_len 512 \ 11 | --label_len 0 \ 12 | --pred_len 96 \ 13 | --des 'Exp' \ 14 | --itr 1 \ 15 | --d_model 768 \ 16 | --learning_rate 0.0001 \ 17 | --patch_size 16 \ 18 | --stride 8 \ 19 | --add_prompt 1 \ 20 | --prompt_length 4 \ 21 | --batch_size 1024 \ 22 | --sim_coef -0.1 \ 23 | --pool_size 1000 \ 24 | --period 24 \ 25 | --percent 100 \ 26 | --trend_length 24 \ 27 | --seasonal_length 4 28 | 29 | 30 | 31 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 32 | --task_name long_term_forecast \ 33 | --is_training 1 \ 34 | --root_path ./data/electricity \ 35 | --data_path electricity.csv \ 36 | --model_id electricity_512_192 \ 37 | --model S2IPLLM \ 38 | --data ECL \ 39 | --features M \ 40 | --seq_len 512 \ 41 | --label_len 0 \ 42 | --pred_len 192 \ 43 | --des 'Exp' \ 44 | --itr 1 \ 45 | --d_model 768 \ 46 | --learning_rate 0.0001 \ 47 | --patch_size 16 \ 48 | --stride 8 \ 49 | --add_prompt 1 \ 50 | --prompt_length 4 \ 51 | --batch_size 1024 \ 52 | --sim_coef -0.1 \ 53 | --pool_size 1000 \ 54 | --period 24 \ 55 | --percent 100 \ 56 | --trend_length 24 \ 57 | --seasonal_length 4 58 | 59 | 60 | 61 | 62 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 63 | --task_name long_term_forecast \ 64 | --is_training 1 \ 65 | --root_path ./data/electricity \ 66 | --data_path electricity.csv \ 67 | --model_id electricity_512_336 \ 68 | --model S2IPLLM \ 69 | --data ECL \ 70 | --features M \ 71 | --seq_len 512 \ 72 | --label_len 0 \ 73 | --pred_len 336 \ 74 | --des 'Exp' \ 75 | --itr 1 \ 76 | --d_model 768 \ 77 | --learning_rate 0.0001 \ 78 | --patch_size 16 \ 79 | --stride 8 \ 80 | --add_prompt 1 \ 81 | --prompt_length 4 \ 82 | --batch_size 1024 \ 83 | --sim_coef -0.1 \ 84 | --pool_size 1000 \ 85 | --period 24 \ 86 | --percent 100 \ 87 | --trend_length 24 \ 88 | --seasonal_length 4 89 | 90 | 91 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 92 | --task_name long_term_forecast \ 93 | --is_training 1 \ 94 | --root_path ./data/electricity \ 95 | --data_path electricity.csv \ 96 | --model_id electricity_512_720 \ 97 | --model S2IPLLM \ 98 | --data ECL \ 99 | --features M \ 100 | --seq_len 512 \ 101 | --label_len 0 \ 102 | --pred_len 720 \ 103 | --des 'Exp' \ 104 | --itr 1 \ 105 | --d_model 768 \ 106 | --learning_rate 0.0001 \ 107 | --patch_size 16 \ 108 | --stride 8 \ 109 | --add_prompt 1 \ 110 | --prompt_length 4 \ 111 | --batch_size 1024 \ 112 | --sim_coef -0.1 \ 113 | --pool_size 1000 \ 114 | --period 24 \ 115 | --percent 100 \ 116 | --trend_length 24 \ 117 | --seasonal_length 4 118 | 119 | -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/etth1.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 2 | --task_name long_term_forecast \ 3 | --is_training 1 \ 4 | --root_path ./data/ETT-small \ 5 | --data_path ETTh1.csv \ 6 | --model_id ETTh1_512_96 \ 7 | --model S2IPLLM \ 8 | --data ETTh1 \ 9 | --number_variable 7 \ 10 | --features M \ 11 | --seq_len 512 \ 12 | --label_len 0 \ 13 | --pred_len 96 \ 14 | --enc_in 7 \ 15 | --dec_in 7 \ 16 | --c_out 7 \ 17 | --des 'Exp' \ 18 | --itr 1 \ 19 | --d_model 768 \ 20 | --learning_rate 0.0001 \ 21 | 
--patch_size 16 \ 22 | --stride 8 \ 23 | --add_prompt 1 \ 24 | --prompt_length 4 \ 25 | --batch_size 128 \ 26 | --sim_coef -0.05 \ 27 | --pool_size 1000 \ 28 | --percent 100 \ 29 | --trend_length 96 \ 30 | --seasonal_length 96 31 | 32 | 33 | 34 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 35 | --task_name long_term_forecast \ 36 | --is_training 1 \ 37 | --root_path ./data/ETT-small \ 38 | --data_path ETTh1.csv \ 39 | --model_id ETTh1_512_192 \ 40 | --model S2IPLLM \ 41 | --data ETTh1 \ 42 | --number_variable 7 \ 43 | --features M \ 44 | --seq_len 512 \ 45 | --label_len 0 \ 46 | --pred_len 192 \ 47 | --enc_in 7 \ 48 | --dec_in 7 \ 49 | --c_out 7 \ 50 | --des 'Exp' \ 51 | --itr 1 \ 52 | --d_model 768 \ 53 | --learning_rate 0.0001 \ 54 | --patch_size 16 \ 55 | --stride 8 \ 56 | --add_prompt 1 \ 57 | --prompt_length 4 \ 58 | --batch_size 128 \ 59 | --sim_coef -0.05 \ 60 | --pool_size 1000 \ 61 | --percent 100 \ 62 | --trend_length 96 \ 63 | --seasonal_length 96 64 | 65 | 66 | 67 | 68 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 69 | --task_name long_term_forecast \ 70 | --is_training 1 \ 71 | --root_path ./data/ETT-small \ 72 | --data_path ETTh1.csv \ 73 | --model_id ETTh1_512_336 \ 74 | --model S2IPLLM \ 75 | --data ETTh1 \ 76 | --number_variable 7 \ 77 | --features M \ 78 | --seq_len 512 \ 79 | --label_len 0 \ 80 | --pred_len 336 \ 81 | --enc_in 7 \ 82 | --dec_in 7 \ 83 | --c_out 7 \ 84 | --des 'Exp' \ 85 | --itr 1 \ 86 | --d_model 768 \ 87 | --learning_rate 0.0001 \ 88 | --patch_size 16 \ 89 | --stride 8 \ 90 | --add_prompt 1 \ 91 | --prompt_length 8 \ 92 | --batch_size 128 \ 93 | --sim_coef -0.01 \ 94 | --pool_size 1000 \ 95 | --percent 100 \ 96 | --trend_length 24\ 97 | --seasonal_length 24 98 | 99 | 100 | 101 | 102 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 103 | --task_name long_term_forecast \ 104 | --is_training 1 \ 105 | --root_path ./data/ETT-small \ 106 | --data_path ETTh1.csv \ 107 | --model_id ETTh1_512_720 \ 108 | --model S2IPLLM \ 109 | --data ETTh1 \ 110 | --number_variable 7 \ 111 | --features M \ 112 | --seq_len 512 \ 113 | --label_len 0 \ 114 | --pred_len 720 \ 115 | --enc_in 7 \ 116 | --dec_in 7 \ 117 | --c_out 7 \ 118 | --des 'Exp' \ 119 | --itr 1 \ 120 | --d_model 768 \ 121 | --learning_rate 0.001 \ 122 | --patch_size 16 \ 123 | --stride 8 \ 124 | --add_prompt 1 \ 125 | --prompt_length 4 \ 126 | --batch_size 128 \ 127 | --sim_coef -0.01 \ 128 | --pool_size 1000 \ 129 | --percent 100 \ 130 | --trend_length 192 \ 131 | --seasonal_length 48 132 | 133 | 134 | -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/etth2.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 3 | --task_name long_term_forecast \ 4 | --is_training 1 \ 5 | --root_path ./data/ETT-small \ 6 | --data_path ETTh2.csv \ 7 | --model_id ETTh2_512_96 \ 8 | --model S2IPLLM \ 9 | --data ETTh2 \ 10 | --number_variable 7 \ 11 | --features M \ 12 | --seq_len 512 \ 13 | --label_len 0 \ 14 | --pred_len 96 \ 15 | --enc_in 7 \ 16 | --dec_in 7 \ 17 | --c_out 7 \ 18 | --des 'Exp' \ 19 | --itr 1 \ 20 | --d_model 768 \ 21 | --learning_rate 0.0001 \ 22 | --patch_size 16 \ 23 | --stride 8 \ 24 | --add_prompt 1 \ 25 | --prompt_length 16 \ 26 | --batch_size 128 \ 27 | --sim_coef -0.05 \ 28 | --pool_size 1000 \ 29 | --percent 100 \ 30 | --trend_length 96 \ 31 | --seasonal_length 96 32 | 33 | 34 | 35 | 36 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 37 | --task_name long_term_forecast \ 38 | 
--is_training 1 \ 39 | --root_path ./data/ETT-small \ 40 | --data_path ETTh2.csv \ 41 | --model_id ETTh2_512_192 \ 42 | --model S2IPLLM \ 43 | --data ETTh2 \ 44 | --number_variable 7 \ 45 | --features M \ 46 | --seq_len 512 \ 47 | --label_len 0 \ 48 | --pred_len 192 \ 49 | --enc_in 7 \ 50 | --dec_in 7 \ 51 | --c_out 7 \ 52 | --des 'Exp' \ 53 | --itr 1 \ 54 | --d_model 768 \ 55 | --learning_rate 0.0001 \ 56 | --patch_size 16 \ 57 | --stride 8 \ 58 | --add_prompt 1 \ 59 | --prompt_length 4 \ 60 | --batch_size 128 \ 61 | --sim_coef -0.05 \ 62 | --pool_size 1000 \ 63 | --percent 100 \ 64 | --trend_length 96 \ 65 | --seasonal_length 12 66 | 67 | 68 | 69 | 70 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 71 | --task_name long_term_forecast \ 72 | --is_training 1 \ 73 | --root_path ./data/ETT-small \ 74 | --data_path ETTh2.csv \ 75 | --model_id ETTh2_512_336 \ 76 | --model S2IPLLM \ 77 | --data ETTh2 \ 78 | --number_variable 7 \ 79 | --features M \ 80 | --seq_len 512 \ 81 | --label_len 0 \ 82 | --pred_len 336 \ 83 | --enc_in 7 \ 84 | --dec_in 7 \ 85 | --c_out 7 \ 86 | --des 'Exp' \ 87 | --itr 1 \ 88 | --d_model 768 \ 89 | --learning_rate 0.0001 \ 90 | --patch_size 16 \ 91 | --stride 8 \ 92 | --add_prompt 1 \ 93 | --prompt_length 8 \ 94 | --batch_size 128 \ 95 | --sim_coef -0.05 \ 96 | --pool_size 1000 \ 97 | --percent 100 \ 98 | --trend_length 96 \ 99 | --seasonal_length 12 100 | 101 | 102 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 103 | --task_name long_term_forecast \ 104 | --is_training 1 \ 105 | --root_path ./data/ETT-small \ 106 | --data_path ETTh2.csv \ 107 | --model_id ETTh2_512_720 \ 108 | --model S2IPLLM \ 109 | --data ETTh2 \ 110 | --number_variable 7 \ 111 | --features M \ 112 | --seq_len 512 \ 113 | --label_len 0 \ 114 | --pred_len 720 \ 115 | --enc_in 7 \ 116 | --dec_in 7 \ 117 | --c_out 7 \ 118 | --des 'Exp' \ 119 | --itr 1 \ 120 | --d_model 768 \ 121 | --learning_rate 0.0001 \ 122 | --patch_size 16 \ 123 | --stride 8 \ 124 | --add_prompt 1 \ 125 | --prompt_length 2 \ 126 | --batch_size 128 \ 127 | --sim_coef -0.01 \ 128 | --pool_size 1000 \ 129 | --percent 100 \ 130 | --trend_length 24 \ 131 | --seasonal_length 24 -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/ettm1.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 2 | --task_name long_term_forecast \ 3 | --is_training 1 \ 4 | --root_path ./data/ETT-small \ 5 | --data_path ETTm1.csv \ 6 | --model_id ETTm1_512_96 \ 7 | --model S2IPLLM \ 8 | --data ETTm1 \ 9 | --number_variable 7 \ 10 | --features M \ 11 | --seq_len 512 \ 12 | --label_len 0 \ 13 | --pred_len 96 \ 14 | --enc_in 7 \ 15 | --dec_in 7 \ 16 | --c_out 7 \ 17 | --des 'Exp' \ 18 | --itr 1 \ 19 | --d_model 768 \ 20 | --learning_rate 0.0001 \ 21 | --patch_size 16 \ 22 | --stride 8 \ 23 | --add_prompt 1 \ 24 | --prompt_length 4 \ 25 | --batch_size 128 \ 26 | --sim_coef -0.05 \ 27 | --pool_size 1000 \ 28 | --percent 100 \ 29 | --trend_length 144 \ 30 | --seasonal_length 96 31 | 32 | 33 | 34 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 35 | --task_name long_term_forecast \ 36 | --is_training 1 \ 37 | --root_path ./data/ETT-small \ 38 | --data_path ETTm1.csv \ 39 | --model_id ETTm1_512_192 \ 40 | --model S2IPLLM \ 41 | --data ETTm1 \ 42 | --number_variable 7 \ 43 | --features M \ 44 | --seq_len 512 \ 45 | --label_len 0 \ 46 | --pred_len 192 \ 47 | --enc_in 7 \ 48 | --dec_in 7 \ 49 | --c_out 7 \ 50 | --des 'Exp' \ 51 | --itr 1 \ 52 | 
--d_model 768 \ 53 | --learning_rate 0.0001 \ 54 | --patch_size 16 \ 55 | --stride 8 \ 56 | --add_prompt 1 \ 57 | --prompt_length 4 \ 58 | --batch_size 128 \ 59 | --sim_coef -0.05 \ 60 | --pool_size 1000 \ 61 | --percent 100 \ 62 | --trend_length 192 \ 63 | --seasonal_length 96 64 | 65 | 66 | 67 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 68 | --task_name long_term_forecast \ 69 | --is_training 1 \ 70 | --root_path ./data/ETT-small \ 71 | --data_path ETTm1.csv \ 72 | --model_id ETTm1_512_336 \ 73 | --model S2IPLLM \ 74 | --data ETTm1 \ 75 | --number_variable 7 \ 76 | --features M \ 77 | --seq_len 512 \ 78 | --label_len 0 \ 79 | --pred_len 336 \ 80 | --enc_in 7 \ 81 | --dec_in 7 \ 82 | --c_out 7 \ 83 | --des 'Exp' \ 84 | --itr 1 \ 85 | --d_model 768 \ 86 | --learning_rate 0.0001 \ 87 | --patch_size 16 \ 88 | --stride 8 \ 89 | --add_prompt 1 \ 90 | --prompt_length 8 \ 91 | --batch_size 128 \ 92 | --sim_coef -0.05 \ 93 | --pool_size 1000 \ 94 | --percent 100 \ 95 | --trend_length 192 \ 96 | --seasonal_length 96 97 | 98 | 99 | 100 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 101 | --task_name long_term_forecast \ 102 | --is_training 1 \ 103 | --root_path ./data/ETT-small \ 104 | --data_path ETTm1.csv \ 105 | --model_id ETTm1_512_720 \ 106 | --model S2IPLLM \ 107 | --data ETTm1 \ 108 | --number_variable 7 \ 109 | --features M \ 110 | --seq_len 512 \ 111 | --label_len 0 \ 112 | --pred_len 720 \ 113 | --enc_in 7 \ 114 | --dec_in 7 \ 115 | --c_out 7 \ 116 | --des 'Exp' \ 117 | --itr 1 \ 118 | --d_model 768 \ 119 | --learning_rate 0.0001 \ 120 | --patch_size 16 \ 121 | --stride 8 \ 122 | --add_prompt 1 \ 123 | --prompt_length 8 \ 124 | --batch_size 128 \ 125 | --sim_coef -0.05 \ 126 | --pool_size 5000 \ 127 | --percent 100 \ 128 | --trend_length 192 \ 129 | --seasonal_length 96 -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/ettm2.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 2 | --task_name long_term_forecast \ 3 | --is_training 1 \ 4 | --root_path ./data/ETT-small \ 5 | --data_path ETTm2.csv \ 6 | --model_id ETTm2_512_96 \ 7 | --model S2IPLLM \ 8 | --data ETTm2 \ 9 | --number_variable 7 \ 10 | --features M \ 11 | --seq_len 512 \ 12 | --label_len 0 \ 13 | --pred_len 96 \ 14 | --enc_in 7 \ 15 | --dec_in 7 \ 16 | --c_out 7 \ 17 | --des 'Exp' \ 18 | --itr 1 \ 19 | --d_model 768 \ 20 | --learning_rate 0.0001 \ 21 | --patch_size 16 \ 22 | --stride 8 \ 23 | --add_prompt 1 \ 24 | --prompt_length 4 \ 25 | --batch_size 128 \ 26 | --sim_coef -0.05 \ 27 | --pool_size 1000 \ 28 | --percent 100 \ 29 | --trend_length 24 \ 30 | --seasonal_length 24 31 | 32 | 33 | 34 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 35 | --task_name long_term_forecast \ 36 | --is_training 1 \ 37 | --root_path ./data/ETT-small \ 38 | --data_path ETTm2.csv \ 39 | --model_id ETTm2_512_192 \ 40 | --model S2IPLLM \ 41 | --data ETTm2 \ 42 | --number_variable 7 \ 43 | --features M \ 44 | --seq_len 512 \ 45 | --label_len 0 \ 46 | --pred_len 192 \ 47 | --enc_in 7 \ 48 | --dec_in 7 \ 49 | --c_out 7 \ 50 | --des 'Exp' \ 51 | --itr 1 \ 52 | --d_model 768 \ 53 | --learning_rate 0.0001 \ 54 | --patch_size 16 \ 55 | --stride 8 \ 56 | --add_prompt 1 \ 57 | --prompt_length 8 \ 58 | --batch_size 128 \ 59 | --sim_coef -0.05 \ 60 | --pool_size 1000 \ 61 | --percent 100 \ 62 | --trend_length 192 \ 63 | --seasonal_length 48 64 | 65 | 66 | 67 | 68 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 69 | --task_name 
long_term_forecast \ 70 | --is_training 1 \ 71 | --root_path ./data/ETT-small \ 72 | --data_path ETTm2.csv \ 73 | --model_id ETTm2_512_336 \ 74 | --model S2IPLLM \ 75 | --data ETTm2 \ 76 | --number_variable 7 \ 77 | --features M \ 78 | --seq_len 512 \ 79 | --label_len 0 \ 80 | --pred_len 336 \ 81 | --enc_in 7 \ 82 | --dec_in 7 \ 83 | --c_out 7 \ 84 | --des 'Exp' \ 85 | --itr 1 \ 86 | --d_model 768 \ 87 | --learning_rate 0.0001 \ 88 | --patch_size 16 \ 89 | --stride 8 \ 90 | --add_prompt 1 \ 91 | --prompt_length 8 \ 92 | --batch_size 128 \ 93 | --sim_coef -0.05 \ 94 | --pool_size 1000 \ 95 | --percent 100 \ 96 | --trend_length 192 \ 97 | --seasonal_length 96 98 | 99 | 100 | 101 | 102 | CUDA_VISIBLE_DEVICES=0 python -u run.py \ 103 | --task_name long_term_forecast \ 104 | --is_training 1 \ 105 | --root_path ./data/ETT-small \ 106 | --data_path ETTm2.csv \ 107 | --model_id ETTm2_512_720 \ 108 | --model S2IPLLM \ 109 | --data ETTm2 \ 110 | --number_variable 7 \ 111 | --features M \ 112 | --seq_len 512 \ 113 | --label_len 0 \ 114 | --pred_len 720 \ 115 | --enc_in 7 \ 116 | --dec_in 7 \ 117 | --c_out 7 \ 118 | --des 'Exp' \ 119 | --itr 1 \ 120 | --d_model 768 \ 121 | --learning_rate 0.0001 \ 122 | --patch_size 16 \ 123 | --stride 8 \ 124 | --add_prompt 1 \ 125 | --prompt_length 8 \ 126 | --batch_size 128 \ 127 | --sim_coef -0.05 \ 128 | --pool_size 1000 \ 129 | --percent 100 \ 130 | --trend_length 192 \ 131 | --seasonal_length 96 -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/traffic.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 2 | --task_name long_term_forecast \ 3 | --is_training 1 \ 4 | --root_path ./data/traffic \ 5 | --data_path traffic.csv \ 6 | --model_id traffic_512_96 \ 7 | --model S2IPLLM \ 8 | --data traffic \ 9 | --features M \ 10 | --seq_len 512 \ 11 | --label_len 0 \ 12 | --pred_len 96 \ 13 | --des 'Exp' \ 14 | --itr 1 \ 15 | --d_model 768 \ 16 | --learning_rate 0.0001 \ 17 | --patch_size 16 \ 18 | --stride 8 \ 19 | --add_prompt 1 \ 20 | --prompt_length 2 \ 21 | --batch_size 32 \ 22 | --sim_coef -0.1 \ 23 | --pool_size 1000 \ 24 | --period 24 \ 25 | --percent 100 \ 26 | --trend_length 24 \ 27 | --seasonal_length 4 28 | 29 | 30 | 31 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 32 | --task_name long_term_forecast \ 33 | --is_training 1 \ 34 | --root_path ./data/traffic \ 35 | --data_path traffic.csv \ 36 | --model_id traffic_512_192 \ 37 | --model S2IPLLM \ 38 | --data traffic \ 39 | --features M \ 40 | --seq_len 512 \ 41 | --label_len 0 \ 42 | --pred_len 192 \ 43 | --des 'Exp' \ 44 | --itr 1 \ 45 | --d_model 768 \ 46 | --learning_rate 0.0001 \ 47 | --patch_size 16 \ 48 | --stride 8 \ 49 | --add_prompt 1 \ 50 | --prompt_length 2 \ 51 | --batch_size 32 \ 52 | --sim_coef -0.1 \ 53 | --pool_size 1000 \ 54 | --period 24 \ 55 | --percent 100 \ 56 | --trend_length 24 \ 57 | --seasonal_length 12 58 | 59 | 60 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 61 | --task_name long_term_forecast \ 62 | --is_training 1 \ 63 | --root_path ./data/traffic \ 64 | --data_path traffic.csv \ 65 | --model_id traffic_512_336 \ 66 | --model S2IPLLM \ 67 | --data traffic \ 68 | --features M \ 69 | --seq_len 512 \ 70 | --label_len 0 \ 71 | --pred_len 336 \ 72 | --des 'Exp' \ 73 | --itr 1 \ 74 | --d_model 768 \ 75 | --learning_rate 0.0001 \ 76 | --patch_size 16 \ 77 | --stride 8 \ 78 | --add_prompt 1 \ 79 | --prompt_length 2 \ 80 | --batch_size 32 \ 81 | --sim_coef 
-0.1 \ 82 | --pool_size 1000 \ 83 | --period 24 \ 84 | --percent 100 \ 85 | --trend_length 24 \ 86 | --seasonal_length 4 87 | 88 | 89 | 90 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 91 | --task_name long_term_forecast \ 92 | --is_training 1 \ 93 | --root_path ./data/traffic \ 94 | --data_path traffic.csv \ 95 | --model_id traffic_512_720 \ 96 | --model S2IPLLM \ 97 | --data traffic \ 98 | --features M \ 99 | --seq_len 512 \ 100 | --label_len 0 \ 101 | --pred_len 720 \ 102 | --des 'Exp' \ 103 | --itr 1 \ 104 | --d_model 768 \ 105 | --learning_rate 0.0001 \ 106 | --patch_size 16 \ 107 | --stride 8 \ 108 | --add_prompt 1 \ 109 | --prompt_length 2 \ 110 | --batch_size 32 \ 111 | --sim_coef -0.1 \ 112 | --pool_size 1000 \ 113 | --period 24 \ 114 | --percent 100 \ 115 | --trend_length 24 \ 116 | --seasonal_length 12 -------------------------------------------------------------------------------- /Long-term_Forecasting/scripts/weather.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 4 | --task_name long_term_forecast \ 5 | --is_training 1 \ 6 | --root_path ./data/weather \ 7 | --data_path weather.csv \ 8 | --model_id weather_512_96 \ 9 | --model S2IPLLM \ 10 | --data weather \ 11 | --features M \ 12 | --seq_len 512 \ 13 | --label_len 0 \ 14 | --pred_len 96 \ 15 | --des 'Exp' \ 16 | --itr 1 \ 17 | --d_model 768 \ 18 | --learning_rate 0.001 \ 19 | --patch_size 16 \ 20 | --stride 8 \ 21 | --add_prompt 1 \ 22 | --prompt_length 2 \ 23 | --batch_size 1024 \ 24 | --sim_coef -0.1 \ 25 | --pool_size 1000 \ 26 | --period 24 \ 27 | --percent 100 \ 28 | --trend_length 96 \ 29 | --seasonal_length 48 30 | 31 | 32 | 33 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 34 | --task_name long_term_forecast \ 35 | --is_training 1 \ 36 | --root_path ./data/weather \ 37 | --data_path weather.csv \ 38 | --model_id weather_512_192 \ 39 | --model S2IPLLM \ 40 | --data weather \ 41 | --features M \ 42 | --seq_len 512 \ 43 | --label_len 0 \ 44 | --pred_len 192 \ 45 | --des 'Exp' \ 46 | --itr 1 \ 47 | --d_model 768 \ 48 | --learning_rate 0.001 \ 49 | --patch_size 16 \ 50 | --stride 8 \ 51 | --add_prompt 1 \ 52 | --prompt_length 2 \ 53 | --batch_size 1024 \ 54 | --sim_coef -0.1 \ 55 | --pool_size 1000 \ 56 | --period 24 \ 57 | --percent 100 \ 58 | --trend_length 96 \ 59 | --seasonal_length 48 60 | 61 | 62 | 63 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 64 | --task_name long_term_forecast \ 65 | --is_training 1 \ 66 | --root_path ./data/weather \ 67 | --data_path weather.csv \ 68 | --model_id weather_512_336 \ 69 | --model S2IPLLM \ 70 | --data weather \ 71 | --features M \ 72 | --seq_len 512 \ 73 | --label_len 0 \ 74 | --pred_len 336 \ 75 | --des 'Exp' \ 76 | --itr 1 \ 77 | --d_model 768 \ 78 | --learning_rate 0.001 \ 79 | --patch_size 16 \ 80 | --stride 8 \ 81 | --add_prompt 1 \ 82 | --prompt_length 4 \ 83 | --batch_size 1024 \ 84 | --sim_coef -0.1 \ 85 | --pool_size 1000 \ 86 | --period 24 \ 87 | --percent 100 \ 88 | --trend_length 96 \ 89 | --seasonal_length 48 90 | 91 | 92 | 93 | CUDA_VISIBLE_DEVICES=1 python -u run.py \ 94 | --task_name long_term_forecast \ 95 | --is_training 1 \ 96 | --root_path ./data/weather \ 97 | --data_path weather.csv \ 98 | --model_id weather_512_720 \ 99 | --model S2IPLLM \ 100 | --data weather \ 101 | --features M \ 102 | --seq_len 512 \ 103 | --label_len 0 \ 104 | --pred_len 720 \ 105 | --des 'Exp' \ 106 | --itr 1 \ 107 | --d_model 768 \ 108 | --learning_rate 0.001 \ 109 | --patch_size 16 \ 110 | --stride 8 
\ 111 | --add_prompt 1 \ 112 | --prompt_length 4 \ 113 | --batch_size 1024 \ 114 | --sim_coef -0.1 \ 115 | --pool_size 1000 \ 116 | --period 24 \ 117 | --percent 100 \ 118 | --trend_length 96 \ 119 | --seasonal_length 48 -------------------------------------------------------------------------------- /Long-term_Forecasting/utils/losses.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | Loss functions for PyTorch. 17 | """ 18 | 19 | import torch as t 20 | import torch.nn as nn 21 | import numpy as np 22 | import pdb 23 | 24 | 25 | def divide_no_nan(a, b): 26 | """ 27 | a/b where the resulted NaN or Inf are replaced by 0. 28 | """ 29 | result = a / b 30 | result[result != result] = .0 31 | result[result == np.inf] = .0 32 | return result 33 | 34 | 35 | class mape_loss(nn.Module): 36 | def __init__(self): 37 | super(mape_loss, self).__init__() 38 | 39 | def forward(self, insample: t.Tensor, freq: int, 40 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 41 | """ 42 | MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error 43 | 44 | :param forecast: Forecast values. Shape: batch, time 45 | :param target: Target values. Shape: batch, time 46 | :param mask: 0/1 mask. Shape: batch, time 47 | :return: Loss value 48 | """ 49 | weights = divide_no_nan(mask, target) 50 | return t.mean(t.abs((forecast - target) * weights)) 51 | 52 | 53 | class smape_loss(nn.Module): 54 | def __init__(self): 55 | super(smape_loss, self).__init__() 56 | 57 | def forward(self, insample: t.Tensor, freq: int, 58 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 59 | """ 60 | sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993) 61 | 62 | :param forecast: Forecast values. Shape: batch, time 63 | :param target: Target values. Shape: batch, time 64 | :param mask: 0/1 mask. Shape: batch, time 65 | :return: Loss value 66 | """ 67 | return 200 * t.mean(divide_no_nan(t.abs(forecast - target), 68 | t.abs(forecast.data) + t.abs(target.data)) * mask) 69 | 70 | 71 | class mase_loss(nn.Module): 72 | def __init__(self): 73 | super(mase_loss, self).__init__() 74 | 75 | def forward(self, insample: t.Tensor, freq: int, 76 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 77 | """ 78 | MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf 79 | 80 | :param insample: Insample values. Shape: batch, time_i 81 | :param freq: Frequency value 82 | :param forecast: Forecast values. 
Shape: batch, time_o 83 | :param target: Target values. Shape: batch, time_o 84 | :param mask: 0/1 mask. Shape: batch, time_o 85 | :return: Loss value 86 | """ 87 | masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1) 88 | masked_masep_inv = divide_no_nan(mask, masep[:, None]) 89 | return t.mean(t.abs(target - forecast) * masked_masep_inv) 90 | -------------------------------------------------------------------------------- /Long-term_Forecasting/utils/m4_summary.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | M4 Summary 17 | """ 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from data_provider.m4 import M4Dataset 24 | from data_provider.m4 import M4Meta 25 | import os 26 | 27 | 28 | def group_values(values, groups, group_name): 29 | return np.array([v[~np.isnan(v)] for v in values[groups == group_name]]) 30 | 31 | 32 | def mase(forecast, insample, outsample, frequency): 33 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:])) 34 | 35 | 36 | def smape_2(forecast, target): 37 | denom = np.abs(target) + np.abs(forecast) 38 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway. 39 | denom[denom == 0.0] = 1.0 40 | return 200 * np.abs(forecast - target) / denom 41 | 42 | 43 | def mape(forecast, target): 44 | denom = np.abs(target) 45 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway. 46 | denom[denom == 0.0] = 1.0 47 | return 100 * np.abs(forecast - target) / denom 48 | 49 | 50 | class M4Summary: 51 | def __init__(self, file_path, root_path): 52 | self.file_path = file_path 53 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path) 54 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path) 55 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv') 56 | 57 | def evaluate(self): 58 | """ 59 | Evaluate forecasts using M4 test dataset. 60 | 61 | :param forecast: Forecasts. Shape: timeseries, time. 62 | :return: sMAPE and OWA grouped by seasonal patterns. 
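OWA is computed per seasonal-pattern group as the average of the model's MASE and sMAPE after normalizing each by the corresponding Naive2 baseline score, i.e. OWA = 0.5 * (MASE / MASE_Naive2 + sMAPE / sMAPE_Naive2) (see the grouped_owa loop below).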
63 | """ 64 | grouped_owa = OrderedDict() 65 | 66 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32) 67 | naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts]) 68 | 69 | model_mases = {} 70 | naive2_smapes = {} 71 | naive2_mases = {} 72 | grouped_smapes = {} 73 | grouped_mapes = {} 74 | for group_name in M4Meta.seasonal_patterns: 75 | file_name = self.file_path + group_name + "_forecast.csv" 76 | if os.path.exists(file_name): 77 | model_forecast = pd.read_csv(file_name).values 78 | 79 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name) 80 | target = group_values(self.test_set.values, self.test_set.groups, group_name) 81 | # all timeseries within group have same frequency 82 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0] 83 | insample = group_values(self.training_set.values, self.test_set.groups, group_name) 84 | 85 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i], 86 | insample=insample[i], 87 | outsample=target[i], 88 | frequency=frequency) for i in range(len(model_forecast))]) 89 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i], 90 | insample=insample[i], 91 | outsample=target[i], 92 | frequency=frequency) for i in range(len(model_forecast))]) 93 | 94 | naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target)) 95 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target)) 96 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target)) 97 | 98 | grouped_smapes = self.summarize_groups(grouped_smapes) 99 | grouped_mapes = self.summarize_groups(grouped_mapes) 100 | grouped_model_mases = self.summarize_groups(model_mases) 101 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes) 102 | grouped_naive2_mases = self.summarize_groups(naive2_mases) 103 | for k in grouped_model_mases.keys(): 104 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] + 105 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2 106 | 107 | def round_all(d): 108 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items())) 109 | 110 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all( 111 | grouped_model_mases) 112 | 113 | def summarize_groups(self, scores): 114 | """ 115 | Re-group scores respecting M4 rules. 116 | :param scores: Scores per group. 117 | :return: Grouped scores. 
118 | """ 119 | scores_summary = OrderedDict() 120 | 121 | def group_count(group_name): 122 | return len(np.where(self.test_set.groups == group_name)[0]) 123 | 124 | weighted_score = {} 125 | for g in ['Yearly', 'Quarterly', 'Monthly']: 126 | weighted_score[g] = scores[g] * group_count(g) 127 | scores_summary[g] = scores[g] 128 | 129 | others_score = 0 130 | others_count = 0 131 | for g in ['Weekly', 'Daily', 'Hourly']: 132 | others_score += scores[g] * group_count(g) 133 | others_count += group_count(g) 134 | weighted_score['Others'] = others_score 135 | scores_summary['Others'] = others_score / others_count 136 | 137 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) 138 | scores_summary['Average'] = average 139 | 140 | return scores_summary 141 | -------------------------------------------------------------------------------- /Long-term_Forecasting/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def RSE(pred, true): 5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) 6 | 7 | 8 | def CORR(pred, true): 9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) 10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) 11 | return (u / d).mean(-1) 12 | 13 | 14 | def MAE(pred, true): 15 | return np.mean(np.abs(pred - true)) 16 | 17 | 18 | def MSE(pred, true): 19 | return np.mean((pred - true) ** 2) 20 | 21 | 22 | def RMSE(pred, true): 23 | return np.sqrt(MSE(pred, true)) 24 | 25 | 26 | def MAPE(pred, true): 27 | return np.mean(np.abs((pred - true) / true)) 28 | 29 | 30 | def MSPE(pred, true): 31 | return np.mean(np.square((pred - true) / true)) 32 | 33 | 34 | def metric(pred, true): 35 | mae = MAE(pred, true) 36 | mse = MSE(pred, true) 37 | rmse = RMSE(pred, true) 38 | mape = MAPE(pred, true) 39 | mspe = MSPE(pred, true) 40 | 41 | return mae, mse, rmse, mape, mspe 42 | -------------------------------------------------------------------------------- /Long-term_Forecasting/utils/timefeatures.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.tseries import offsets 6 | from pandas.tseries.frequencies import to_offset 7 | 8 | 9 | class TimeFeature: 10 | def __init__(self): 11 | pass 12 | 13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 14 | pass 15 | 16 | def __repr__(self): 17 | return self.__class__.__name__ + "()" 18 | 19 | 20 | class SecondOfMinute(TimeFeature): 21 | """Minute of hour encoded as value between [-0.5, 0.5]""" 22 | 23 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 24 | return index.second / 59.0 - 0.5 25 | 26 | 27 | class MinuteOfHour(TimeFeature): 28 | """Minute of hour encoded as value between [-0.5, 0.5]""" 29 | 30 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 31 | return index.minute / 59.0 - 0.5 32 | 33 | 34 | class HourOfDay(TimeFeature): 35 | """Hour of day encoded as value between [-0.5, 0.5]""" 36 | 37 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 38 | return index.hour / 23.0 - 0.5 39 | 40 | 41 | class DayOfWeek(TimeFeature): 42 | """Hour of day encoded as value between [-0.5, 0.5]""" 43 | 44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 45 | return index.dayofweek / 6.0 - 0.5 46 | 47 | 48 | class DayOfMonth(TimeFeature): 49 | """Day of month encoded as value between [-0.5, 0.5]""" 50 | 51 
| def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 52 | return (index.day - 1) / 30.0 - 0.5 53 | 54 | 55 | class DayOfYear(TimeFeature): 56 | """Day of year encoded as value between [-0.5, 0.5]""" 57 | 58 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 59 | return (index.dayofyear - 1) / 365.0 - 0.5 60 | 61 | 62 | class MonthOfYear(TimeFeature): 63 | """Month of year encoded as value between [-0.5, 0.5]""" 64 | 65 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 66 | return (index.month - 1) / 11.0 - 0.5 67 | 68 | 69 | class WeekOfYear(TimeFeature): 70 | """Week of year encoded as value between [-0.5, 0.5]""" 71 | 72 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 73 | return (index.isocalendar().week - 1) / 52.0 - 0.5 74 | 75 | 76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: 77 | """ 78 | Returns a list of time features that will be appropriate for the given frequency string. 79 | Parameters 80 | ---------- 81 | freq_str 82 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) 135 | -------------------------------------------------------------------------------- /Long-term_Forecasting/utils/tokenization.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import numpy as np 3 | from dataclasses import dataclass 4 | 5 | def vec_num2repr(val, base, prec, max_val): 6 | """ 7 | Convert numbers to a representation in a specified base with precision. 8 | 9 | Parameters: 10 | - val (np.array): The numbers to represent. 11 | - base (int): The base of the representation. 12 | - prec (int): The precision after the 'decimal' point in the base representation. 13 | - max_val (float): The maximum absolute value of the number. 14 | 15 | Returns: 16 | - tuple: Sign and digits in the specified base representation. 
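Note that the number of digits kept before the 'decimal' point is fixed at ceil(log(max_val) / log(base)), so every value in the batch is represented with the same number of digits (with prec additional digits when prec > 0).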
17 | 18 | Examples: 19 | With base=10, prec=2: 20 | 0.5 -> 50 21 | 3.52 -> 352 22 | 12.5 -> 1250 23 | """ 24 | base = float(base) 25 | bs = val.shape[0] 26 | sign = 1 * (val >= 0) - 1 * (val < 0) 27 | val = np.abs(val) 28 | max_bit_pos = int(np.ceil(np.log(max_val) / np.log(base)).item()) 29 | 30 | before_decimals = [] 31 | for i in range(max_bit_pos): 32 | digit = (val / base**(max_bit_pos - i - 1)).astype(int) 33 | before_decimals.append(digit) 34 | val -= digit * base**(max_bit_pos - i - 1) 35 | 36 | before_decimals = np.stack(before_decimals, axis=-1) 37 | 38 | if prec > 0: 39 | after_decimals = [] 40 | for i in range(prec): 41 | digit = (val / base**(-i - 1)).astype(int) 42 | after_decimals.append(digit) 43 | val -= digit * base**(-i - 1) 44 | 45 | after_decimals = np.stack(after_decimals, axis=-1) 46 | digits = np.concatenate([before_decimals, after_decimals], axis=-1) 47 | else: 48 | digits = before_decimals 49 | return sign, digits 50 | 51 | def vec_repr2num(sign, digits, base, prec, half_bin_correction=True): 52 | """ 53 | Convert a string representation in a specified base back to numbers. 54 | 55 | Parameters: 56 | - sign (np.array): The sign of the numbers. 57 | - digits (np.array): Digits of the numbers in the specified base. 58 | - base (int): The base of the representation. 59 | - prec (int): The precision after the 'decimal' point in the base representation. 60 | - half_bin_correction (bool): If True, adds 0.5 of the smallest bin size to the number. 61 | 62 | Returns: 63 | - np.array: Numbers corresponding to the given base representation. 64 | """ 65 | base = float(base) 66 | bs, D = digits.shape 67 | digits_flipped = np.flip(digits, axis=-1) 68 | powers = -np.arange(-prec, -prec + D) 69 | val = np.sum(digits_flipped/base**powers, axis=-1) 70 | 71 | if half_bin_correction: 72 | val += 0.5/base**prec 73 | 74 | return sign * val 75 | 76 | @dataclass 77 | class SerializerSettings: 78 | """ 79 | Settings for serialization of numbers. 80 | 81 | Attributes: 82 | - base (int): The base for number representation. 83 | - prec (int): The precision after the 'decimal' point in the base representation. 84 | - signed (bool): If True, allows negative numbers. Default is False. 85 | - fixed_length (bool): If True, ensures fixed length of serialized string. Default is False. 86 | - max_val (float): Maximum absolute value of number for serialization. 87 | - time_sep (str): Separator for different time steps. 88 | - bit_sep (str): Separator for individual digits. 89 | - plus_sign (str): String representation for positive sign. 90 | - minus_sign (str): String representation for negative sign. 91 | - half_bin_correction (bool): If True, applies half bin correction during deserialization. Default is True. 92 | - decimal_point (str): String representation for the decimal point. 93 | """ 94 | base: int = 10 95 | prec: int = 1 96 | signed: bool = True 97 | fixed_length: bool = False 98 | max_val: float = 1e7 99 | time_sep: str = ' ,' 100 | bit_sep: str = ' ' 101 | plus_sign: str = '' 102 | minus_sign: str = ' -' 103 | half_bin_correction: bool = True 104 | decimal_point: str = '' 105 | missing_str: str = ' Nan' 106 | 107 | def serialize_arr(arr, settings: SerializerSettings): 108 | """ 109 | Serialize an array of numbers (a time series) into a string based on the provided settings. 110 | 111 | Parameters: 112 | - arr (np.array): Array of numbers to serialize. 113 | - settings (SerializerSettings): Settings for serialization. 114 | 115 | Returns: 116 | - str: String representation of the array. 
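Each value is written digit by digit separated by bit_sep, successive time steps are joined with time_sep, NaNs are emitted as missing_str, and leading zeros are stripped unless fixed_length is set. As a rough illustration with the default settings, np.array([[1.5, -2.25]]) serializes to something like ' 1 5 , - 2 2 ,'.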
117 | """ 118 | # max_val is only for fixing the number of bits in nunm2repr so it can be vmapped 119 | assert np.all(np.abs(arr[~np.isnan(arr)]) <= settings.max_val), f"abs(arr) must be <= max_val,\ 120 | but abs(arr)={np.abs(arr)}, max_val={settings.max_val}" 121 | 122 | if not settings.signed: 123 | assert np.all(arr[~np.isnan(arr)] >= 0), f"unsigned arr must be >= 0" 124 | plus_sign = minus_sign = '' 125 | else: 126 | plus_sign = settings.plus_sign 127 | minus_sign = settings.minus_sign 128 | 129 | vnum2repr = partial(vec_num2repr,base=settings.base,prec=settings.prec,max_val=settings.max_val) 130 | sign_arr, digits_arr = vnum2repr(np.where(np.isnan(arr),np.zeros_like(arr),arr)) 131 | 132 | 133 | ismissing = np.isnan(arr) 134 | 135 | def tokenize(arr): 136 | return ''.join([settings.bit_sep+str(b) for b in arr]) 137 | 138 | bit_strs = [] 139 | 140 | 141 | for sign, digits,missing in zip(sign_arr[0], digits_arr[0], ismissing[0]): 142 | 143 | if not settings.fixed_length: 144 | # remove leading zeros 145 | nonzero_indices = np.where(digits != 0)[0] 146 | if len(nonzero_indices) == 0: 147 | digits = np.array([0]) 148 | else: 149 | digits = digits[nonzero_indices[0]:] 150 | # add a decimal point 151 | prec = settings.prec 152 | if len(settings.decimal_point): 153 | digits = np.concatenate([digits[:-prec], np.array([settings.decimal_point]), digits[-prec:]]) 154 | digits = tokenize(digits) 155 | sign_sep = plus_sign if sign==1 else minus_sign 156 | if missing: 157 | bit_strs.append(settings.missing_str) 158 | else: 159 | bit_strs.append(sign_sep + digits) 160 | bit_str = settings.time_sep.join(bit_strs) 161 | bit_str += settings.time_sep # otherwise there is ambiguity in number of digits in the last time step 162 | return bit_str 163 | 164 | def deserialize_str(bit_str, settings: SerializerSettings, ignore_last=False, steps=None): 165 | """ 166 | Deserialize a string into an array of numbers (a time series) based on the provided settings. 167 | 168 | Parameters: 169 | - bit_str (str): String representation of an array of numbers. 170 | - settings (SerializerSettings): Settings for deserialization. 171 | - ignore_last (bool): If True, ignores the last time step in the string (which may be incomplete due to token limit etc.). Default is False. 172 | - steps (int, optional): Number of steps or entries to deserialize. 173 | 174 | Returns: 175 | - None if deserialization failed for the very first number, otherwise 176 | - np.array: Array of numbers corresponding to the string. 
177 | """ 178 | # ignore_last is for ignoring the last time step in the prediction, which is often a partially generated due to token limit 179 | orig_bitstring = bit_str 180 | bit_strs = bit_str.split(settings.time_sep) 181 | # remove empty strings 182 | bit_strs = [a for a in bit_strs if len(a) > 0] 183 | if ignore_last: 184 | bit_strs = bit_strs[:-1] 185 | if steps is not None: 186 | bit_strs = bit_strs[:steps] 187 | vrepr2num = partial(vec_repr2num,base=settings.base,prec=settings.prec,half_bin_correction=settings.half_bin_correction) 188 | max_bit_pos = int(np.ceil(np.log(settings.max_val)/np.log(settings.base)).item()) 189 | sign_arr = [] 190 | digits_arr = [] 191 | try: 192 | for i, bit_str in enumerate(bit_strs): 193 | if bit_str.startswith(settings.minus_sign): 194 | sign = -1 195 | elif bit_str.startswith(settings.plus_sign): 196 | sign = 1 197 | else: 198 | assert settings.signed == False, f"signed bit_str must start with {settings.minus_sign} or {settings.plus_sign}" 199 | bit_str = bit_str[len(settings.plus_sign):] if sign==1 else bit_str[len(settings.minus_sign):] 200 | if settings.bit_sep=='': 201 | bits = [b for b in bit_str.lstrip()] 202 | else: 203 | bits = [b[:1] for b in bit_str.lstrip().split(settings.bit_sep)] 204 | if settings.fixed_length: 205 | assert len(bits) == max_bit_pos+settings.prec, f"fixed length bit_str must have {max_bit_pos+settings.prec} bits, but has {len(bits)}: '{bit_str}'" 206 | digits = [] 207 | for b in bits: 208 | if b==settings.decimal_point: 209 | continue 210 | # check if is a digit 211 | if b.isdigit(): 212 | digits.append(int(b)) 213 | else: 214 | break 215 | #digits = [int(b) for b in bits] 216 | sign_arr.append(sign) 217 | digits_arr.append(digits) 218 | except Exception as e: 219 | print(f"Error deserializing {settings.time_sep.join(bit_strs[i-2:i+5])}{settings.time_sep}\n\t{e}") 220 | print(f'Got {orig_bitstring}') 221 | print(f"Bitstr {bit_str}, separator {settings.bit_sep}") 222 | # At this point, we have already deserialized some of the bit_strs, so we return those below 223 | if digits_arr: 224 | # add leading zeros to get to equal lengths 225 | max_len = max([len(d) for d in digits_arr]) 226 | for i in range(len(digits_arr)): 227 | digits_arr[i] = [0]*(max_len-len(digits_arr[i])) + digits_arr[i] 228 | return vrepr2num(np.array(sign_arr), np.array(digits_arr)) 229 | else: 230 | # errored at first step 231 | return None -------------------------------------------------------------------------------- /Long-term_Forecasting/utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import matplotlib.pyplot as plt 5 | from tqdm import tqdm 6 | import torch.fft 7 | 8 | from datetime import datetime 9 | from distutils.util import strtobool 10 | import pandas as pd 11 | 12 | from utils.metrics import metric 13 | 14 | plt.switch_backend('agg') 15 | 16 | def adjust_model(model, epoch, args): 17 | 18 | if args.training_strategy == 'progressive': 19 | 20 | if epoch >=3: 21 | 22 | print('switch to progressive training strategy') 23 | 24 | for i, (name, param) in enumerate(model.named_parameters()): 25 | 26 | param.requires_grad = True 27 | 28 | else: 29 | pass 30 | else: 31 | pass 32 | 33 | 34 | def FFT_for_Period(x, k=10): 35 | # [B, T, C] 36 | xf = torch.fft.rfft(x, dim=1) 37 | # find period by amplitudes 38 | frequency_list = abs(xf).mean(0).mean(-1) 39 | frequency_list[0] = 0 40 | _, top_list = torch.topk(frequency_list, k) 41 | 
top_list = top_list.detach().cpu().numpy() 42 | period = x.shape[1] // top_list 43 | return period, abs(xf).mean(-1)[:, top_list] 44 | 45 | 46 | 47 | 48 | 49 | def adjust_learning_rate(optimizer, epoch, args): 50 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 51 | # if args.decay_fac is None: 52 | # args.decay_fac = 0.5 53 | # if args.lradj == 'type1': 54 | # lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))} 55 | # elif args.lradj == 'type2': 56 | # lr_adjust = { 57 | # 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 58 | # 10: 5e-7, 15: 1e-7, 20: 5e-8 59 | # } 60 | if args.lradj =='type1': 61 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 62 | elif args.lradj =='type2': 63 | lr_adjust = {epoch: args.learning_rate * (0.9 ** ((epoch - 1) // 1))} 64 | elif args.lradj =='type4': 65 | lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch) // 1))} 66 | else: 67 | args.learning_rate = 1e-4 68 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 69 | print("lr_adjust = {}".format(lr_adjust)) 70 | if epoch in lr_adjust.keys(): 71 | lr = lr_adjust[epoch] 72 | for param_group in optimizer.param_groups: 73 | param_group['lr'] = lr 74 | print('Updating learning rate to {}'.format(lr)) 75 | 76 | 77 | class EarlyStopping: 78 | def __init__(self, patience=7, verbose=False, delta=0): 79 | self.patience = patience 80 | self.verbose = verbose 81 | self.counter = 0 82 | self.best_score = None 83 | self.early_stop = False 84 | self.val_loss_min = np.Inf 85 | self.delta = delta 86 | 87 | def __call__(self, val_loss, model, path): 88 | score = -val_loss 89 | if self.best_score is None: 90 | self.best_score = score 91 | self.save_checkpoint(val_loss, model, path) 92 | elif score < self.best_score + self.delta: 93 | self.counter += 1 94 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 95 | if self.counter >= self.patience: 96 | self.early_stop = True 97 | else: 98 | self.best_score = score 99 | self.save_checkpoint(val_loss, model, path) 100 | self.counter = 0 101 | 102 | def save_checkpoint(self, val_loss, model, path): 103 | if self.verbose: 104 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') 105 | torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') 106 | self.val_loss_min = val_loss 107 | 108 | 109 | class dotdict(dict): 110 | """dot.notation access to dictionary attributes""" 111 | __getattr__ = dict.get 112 | __setattr__ = dict.__setitem__ 113 | __delattr__ = dict.__delitem__ 114 | 115 | 116 | class StandardScaler(): 117 | def __init__(self, mean, std): 118 | self.mean = mean 119 | self.std = std 120 | 121 | def transform(self, data): 122 | return (data - self.mean) / self.std 123 | 124 | def inverse_transform(self, data): 125 | return (data * self.std) + self.mean 126 | 127 | 128 | def visual(true, preds=None, name='./pic/test.pdf'): 129 | """ 130 | Results visualization 131 | """ 132 | plt.figure() 133 | plt.plot(true, label='GroundTruth', linewidth=2) 134 | if preds is not None: 135 | plt.plot(preds, label='Prediction', linewidth=2) 136 | plt.legend() 137 | plt.savefig(name, bbox_inches='tight') 138 | 139 | 140 | def convert_tsf_to_dataframe( 141 | full_file_path_and_name, 142 | replace_missing_vals_with="NaN", 143 | value_column_name="series_value", 144 | ): 145 | col_names = [] 146 | col_types = [] 147 | all_data = {} 148 | line_count = 0 149 | frequency = None 150 | forecast_horizon = None 151 | contain_missing_values = None 152 | contain_equal_length = None 153 | found_data_tag = False 154 | found_data_section = False 155 | started_reading_data_section = False 156 | 157 | with open(full_file_path_and_name, "r", encoding="cp1252") as file: 158 | for line in file: 159 | # Strip white space from start/end of line 160 | line = line.strip() 161 | 162 | if line: 163 | if line.startswith("@"): # Read meta-data 164 | if not line.startswith("@data"): 165 | line_content = line.split(" ") 166 | if line.startswith("@attribute"): 167 | if ( 168 | len(line_content) != 3 169 | ): # Attributes have both name and type 170 | raise Exception("Invalid meta-data specification.") 171 | 172 | col_names.append(line_content[1]) 173 | col_types.append(line_content[2]) 174 | else: 175 | if ( 176 | len(line_content) != 2 177 | ): # Other meta-data have only values 178 | raise Exception("Invalid meta-data specification.") 179 | 180 | if line.startswith("@frequency"): 181 | frequency = line_content[1] 182 | elif line.startswith("@horizon"): 183 | forecast_horizon = int(line_content[1]) 184 | elif line.startswith("@missing"): 185 | contain_missing_values = bool( 186 | strtobool(line_content[1]) 187 | ) 188 | elif line.startswith("@equallength"): 189 | contain_equal_length = bool(strtobool(line_content[1])) 190 | 191 | else: 192 | if len(col_names) == 0: 193 | raise Exception( 194 | "Missing attribute section. Attribute section must come before data." 195 | ) 196 | 197 | found_data_tag = True 198 | elif not line.startswith("#"): 199 | if len(col_names) == 0: 200 | raise Exception( 201 | "Missing attribute section. Attribute section must come before data." 
202 | ) 203 | elif not found_data_tag: 204 | raise Exception("Missing @data tag.") 205 | else: 206 | if not started_reading_data_section: 207 | started_reading_data_section = True 208 | found_data_section = True 209 | all_series = [] 210 | 211 | for col in col_names: 212 | all_data[col] = [] 213 | 214 | full_info = line.split(":") 215 | 216 | if len(full_info) != (len(col_names) + 1): 217 | raise Exception("Missing attributes/values in series.") 218 | 219 | series = full_info[len(full_info) - 1] 220 | series = series.split(",") 221 | 222 | if len(series) == 0: 223 | raise Exception( 224 | "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol" 225 | ) 226 | 227 | numeric_series = [] 228 | 229 | for val in series: 230 | if val == "?": 231 | numeric_series.append(replace_missing_vals_with) 232 | else: 233 | numeric_series.append(float(val)) 234 | 235 | if numeric_series.count(replace_missing_vals_with) == len( 236 | numeric_series 237 | ): 238 | raise Exception( 239 | "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series." 240 | ) 241 | 242 | all_series.append(pd.Series(numeric_series).array) 243 | 244 | for i in range(len(col_names)): 245 | att_val = None 246 | if col_types[i] == "numeric": 247 | att_val = int(full_info[i]) 248 | elif col_types[i] == "string": 249 | att_val = str(full_info[i]) 250 | elif col_types[i] == "date": 251 | att_val = datetime.strptime( 252 | full_info[i], "%Y-%m-%d %H-%M-%S" 253 | ) 254 | else: 255 | raise Exception( 256 | "Invalid attribute type." 257 | ) # Currently, the code supports only numeric, string and date types. Extend this as required. 
258 | 259 | if att_val is None: 260 | raise Exception("Invalid attribute value.") 261 | else: 262 | all_data[col_names[i]].append(att_val) 263 | 264 | line_count = line_count + 1 265 | 266 | if line_count == 0: 267 | raise Exception("Empty file.") 268 | if len(col_names) == 0: 269 | raise Exception("Missing attribute section.") 270 | if not found_data_section: 271 | raise Exception("Missing series information under data section.") 272 | 273 | all_data[value_column_name] = all_series 274 | loaded_data = pd.DataFrame(all_data) 275 | 276 | return ( 277 | loaded_data, 278 | frequency, 279 | forecast_horizon, 280 | contain_missing_values, 281 | contain_equal_length, 282 | ) 283 | 284 | 285 | def vali(model, vali_data, vali_loader, criterion, args, device, itr): 286 | total_loss = [] 287 | if args.model == 'PatchTST' or args.model == 'DLinear' or args.model == 'TCN': 288 | model.eval() 289 | else: 290 | model.in_layer.eval() 291 | model.out_layer.eval() 292 | with torch.no_grad(): 293 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)): 294 | batch_x = batch_x.float().to(device) 295 | batch_y = batch_y.float() 296 | 297 | batch_x_mark = batch_x_mark.float().to(device) 298 | batch_y_mark = batch_y_mark.float().to(device) 299 | 300 | outputs = model(batch_x, itr) 301 | 302 | # encoder - decoder 303 | outputs = outputs[:, -args.pred_len:, :] 304 | batch_y = batch_y[:, -args.pred_len:, :].to(device) 305 | 306 | pred = outputs.detach().cpu() 307 | true = batch_y.detach().cpu() 308 | 309 | loss = criterion(pred, true) 310 | 311 | total_loss.append(loss) 312 | total_loss = np.average(total_loss) 313 | if args.model == 'PatchTST' or args.model == 'DLinear' or args.model == 'TCN': 314 | model.train() 315 | else: 316 | model.in_layer.train() 317 | model.out_layer.train() 318 | return total_loss 319 | 320 | def MASE(x, freq, pred, true): 321 | masep = np.mean(np.abs(x[:, freq:] - x[:, :-freq])) 322 | return np.mean(np.abs(pred - true) / (masep + 1e-8)) 323 | 324 | def test(model, test_data, test_loader, args, device, itr): 325 | preds = [] 326 | trues = [] 327 | # mases = [] 328 | 329 | model.eval() 330 | with torch.no_grad(): 331 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(test_loader)): 332 | 333 | # outputs_np = batch_x.cpu().numpy() 334 | # np.save("emb_test/ETTh2_192_test_input_itr{}_{}.npy".format(itr, i), outputs_np) 335 | # outputs_np = batch_y.cpu().numpy() 336 | # np.save("emb_test/ETTh2_192_test_true_itr{}_{}.npy".format(itr, i), outputs_np) 337 | 338 | batch_x = batch_x.float().to(device) 339 | batch_y = batch_y.float() 340 | 341 | outputs = model(batch_x[:, -args.seq_len:, :], itr) 342 | 343 | # encoder - decoder 344 | outputs = outputs[:, -args.pred_len:, :] 345 | batch_y = batch_y[:, -args.pred_len:, :].to(device) 346 | 347 | pred = outputs.detach().cpu().numpy() 348 | true = batch_y.detach().cpu().numpy() 349 | 350 | preds.append(pred) 351 | trues.append(true) 352 | 353 | preds = np.array(preds) 354 | trues = np.array(trues) 355 | # mases = np.mean(np.array(mases)) 356 | print('test shape:', preds.shape, trues.shape) 357 | preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) 358 | trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) 359 | print('test shape:', preds.shape, trues.shape) 360 | 361 | mae, mse, rmse, mape, mspe, smape, nd = metric(preds, trues) 362 | # print('mae:{:.4f}, mse:{:.4f}, rmse:{:.4f}, smape:{:.4f}, mases:{:.4f}'.format(mae, mse, rmse, smape, mases)) 363 | print('mae:{:.4f}, mse:{:.4f}, 
rmse:{:.4f}, smape:{:.4f}'.format(mae, mse, rmse, smape)) 364 | 365 | return mse, mae 366 | 367 | 368 | 369 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # S2IP-LLM 2 | Official repository for "S^2IP-LLM: Semantic Space Informed Prompt Learning with LLM for Time Series Forecasting" 3 | 4 | 5 | 6 | 7 | ## 🛠 Prerequisites 8 | 9 | Ensure you have the necessary dependencies installed by first creating and activating the conda environment: 10 | 11 | ``` 12 | conda create -n "myenv" python=3.10.0 13 | conda activate myenv 14 | ``` 15 | Then, inside the repository folder, run: 16 | ``` 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## 📊 Prepare Datasets 21 | 22 | Begin by downloading the required datasets. All datasets are conveniently available at [Autoformer](https://drive.google.com/drive/folders/1ZOYpTUa82_jCcxIdTmyr0LXQfvaM9vIy). Create a separate folder named `./data` and place the downloaded datasets in it. 23 | 24 | 25 | 26 | ## 💻 Training 27 | 28 | All scripts are located in `./scripts`. Example: 29 | 30 | ```shell 31 | cd Long-term_Forecasting 32 | sh scripts/etth1.sh 33 | ``` 34 | 35 | 36 | 37 | 38 | 39 | ## 📚 Citation 40 | If you find this repo useful, please consider citing our paper as follows: 41 | ``` 42 | @inproceedings{pan2024s, 43 | title={$S^2$IP-LLM: Semantic Space Informed Prompt Learning with LLM for Time Series Forecasting}, 44 | author={Pan, Zijie and Jiang, Yushan and Garg, Sahil and Schneider, Anderson and Nevmyvaka, Yuriy and Song, Dongjin}, 45 | booktitle={Forty-first International Conference on Machine Learning}, 46 | year={2024} 47 | } 48 | ``` 49 | -------------------------------------------------------------------------------- /Short-term_Forecasting/data_provider/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Short-term_Forecasting/data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_M4 2 | from torch.utils.data import DataLoader 3 | 4 | data_dict = { 5 | 6 | 'm4': Dataset_M4, 7 | } 8 | 9 | 10 | def data_provider(args, flag): 11 | Data = data_dict[args.data] 12 | timeenc = 0 if args.embed != 'timeF' else 1 13 | percent = args.percent 14 | 15 | if flag == 'test': 16 | shuffle_flag = False 17 | drop_last = False 18 | batch_size = args.batch_size 19 | freq = args.freq 20 | else: 21 | shuffle_flag = True 22 | drop_last = True 23 | batch_size = args.batch_size 24 | freq = args.freq 25 | 26 | if args.data == 'm4': 27 | drop_last = False 28 | data_set = Data( 29 | root_path=args.root_path, 30 | data_path=args.data_path, 31 | flag=flag, 32 | size=[args.seq_len, args.label_len, args.pred_len], 33 | features=args.features, 34 | target=args.target, 35 | timeenc=timeenc, 36 | freq=freq, 37 | seasonal_patterns=args.seasonal_patterns 38 | ) 39 | else: 40 | data_set = Data( 41 | root_path=args.root_path, 42 | data_path=args.data_path, 43 | flag=flag, 44 | size=[args.seq_len, args.label_len, args.pred_len], 45 | features=args.features, 46 | target=args.target, 47 | timeenc=timeenc, 48 | freq=freq, 49 | percent=percent, 50 | seasonal_patterns=args.seasonal_patterns 51 | ) 52 | data_loader = DataLoader( 53 | data_set, 54 | batch_size=batch_size, 55 | shuffle=shuffle_flag, 56 | num_workers=args.num_workers, 57 | drop_last=drop_last) 58 | return data_set, data_loader 59 | 
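For reference, the snippet below is a minimal usage sketch of `data_provider` above; it is not part of the repository. It assumes an argparse-style namespace carrying the fields the function reads, assumes the cached M4 files (`M4-info.csv`, `training.npz`, `test.npz`) are present under `./dataset/m4`, and uses the `Monthly` horizon exactly as `run_m4.py` derives it from `M4Meta` (`pred_len=18`, `seq_len=2*pred_len`, `label_len=pred_len`).

```python
# Hypothetical usage sketch (not part of the original file): build the M4 loaders directly.
from types import SimpleNamespace

from data_provider.data_factory import data_provider

args = SimpleNamespace(
    data='m4', root_path='./dataset/m4', data_path='',
    seasonal_patterns='Monthly',
    # run_m4.py sets these from M4Meta for 'Monthly': pred_len=18, seq_len=36, label_len=18
    seq_len=36, label_len=18, pred_len=18,
    features='M', target='OT', embed='timeF', freq='h',
    percent=100, batch_size=64, num_workers=0,
)

train_set, train_loader = data_provider(args, 'train')  # shuffled; drop_last forced to False for m4
test_set, test_loader = data_provider(args, 'test')     # unshuffled
print(len(train_set), len(test_set))
```

In normal use this wiring is done by `run_m4.py`, which overrides `seq_len`, `label_len`, and `pred_len` from `M4Meta` before calling `data_provider` for the train, validation, and test splits.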
-------------------------------------------------------------------------------- /Short-term_Forecasting/data_provider/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from torch.utils.data import Dataset 5 | from sklearn.preprocessing import StandardScaler 6 | from utils.timefeatures import time_features 7 | from data_provider.m4 import M4Dataset, M4Meta 8 | import warnings 9 | 10 | warnings.filterwarnings('ignore') 11 | 12 | 13 | 14 | class Dataset_M4(Dataset): 15 | def __init__(self, root_path, flag='pred', size=None, 16 | features='S', data_path='ETTh1.csv', 17 | target='OT', scale=False, inverse=False, timeenc=0, freq='15min', 18 | seasonal_patterns='Yearly'): 19 | # size [seq_len, label_len, pred_len] 20 | # init 21 | self.features = features 22 | self.target = target 23 | self.scale = scale 24 | self.inverse = inverse 25 | self.timeenc = timeenc 26 | self.root_path = root_path 27 | 28 | self.seq_len = size[0] 29 | self.label_len = size[1] 30 | self.pred_len = size[2] 31 | 32 | self.seasonal_patterns = seasonal_patterns 33 | self.history_size = M4Meta.history_size[seasonal_patterns] 34 | self.window_sampling_limit = int(self.history_size * self.pred_len) 35 | self.flag = flag 36 | 37 | self.__read_data__() 38 | 39 | def __read_data__(self): 40 | # M4Dataset.initialize() 41 | if self.flag == 'train': 42 | dataset = M4Dataset.load(training=True, dataset_file=self.root_path) 43 | else: 44 | dataset = M4Dataset.load(training=False, dataset_file=self.root_path) 45 | 46 | training_values = [v[~np.isnan(v)] for v in 47 | dataset.values[dataset.groups == self.seasonal_patterns]] # split different frequencies 48 | 49 | self.ids = np.array([i for i in dataset.ids[dataset.groups == self.seasonal_patterns]]) 50 | self.timeseries = [ts for ts in training_values] 51 | 52 | def __getitem__(self, index): 53 | insample = np.zeros((self.seq_len, 1)) 54 | insample_mask = np.zeros((self.seq_len, 1)) 55 | outsample = np.zeros((self.pred_len + self.label_len, 1)) 56 | outsample_mask = np.zeros((self.pred_len + self.label_len, 1)) # m4 dataset 57 | 58 | sampled_timeseries = self.timeseries[index] 59 | cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit), 60 | high=len(sampled_timeseries), 61 | size=1)[0] 62 | 63 | insample_window = sampled_timeseries[max(0, cut_point - self.seq_len):cut_point] 64 | insample[-len(insample_window):, 0] = insample_window 65 | insample_mask[-len(insample_window):, 0] = 1.0 66 | outsample_window = sampled_timeseries[ 67 | cut_point - self.label_len:min(len(sampled_timeseries), cut_point + self.pred_len)] 68 | outsample[:len(outsample_window), 0] = outsample_window 69 | outsample_mask[:len(outsample_window), 0] = 1.0 70 | return insample, outsample, insample_mask, outsample_mask 71 | 72 | def __len__(self): 73 | return len(self.timeseries) 74 | 75 | def inverse_transform(self, data): 76 | return self.scaler.inverse_transform(data) 77 | 78 | def last_insample_window(self): 79 | """ 80 | The last window of insample size of all timeseries. 81 | This function does not support batching and does not reshuffle timeseries. 82 | 83 | :return: Last insample window of all timeseries. 
Shape "timeseries, insample size" 84 | """ 85 | insample = np.zeros((len(self.timeseries), self.seq_len)) 86 | insample_mask = np.zeros((len(self.timeseries), self.seq_len)) 87 | for i, ts in enumerate(self.timeseries): 88 | ts_last_window = ts[-self.seq_len:] 89 | insample[i, -len(ts):] = ts_last_window 90 | insample_mask[i, -len(ts):] = 1.0 91 | return insample, insample_mask -------------------------------------------------------------------------------- /Short-term_Forecasting/data_provider/m4.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | M4 Dataset 17 | """ 18 | from dataclasses import dataclass 19 | 20 | import numpy as np 21 | import pandas as pd 22 | import logging 23 | import os 24 | import pathlib 25 | import sys 26 | from urllib import request 27 | 28 | 29 | def url_file_name(url: str) -> str: 30 | """ 31 | Extract file name from url. 32 | 33 | :param url: URL to extract file name from. 34 | :return: File name. 35 | """ 36 | return url.split('/')[-1] if len(url) > 0 else '' 37 | 38 | 39 | def download(url: str, file_path: str) -> None: 40 | """ 41 | Download a file to the given path. 42 | 43 | :param url: URL to download 44 | :param file_path: Where to download the content. 45 | """ 46 | 47 | def progress(count, block_size, total_size): 48 | progress_pct = float(count * block_size) / float(total_size) * 100.0 49 | sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct)) 50 | sys.stdout.flush() 51 | 52 | if not os.path.isfile(file_path): 53 | opener = request.build_opener() 54 | opener.addheaders = [('User-agent', 'Mozilla/5.0')] 55 | request.install_opener(opener) 56 | pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True) 57 | f, _ = request.urlretrieve(url, file_path, progress) 58 | sys.stdout.write('\n') 59 | sys.stdout.flush() 60 | file_info = os.stat(f) 61 | logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') 62 | else: 63 | file_info = os.stat(file_path) 64 | logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') 65 | 66 | 67 | @dataclass() 68 | class M4Dataset: 69 | ids: np.ndarray 70 | groups: np.ndarray 71 | frequencies: np.ndarray 72 | horizons: np.ndarray 73 | values: np.ndarray 74 | 75 | @staticmethod 76 | def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset': 77 | """ 78 | Load cached dataset. 79 | 80 | :param training: Load training part if training is True, test part otherwise. 
81 | """ 82 | info_file = os.path.join(dataset_file, 'M4-info.csv') 83 | train_cache_file = os.path.join(dataset_file, 'training.npz') 84 | test_cache_file = os.path.join(dataset_file, 'test.npz') 85 | m4_info = pd.read_csv(info_file) 86 | return M4Dataset(ids=m4_info.M4id.values, 87 | groups=m4_info.SP.values, 88 | frequencies=m4_info.Frequency.values, 89 | horizons=m4_info.Horizon.values, 90 | values=np.load( 91 | train_cache_file if training else test_cache_file, 92 | allow_pickle=True)) 93 | 94 | 95 | @dataclass() 96 | class M4Meta: 97 | seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly'] 98 | horizons = [6, 8, 18, 13, 14, 48] 99 | frequencies = [1, 4, 12, 1, 1, 24] 100 | horizons_map = { 101 | 'Yearly': 6, 102 | 'Quarterly': 8, 103 | 'Monthly': 18, 104 | 'Weekly': 13, 105 | 'Daily': 14, 106 | 'Hourly': 48 107 | } # different predict length 108 | frequency_map = { 109 | 'Yearly': 1, 110 | 'Quarterly': 4, 111 | 'Monthly': 12, 112 | 'Weekly': 1, 113 | 'Daily': 1, 114 | 'Hourly': 24 115 | } 116 | history_size = { 117 | 'Yearly': 1.5, 118 | 'Quarterly': 1.5, 119 | 'Monthly': 1.5, 120 | 'Weekly': 10, 121 | 'Daily': 10, 122 | 'Hourly': 10 123 | } # from interpretable.gin 124 | 125 | 126 | def load_m4_info() -> pd.DataFrame: 127 | """ 128 | Load M4Info file. 129 | 130 | :return: Pandas DataFrame of M4Info. 131 | """ 132 | return pd.read_csv(INFO_FILE_PATH) 133 | -------------------------------------------------------------------------------- /Short-term_Forecasting/m4.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=3 python -u run_m4.py \ 2 | --task_name short_term_forecast \ 3 | --is_training 1 \ 4 | --root_path ./dataset/m4 \ 5 | --seasonal_patterns 'Monthly' \ 6 | --model_id m4_Monthly \ 7 | --model "S2IPLLM" \ 8 | --d_model 768 \ 9 | --data m4 \ 10 | --features M \ 11 | --enc_in 1 \ 12 | --dec_in 1 \ 13 | --c_out 1 \ 14 | --patch_size 1 \ 15 | --stride 1 \ 16 | --batch_size 64 \ 17 | --des 'Exp' \ 18 | --itr 1 \ 19 | --learning_rate 0.001 \ 20 | --loss 'SMAPE' \ 21 | --train_epochs 50 \ 22 | --model_comment 'S2LLM-M4' \ 23 | --patience 3 \ 24 | --add_prompt 1 \ 25 | --prompt_length 4 \ 26 | --sim_coef -0.1 \ 27 | --pool_size 1000 \ 28 | --trend_length 4 \ 29 | --seasonal_length 2 30 | 31 | 32 | 33 | CUDA_VISIBLE_DEVICES=3 python -u run_m4.py \ 34 | --task_name short_term_forecast \ 35 | --is_training 1 \ 36 | --root_path ./dataset/m4 \ 37 | --seasonal_patterns 'Yearly' \ 38 | --model_id m4_Yearly \ 39 | --model "S2IPLLM" \ 40 | --d_model 768 \ 41 | --data m4 \ 42 | --features M \ 43 | --enc_in 1 \ 44 | --dec_in 1 \ 45 | --c_out 1 \ 46 | --patch_size 1 \ 47 | --stride 1 \ 48 | --batch_size 64 \ 49 | --des 'Exp' \ 50 | --itr 1 \ 51 | --learning_rate 0.001 \ 52 | --loss 'SMAPE' \ 53 | --train_epochs 50 \ 54 | --model_comment 'S2LLM-M4' \ 55 | --patience 3 \ 56 | --add_prompt 1 \ 57 | --prompt_length 2 \ 58 | --sim_coef -0.01 \ 59 | --pool_size 1000 \ 60 | --trend_length 1 \ 61 | --seasonal_length 1 62 | 63 | CUDA_VISIBLE_DEVICES=3 python -u run_m4.py \ 64 | --task_name short_term_forecast \ 65 | --is_training 1 \ 66 | --root_path ./dataset/m4 \ 67 | --seasonal_patterns 'Weekly' \ 68 | --model_id m4_Weekly \ 69 | --model "S2IPLLM" \ 70 | --d_model 768 \ 71 | --data m4 \ 72 | --features M \ 73 | --enc_in 1 \ 74 | --dec_in 1 \ 75 | --c_out 1 \ 76 | --patch_size 1 \ 77 | --stride 1 \ 78 | --batch_size 64 \ 79 | --des 'Exp' \ 80 | --itr 1 \ 81 | --learning_rate 0.001 \ 82 | --loss 'SMAPE' \ 83 | 
--train_epochs 50 \ 84 | --model_comment 'S2LLM-M4' \ 85 | --patience 3 \ 86 | --add_prompt 1 \ 87 | --prompt_length 4 \ 88 | --sim_coef -0.1 \ 89 | --pool_size 1000 \ 90 | --trend_length 4 \ 91 | --seasonal_length 2 92 | 93 | CUDA_VISIBLE_DEVICES=3 python -u run_m4.py \ 94 | --task_name short_term_forecast \ 95 | --is_training 1 \ 96 | --root_path ./dataset/m4 \ 97 | --seasonal_patterns 'Daily' \ 98 | --model_id m4_Daily \ 99 | --model "S2IPLLM" \ 100 | --d_model 768 \ 101 | --data m4 \ 102 | --features M \ 103 | --enc_in 1 \ 104 | --dec_in 1 \ 105 | --c_out 1 \ 106 | --patch_size 1 \ 107 | --stride 1 \ 108 | --batch_size 64 \ 109 | --des 'Exp' \ 110 | --itr 1 \ 111 | --learning_rate 0.001 \ 112 | --loss 'SMAPE' \ 113 | --train_epochs 50 \ 114 | --model_comment 'S2LLM-M4' \ 115 | --patience 3 \ 116 | --add_prompt 1 \ 117 | --prompt_length 4 \ 118 | --sim_coef -0.1 \ 119 | --pool_size 1000 \ 120 | --trend_length 4 \ 121 | --seasonal_length 2 122 | 123 | CUDA_VISIBLE_DEVICES=3 python -u run_m4.py \ 124 | --task_name short_term_forecast \ 125 | --is_training 1 \ 126 | --root_path ./dataset/m4 \ 127 | --seasonal_patterns 'Quarterly' \ 128 | --model_id m4_Quarterly \ 129 | --model "S2IPLLM" \ 130 | --d_model 768 \ 131 | --data m4 \ 132 | --features M \ 133 | --enc_in 1 \ 134 | --dec_in 1 \ 135 | --c_out 1 \ 136 | --patch_size 1 \ 137 | --stride 1 \ 138 | --batch_size 64 \ 139 | --des 'Exp' \ 140 | --itr 1 \ 141 | --learning_rate 0.001 \ 142 | --loss 'SMAPE' \ 143 | --train_epochs 50 \ 144 | --model_comment 'S2LLM-M4' \ 145 | --patience 3 \ 146 | --add_prompt 1 \ 147 | --prompt_length 2 \ 148 | --sim_coef -0.1 \ 149 | --pool_size 1000 \ 150 | --trend_length 1 \ 151 | --seasonal_length 1 152 | 153 | 154 | CUDA_VISIBLE_DEVICES=3 python -u run_m4.py \ 155 | --task_name short_term_forecast \ 156 | --is_training 1 \ 157 | --root_path ./dataset/m4 \ 158 | --seasonal_patterns 'Hourly' \ 159 | --model_id m4_Hourly \ 160 | --model "S2IPLLM" \ 161 | --d_model 768 \ 162 | --data m4 \ 163 | --features M \ 164 | --enc_in 1 \ 165 | --dec_in 1 \ 166 | --c_out 1 \ 167 | --patch_size 1 \ 168 | --stride 1 \ 169 | --batch_size 64 \ 170 | --des 'Exp' \ 171 | --itr 1 \ 172 | --learning_rate 0.001 \ 173 | --loss 'SMAPE' \ 174 | --train_epochs 50 \ 175 | --model_comment 'S2LLM-M4' \ 176 | --patience 3 \ 177 | --add_prompt 1 \ 178 | --prompt_length 4 \ 179 | --sim_coef -0.1 \ 180 | --pool_size 1000 \ 181 | --trend_length 4 \ 182 | --seasonal_length 2 -------------------------------------------------------------------------------- /Short-term_Forecasting/models/S2IPLLM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch import optim 5 | 6 | from transformers.models.gpt2.modeling_gpt2 import GPT2Model 7 | from transformers import BertTokenizer, BertModel 8 | from einops import rearrange 9 | from embed import DataEmbedding, DataEmbedding_wo_time 10 | from transformers.models.gpt2.configuration_gpt2 import GPT2Config 11 | from .prompt import Prompt 12 | import pandas as pd 13 | 14 | class S2IPLLM(nn.Module): 15 | 16 | def __init__(self, configs, device): 17 | super(S2IPLLM, self).__init__() 18 | self.configs = configs 19 | # self.is_gpt = configs.is_gpt 20 | self.patch_size = configs.patch_size 21 | # self.pretrain = configs.pretrain 22 | self.stride = configs.stride 23 | self.patch_num = (configs.seq_len - self.patch_size) // self.stride + 1 24 | 25 | self.padding_patch_layer = nn.ReplicationPad1d((0, 
self.stride)) 26 | self.patch_num += 1 27 | 28 | self.gpt2 = GPT2Model.from_pretrained('gpt2', output_attentions=True, output_hidden_states=True) # loads a pretrained GPT-2 base model 29 | # else: 30 | # print("------------------no pretrain------------------") 31 | # self.gpt2 = GPT2Model(GPT2Config()) 32 | self.gpt2.h = self.gpt2.h[:configs.gpt_layers] 33 | print("gpt2 = {}".format(self.gpt2)) 34 | 35 | 36 | 37 | self.in_layer = nn.Linear(configs.patch_size*3, configs.d_model) 38 | 39 | self.prompt_pool = Prompt(length=1, embed_dim=768, embedding_key='mean', prompt_init='uniform', prompt_pool=False, 40 | prompt_key=True, pool_size=self.configs.pool_size, top_k=self.configs.prompt_length, batchwise_prompt=False, prompt_key_init=self.configs.prompt_init,wte = self.gpt2.wte.weight) 41 | 42 | 43 | 44 | 45 | self.out_layer = nn.Linear(int(configs.d_model / 3 * (self.patch_num+configs.prompt_length)), configs.pred_len) 46 | 47 | 48 | 49 | # if configs.freeze and configs.pretrain: 50 | for i, (name, param) in enumerate(self.gpt2.named_parameters()): 51 | if 'ln' in name or 'wpe' in name: 52 | param.requires_grad = True 53 | else: 54 | param.requires_grad = False 55 | 56 | 57 | 58 | self.cnt = 0 59 | 60 | 61 | def forward(self, x, itr): 62 | B, L, M = x.shape 63 | 64 | means = x.mean(1, keepdim=True).detach() 65 | x = x - means 66 | stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False)+ 1e-5).detach() 67 | x /= stdev 68 | 69 | x = rearrange(x, 'b l m -> (b m) l') 70 | 71 | def decompose(x): 72 | df = pd.DataFrame(x) 73 | trend = df.rolling(window=self.configs.trend_length, center=True).mean().fillna(method='bfill').fillna(method='ffill') 74 | detrended = df - trend 75 | seasonal = detrended.groupby(detrended.index % self.configs.seasonal_length).transform('mean').fillna(method='bfill').fillna(method='ffill') 76 | residuals = df - trend - seasonal 77 | combined = np.stack([trend, seasonal, residuals], axis=1) 78 | return combined 79 | 80 | 81 | decomp_results = np.apply_along_axis(decompose, 1, x.cpu().numpy()) 82 | x = torch.tensor(decomp_results).to(self.gpt2.device) 83 | x = rearrange(x, 'b l c d -> b c (d l)', c = 3) 84 | 85 | x = self.padding_patch_layer(x) 86 | x = x.unfold(dimension=-1, size=self.patch_size, step=self.stride) 87 | 88 | x = rearrange(x, 'b c n p -> b n (c p)', c = 3) 89 | pre_prompted_embedding = self.in_layer(x.float()) 90 | 91 | 92 | outs = self.prompt_pool(pre_prompted_embedding) 93 | prompted_embedding = outs['prompted_embedding'] 94 | sim = outs['similarity'] 95 | prompt_key = outs['prompt_key'] 96 | simlarity_loss = outs['reduce_sim'] 97 | 98 | 99 | last_embedding = self.gpt2(inputs_embeds=prompted_embedding).last_hidden_state 100 | outputs = self.out_layer(last_embedding.reshape(B*M*3, -1)) 101 | 102 | 103 | outputs = rearrange(outputs, '(b m c) h -> b m c h', b=B,m=M,c=3) 104 | outputs = outputs.sum(dim=2) 105 | outputs = rearrange(outputs, 'b m l -> b l m') 106 | 107 | res = dict() 108 | res['simlarity_loss'] = simlarity_loss 109 | 110 | outputs = outputs * stdev 111 | outputs = outputs + means 112 | 113 | return outputs,res 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /Short-term_Forecasting/models/prompt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | class Prompt(nn.Module): 19 | def __init__(self, length=2, embed_dim=768, 
embedding_key='mean', prompt_init='uniform', prompt_pool=False, 20 | prompt_key=False, pool_size=30, top_k=4, batchwise_prompt=False, prompt_key_init='uniform',wte = None): 21 | super().__init__() 22 | 23 | self.length = length 24 | self.embed_dim = embed_dim 25 | self.prompt_pool = prompt_pool 26 | self.embedding_key = embedding_key 27 | self.prompt_init = prompt_init 28 | self.prompt_key = prompt_key 29 | self.prompt_key_init = prompt_key_init 30 | self.pool_size = pool_size 31 | print(self.pool_size) 32 | self.top_k = top_k 33 | self.batchwise_prompt = batchwise_prompt 34 | self.wte = wte 35 | 36 | if self.prompt_pool: 37 | prompt_pool_shape = (pool_size, length, embed_dim) 38 | if prompt_init == 'zero': 39 | self.prompt = nn.Parameter(torch.zeros(prompt_pool_shape)) 40 | elif prompt_init == 'uniform': 41 | self.prompt = nn.Parameter(torch.randn(prompt_pool_shape)) 42 | nn.init.uniform_(self.prompt, -1, 1) 43 | 44 | # if using learnable prompt keys 45 | if prompt_key: 46 | key_shape = (pool_size, embed_dim) 47 | if prompt_key_init == 'zero': 48 | self.prompt = nn.Parameter(torch.zeros(key_shape),requires_grad=False) 49 | print('zero initialized key') 50 | 51 | elif prompt_key_init == 'uniform': 52 | self.prompt = nn.Parameter(torch.randn(key_shape),requires_grad=False) 53 | nn.init.uniform_(self.prompt, -5, 5) 54 | print('uniform initialized key') 55 | 56 | elif prompt_key_init == 'gaussian': 57 | self.prompt = nn.Parameter(torch.randn(key_shape),requires_grad=False) 58 | nn.init.normal_(self.prompt, mean=0.0, std=5.0) 59 | print('gaussian initialized key') 60 | 61 | 62 | elif prompt_key_init == 'text_prototype': 63 | self.text_prototype_linear = nn.Linear(50257, pool_size) 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | else: 72 | # else use mean of prompt as key 73 | # only compatible with prompt, not prefix 74 | prompt_mean = torch.mean(self.prompt, dim=1) 75 | self.prompt_key = prompt_mean 76 | 77 | def l2_normalize(self, x, dim=None, epsilon=1e-12): 78 | """Normalizes a given vector or matrix.""" 79 | square_sum = torch.sum(x ** 2, dim=dim, keepdim=True) 80 | x_inv_norm = torch.rsqrt(torch.maximum(square_sum, torch.tensor(epsilon, device=x.device))) 81 | return x * x_inv_norm 82 | 83 | def forward(self, x_embed, prompt_mask=None, cls_features=None): 84 | out = dict() 85 | if self.prompt_key: #if self.prompt_pool: 86 | if self.embedding_key == 'mean': 87 | x_embed_mean = torch.mean(x_embed, dim=1) 88 | elif self.embedding_key == 'max': 89 | x_embed_mean = torch.max(x_embed, dim=1)[0] 90 | elif self.embedding_key == 'mean_max': 91 | x_embed_mean = torch.max(x_embed, dim=1)[0] + 2 * torch.mean(x_embed, dim=1) 92 | elif self.embedding_key == 'cls': 93 | if cls_features is None: 94 | x_embed_mean = torch.max(x_embed, dim=1)[0] # B, C 95 | else: 96 | x_embed_mean = cls_features 97 | else: 98 | raise NotImplementedError("Not supported way of calculating embedding keys!") 99 | 100 | 101 | if self.prompt_key_init == 'text_prototype': 102 | prompt_key = self.text_prototype_linear(self.wte.transpose(0, 1)).transpose(0, 1) 103 | 104 | else: 105 | prompt_key = self.prompt 106 | 107 | prompt_norm = self.l2_normalize(prompt_key, dim=1) # Pool_size, C self.prompt_key 108 | x_embed_norm = self.l2_normalize(x_embed_mean, dim=1) # B, C 109 | 110 | similarity = torch.matmul(x_embed_norm, prompt_norm.t()) # B, Pool_size 111 | 112 | if prompt_mask is None: 113 | _, idx = torch.topk(similarity, k=self.top_k, dim=1) # B, top_k 114 | if self.batchwise_prompt: 115 | prompt_id, id_counts = torch.unique(idx, 
return_counts=True, sorted=True) 116 | # In jnp.unique, when the 'size' is specified and there are fewer than the indicated number of elements, 117 | # the remaining elements will be filled with 'fill_value', the default is the minimum value along the specified dimension. 118 | # Unless dimension is specified, this will be flattened if it is not already 1D. 119 | if prompt_id.shape[0] < self.pool_size: 120 | prompt_id = torch.cat([prompt_id, torch.full((self.pool_size - prompt_id.shape[0],), torch.min(idx.flatten()), device=prompt_id.device)]) 121 | id_counts = torch.cat([id_counts, torch.full((self.pool_size - id_counts.shape[0],), 0, device=id_counts.device)]) 122 | _, major_idx = torch.topk(id_counts, k=self.top_k) # top_k 123 | major_prompt_id = prompt_id[major_idx] # top_k 124 | # expand to batch 125 | idx = major_prompt_id.expand(x_embed.shape[0], -1) # B, top_k 126 | else: 127 | idx = prompt_mask # B, top_k 128 | 129 | # batched_prompt_raw = self.prompt[idx] # B, top_k, length, C 130 | 131 | batched_prompt_raw = prompt_key[idx] # B, top_k, C (prompt_key has shape Pool_size, C) 132 | batched_prompt_raw = batched_prompt_raw.unsqueeze(2) # B, top_k, 1, C 133 | 134 | batch_size, top_k, length, c = batched_prompt_raw.shape # length == 1 here 135 | batched_prompt = batched_prompt_raw.reshape(batch_size, top_k * length, c) # B, top_k * length, C 136 | 137 | out['prompt_idx'] = idx 138 | 139 | # Debugging, return sim as well 140 | out['prompt_norm'] = prompt_norm 141 | out['x_embed_norm'] = x_embed_norm 142 | out['similarity'] = similarity 143 | 144 | # Put pull_constraint loss calculation inside 145 | batched_key_norm = prompt_norm[idx] # B, top_k, C 146 | out['selected_key'] = batched_key_norm 147 | x_embed_norm = x_embed_norm.unsqueeze(1) # B, 1, C 148 | sim = batched_key_norm * x_embed_norm # B, top_k, C 149 | reduce_sim = torch.sum(sim) / x_embed.shape[0] # Scalar 150 | 151 | out['reduce_sim'] = reduce_sim 152 | else: 153 | if self.prompt_init == 'zero': 154 | self.prompt = nn.Parameter(torch.zeros(self.length, self.embed_dim)) 155 | elif self.prompt_init == 'uniform': 156 | self.prompt = nn.Parameter(torch.randn(self.length, self.embed_dim)) 157 | nn.init.uniform_(self.prompt) 158 | batched_prompt = self.prompt.unsqueeze(0).expand(x_embed.shape[0], -1, -1) 159 | 160 | # The input with the prompt concatenated to the front.
[B, prompt+token, C] 161 | out['total_prompt_len'] = batched_prompt.shape[1] 162 | out['prompted_embedding'] = torch.cat([batched_prompt, x_embed], dim=1) 163 | out['prompt_key'] = prompt_key # prompt_key 164 | 165 | return out -------------------------------------------------------------------------------- /Short-term_Forecasting/run_m4.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from torch import optim 4 | from torch.optim import lr_scheduler 5 | 6 | from data_provider.m4 import M4Meta 7 | from models.S2IPLLM import S2IPLLM 8 | 9 | from data_provider.data_factory import data_provider 10 | import time 11 | import random 12 | import numpy as np 13 | import pandas 14 | 15 | from utils.losses import smape_loss 16 | from utils.m4_summary import M4Summary 17 | import os 18 | 19 | os.environ['CURL_CA_BUNDLE'] = '' 20 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64" 21 | 22 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, load_content, test 23 | 24 | parser = argparse.ArgumentParser(description='Time-LLM') 25 | 26 | fix_seed = 2021 27 | random.seed(fix_seed) 28 | torch.manual_seed(fix_seed) 29 | np.random.seed(fix_seed) 30 | 31 | # basic config 32 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', 33 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') 34 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status') 35 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') 36 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results') 37 | parser.add_argument('--model', type=str, required=True, default='Autoformer', 38 | help='model name, options: [Autoformer, DLinear]') 39 | parser.add_argument('--seed', type=int, default=0, help='random seed') 40 | 41 | # data loader 42 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') 43 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file') 44 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') 45 | parser.add_argument('--features', type=str, default='M', 46 | help='forecasting task, options:[M, S, MS]; ' 47 | 'M:multivariate predict multivariate, S: univariate predict univariate, ' 48 | 'MS:multivariate predict univariate') 49 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') 50 | parser.add_argument('--loader', type=str, default='modal', help='dataset type') 51 | parser.add_argument('--freq', type=str, default='h', 52 | help='freq for time features encoding, ' 53 | 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], ' 54 | 'you can also use more detailed freq like 15min or 3h') 55 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') 56 | 57 | # forecasting task 58 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') 59 | parser.add_argument('--label_len', type=int, default=48, help='start token length') 60 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') 61 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') 62 | 63 
| # model define 64 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') 65 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') 66 | parser.add_argument('--c_out', type=int, default=7, help='output size') 67 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model') 68 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads') 69 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') 70 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') 71 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn') 72 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') 73 | parser.add_argument('--factor', type=int, default=1, help='attn factor') 74 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout') 75 | parser.add_argument('--embed', type=str, default='timeF', 76 | help='time features encoding, options:[timeF, fixed, learned]') 77 | parser.add_argument('--activation', type=str, default='gelu', help='activation') 78 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder') 79 | parser.add_argument('--patch_size', type=int, default=16, help='patch length') 80 | parser.add_argument('--stride', type=int, default=8, help='stride') 81 | 82 | 83 | # optimization 84 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') 85 | parser.add_argument('--itr', type=int, default=1, help='experiments times') 86 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') 87 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs') 88 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') 89 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation') 90 | parser.add_argument('--patience', type=int, default=20, help='early stopping patience') 91 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') 92 | parser.add_argument('--des', type=str, default='test', help='exp description') 93 | parser.add_argument('--loss', type=str, default='MSE', help='loss function') 94 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') 95 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start') 96 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) 97 | parser.add_argument('--llm_layers', type=int, default=6) 98 | parser.add_argument('--percent', type=int, default=100) 99 | parser.add_argument('--add_prompt', type=int, default=1) 100 | parser.add_argument('--prompt_length', type=int, default=2) 101 | parser.add_argument('--sim_coef', type=float, default=-0.1) 102 | parser.add_argument('--pool_size', type=int, default=1000) 103 | parser.add_argument('--trend_length', type=int, default=24) 104 | parser.add_argument('--seasonal_length', type=int, default=4) 105 | parser.add_argument('--gpt_layers', type=int, default=6) 106 | parser.add_argument('--prompt_init', type=str, default='text_prototype', help='prompt_init_type') 107 | 108 | 109 | 110 | args = parser.parse_args() 111 | 112 | for ii in range(args.itr): 113 | # setting record of experiments 114 | setting = 
'{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format( 115 | args.task_name, 116 | args.model_id, 117 | args.model, 118 | args.data, 119 | args.features, 120 | args.seq_len, 121 | args.label_len, 122 | args.pred_len, 123 | args.d_model, 124 | args.n_heads, 125 | args.e_layers, 126 | args.d_layers, 127 | args.d_ff, 128 | args.factor, 129 | args.embed, 130 | args.des, ii) 131 | 132 | if args.data == 'm4': 133 | args.pred_len = M4Meta.horizons_map[args.seasonal_patterns] # Up to M4 config 134 | args.seq_len = 2 * args.pred_len 135 | args.label_len = args.pred_len 136 | args.frequency_map = M4Meta.frequency_map[args.seasonal_patterns] 137 | 138 | train_data, train_loader = data_provider(args, 'train') 139 | vali_data, vali_loader = data_provider(args, 'val') 140 | test_data, test_loader = data_provider(args, 'test') 141 | 142 | device = torch.device('cuda:0') 143 | model = S2IPLLM(args, device) 144 | model.to(device) 145 | 146 | path = os.path.join(args.checkpoints, 147 | setting + '-' + args.model_comment) # unique checkpoint saving path 148 | 149 | if not os.path.exists(path): 150 | os.makedirs(path) 151 | 152 | time_now = time.time() 153 | 154 | train_steps = len(train_loader) 155 | early_stopping = EarlyStopping(patience=args.patience, verbose=True) 156 | 157 | model_optim = optim.Adam(model.parameters(), lr=args.learning_rate) 158 | 159 | if args.lradj == 'COS': 160 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8) 161 | else: 162 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 163 | steps_per_epoch=train_steps, 164 | pct_start=args.pct_start, 165 | epochs=args.train_epochs, 166 | max_lr=args.learning_rate) 167 | 168 | criterion = smape_loss() 169 | 170 | 171 | for epoch in range(args.train_epochs): 172 | iter_count = 0 173 | train_loss = [] 174 | 175 | model.train() 176 | epoch_time = time.time() 177 | 178 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): 179 | iter_count += 1 180 | model_optim.zero_grad() 181 | batch_x = batch_x.float().to(device) 182 | 183 | batch_y = batch_y.float().to(device) 184 | batch_y_mark = batch_y_mark.float().to(device) 185 | 186 | # decoder input 187 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(device) 188 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(device) 189 | 190 | outputs,res = model(batch_x, ii) 191 | 192 | f_dim = -1 if args.features == 'MS' else 0 193 | outputs = outputs[:, -args.pred_len:, f_dim:] 194 | batch_y = batch_y[:, -args.pred_len:, f_dim:] 195 | 196 | batch_y_mark = batch_y_mark[:, -args.pred_len:, f_dim:] 197 | loss = criterion(batch_x, args.frequency_map, outputs, batch_y, batch_y_mark) 198 | 199 | train_loss.append(loss.item()) 200 | 201 | loss += args.sim_coef*res['simlarity_loss'] 202 | 203 | if (i + 1) % 100 == 0: 204 | print( 205 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()) 206 | ) 207 | speed = (time.time() - time_now) / iter_count 208 | left_time = speed * ((args.train_epochs - epoch) * train_steps - i) 209 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 210 | iter_count = 0 211 | time_now = time.time() 212 | 213 | loss.backward() 214 | model_optim.step() 215 | 216 | 217 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 218 | train_loss = np.average(train_loss) 219 | vali_loss = test(args, model, train_loader, vali_loader, criterion,ii) 220 | test_loss = 
vali_loss 221 | print( 222 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 223 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 224 | early_stopping(vali_loss, model, path) # model saving 225 | if early_stopping.early_stop: 226 | print("Early stopping") 227 | break 228 | 229 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 230 | 231 | best_model_path = path + '/' + 'checkpoint' 232 | 233 | torch.cuda.synchronize() 234 | torch.cuda.empty_cache() 235 | model.load_state_dict(torch.load(best_model_path, map_location=lambda storage, loc: storage)) 236 | 237 | x, _ = train_loader.dataset.last_insample_window() 238 | y = test_loader.dataset.timeseries 239 | x = torch.tensor(x, dtype=torch.float32).to(device) 240 | x = x.unsqueeze(-1) 241 | 242 | model.eval() 243 | 244 | with torch.no_grad(): 245 | B, _, C = x.shape 246 | dec_inp = torch.zeros((B, args.pred_len, C)).float().to(device) 247 | dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1) 248 | outputs = torch.zeros((B, args.pred_len, C)).float().to(device) 249 | id_list = np.arange(0, B, args.eval_batch_size) 250 | id_list = np.append(id_list, B) 251 | for i in range(len(id_list) - 1): 252 | outputs[id_list[i]:id_list[i + 1], :, :],_ = model( 253 | x[id_list[i]:id_list[i + 1]],ii 254 | ) 255 | 256 | f_dim = -1 if args.features == 'MS' else 0 257 | outputs = outputs[:, -args.pred_len:, f_dim:] 258 | outputs = outputs.detach().cpu().numpy() 259 | 260 | preds = outputs 261 | trues = y 262 | x = x.detach().cpu().numpy() 263 | 264 | print('test shape:', preds.shape) 265 | 266 | folder_path = './m4_results/' + args.model + '-' + args.model_comment + '/' 267 | 268 | if not os.path.exists(folder_path): 269 | os.makedirs(folder_path) 270 | 271 | 272 | forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(args.pred_len)]) 273 | forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]] 274 | forecasts_df.index.name = 'id' 275 | forecasts_df.set_index(forecasts_df.columns[0], inplace=True) 276 | forecasts_df.to_csv(folder_path + args.seasonal_patterns + '_forecast.csv') 277 | 278 | # calculate metrics 279 | print(args.model) 280 | file_path = folder_path 281 | if 'Weekly_forecast.csv' in os.listdir(file_path) \ 282 | and 'Monthly_forecast.csv' in os.listdir(file_path) \ 283 | and 'Yearly_forecast.csv' in os.listdir(file_path) \ 284 | and 'Daily_forecast.csv' in os.listdir(file_path) \ 285 | and 'Hourly_forecast.csv' in os.listdir(file_path) \ 286 | and 'Quarterly_forecast.csv' in os.listdir(file_path): 287 | 288 | m4_summary = M4Summary(file_path, args.root_path) 289 | # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True) 290 | smape_results, owa_results, mape, mase = m4_summary.evaluate() 291 | print('smape:', smape_results) 292 | print('mape:', mape) 293 | print('mase:', mase) 294 | print('owa:', owa_results) 295 | else: 296 | print('After all 6 tasks are finished, you can calculate the averaged performance') 297 | 298 | 299 | -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzijie825/S2IP-LLM/05b216ac5a126bd5117be91de1ce97afb3cef9cb/Short-term_Forecasting/utils/__init__.py -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/losses.py: 
-------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | Loss functions for PyTorch. 17 | """ 18 | 19 | import torch as t 20 | import torch.nn as nn 21 | import numpy as np 22 | import pdb 23 | 24 | 25 | def divide_no_nan(a, b): 26 | """ 27 | a/b where the resulted NaN or Inf are replaced by 0. 28 | """ 29 | result = a / b 30 | result[result != result] = .0 31 | result[result == np.inf] = .0 32 | return result 33 | 34 | 35 | class mape_loss(nn.Module): 36 | def __init__(self): 37 | super(mape_loss, self).__init__() 38 | 39 | def forward(self, insample: t.Tensor, freq: int, 40 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 41 | """ 42 | MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error 43 | 44 | :param forecast: Forecast values. Shape: batch, time 45 | :param target: Target values. Shape: batch, time 46 | :param mask: 0/1 mask. Shape: batch, time 47 | :return: Loss value 48 | """ 49 | weights = divide_no_nan(mask, target) 50 | return t.mean(t.abs((forecast - target) * weights)) 51 | 52 | 53 | class smape_loss(nn.Module): 54 | def __init__(self): 55 | super(smape_loss, self).__init__() 56 | 57 | def forward(self, insample: t.Tensor, freq: int, 58 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 59 | """ 60 | sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993) 61 | 62 | :param forecast: Forecast values. Shape: batch, time 63 | :param target: Target values. Shape: batch, time 64 | :param mask: 0/1 mask. Shape: batch, time 65 | :return: Loss value 66 | """ 67 | return 200 * t.mean(divide_no_nan(t.abs(forecast - target), 68 | t.abs(forecast.data) + t.abs(target.data)) * mask) 69 | 70 | 71 | class mase_loss(nn.Module): 72 | def __init__(self): 73 | super(mase_loss, self).__init__() 74 | 75 | def forward(self, insample: t.Tensor, freq: int, 76 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 77 | """ 78 | MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf 79 | 80 | :param insample: Insample values. Shape: batch, time_i 81 | :param freq: Frequency value 82 | :param forecast: Forecast values. Shape: batch, time_o 83 | :param target: Target values. Shape: batch, time_o 84 | :param mask: 0/1 mask. 
Shape: batch, time_o 85 | :return: Loss value 86 | """ 87 | masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1) 88 | masked_masep_inv = divide_no_nan(mask, masep[:, None]) 89 | return t.mean(t.abs(target - forecast) * masked_masep_inv) 90 | -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/m4_summary.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | M4 Summary 17 | """ 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from data_provider.m4 import M4Dataset 24 | from data_provider.m4 import M4Meta 25 | import os 26 | 27 | 28 | def group_values(values, groups, group_name): 29 | 30 | mask = groups == group_name 31 | selected_values = [values[i] for i in range(len(values)) if mask[i]] 32 | filtered_values = [np.array(v)[~np.isnan(v)] for v in selected_values] 33 | 34 | 35 | return np.array(filtered_values, dtype=object) 36 | #np.array([v[~np.isnan(v)] for v in values[groups == group_name]]) 37 | # 38 | 39 | 40 | def mase(forecast, insample, outsample, frequency): 41 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:])) 42 | 43 | 44 | def smape_2(forecast, target): 45 | denom = np.abs(target) + np.abs(forecast) 46 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway. 47 | denom[denom == 0.0] = 1.0 48 | return 200 * np.abs(forecast - target) / denom 49 | 50 | 51 | def mape(forecast, target): 52 | denom = np.abs(target) 53 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway. 54 | denom[denom == 0.0] = 1.0 55 | return 100 * np.abs(forecast - target) / denom 56 | 57 | 58 | class M4Summary: 59 | def __init__(self, file_path, root_path): 60 | self.file_path = file_path 61 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path) 62 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path) 63 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv') 64 | 65 | def evaluate(self): 66 | """ 67 | Evaluate forecasts using M4 test dataset. 68 | 69 | :param forecast: Forecasts. Shape: timeseries, time. 70 | :return: sMAPE and OWA grouped by seasonal patterns. 
71 | """ 72 | grouped_owa = OrderedDict() 73 | 74 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32) 75 | naive2_forecasts = [v[~np.isnan(v)] for v in naive2_forecasts] 76 | 77 | model_mases = {} 78 | naive2_smapes = {} 79 | naive2_mases = {} 80 | grouped_smapes = {} 81 | grouped_mapes = {} 82 | for group_name in M4Meta.seasonal_patterns: 83 | 84 | file_name = self.file_path + group_name + "_forecast.csv" 85 | print(file_name) 86 | if os.path.exists(file_name): 87 | model_forecast = pd.read_csv(file_name).values 88 | 89 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name) 90 | target = group_values(self.test_set.values, self.test_set.groups, group_name) 91 | # all timeseries within group have same frequency 92 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0] 93 | insample = group_values(self.training_set.values, self.test_set.groups, group_name) 94 | 95 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i], 96 | insample=insample[i], 97 | outsample=target[i], 98 | frequency=frequency) for i in range(len(model_forecast))]) 99 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i], 100 | insample=insample[i], 101 | outsample=target[i], 102 | frequency=frequency) for i in range(len(model_forecast))]) 103 | 104 | naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target)) 105 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target)) 106 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target)) 107 | 108 | grouped_smapes = self.summarize_groups(grouped_smapes) 109 | grouped_mapes = self.summarize_groups(grouped_mapes) 110 | grouped_model_mases = self.summarize_groups(model_mases) 111 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes) 112 | grouped_naive2_mases = self.summarize_groups(naive2_mases) 113 | for k in grouped_model_mases.keys(): 114 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] + 115 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2 116 | 117 | def round_all(d): 118 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items())) 119 | 120 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all( 121 | grouped_model_mases) 122 | 123 | def summarize_groups(self, scores): 124 | """ 125 | Re-group scores respecting M4 rules. 126 | :param scores: Scores per group. 127 | :return: Grouped scores. 
128 | """ 129 | scores_summary = OrderedDict() 130 | 131 | def group_count(group_name): 132 | return len(np.where(self.test_set.groups == group_name)[0]) 133 | 134 | weighted_score = {} 135 | for g in ['Yearly', 'Quarterly', 'Monthly']: 136 | weighted_score[g] = scores[g] * group_count(g) 137 | scores_summary[g] = scores[g] 138 | 139 | others_score = 0 140 | others_count = 0 141 | for g in ['Weekly', 'Daily', 'Hourly']: 142 | others_score += scores[g] * group_count(g) 143 | others_count += group_count(g) 144 | weighted_score['Others'] = others_score 145 | scores_summary['Others'] = others_score / others_count 146 | 147 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) 148 | scores_summary['Average'] = average 149 | 150 | return scores_summary 151 | -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/masking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TriangularCausalMask(): 5 | def __init__(self, B, L, device="cpu"): 6 | mask_shape = [B, 1, L, L] 7 | with torch.no_grad(): 8 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 9 | 10 | @property 11 | def mask(self): 12 | return self._mask 13 | 14 | 15 | class ProbMask(): 16 | def __init__(self, B, H, L, index, scores, device="cpu"): 17 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 18 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) 19 | indicator = _mask_ex[torch.arange(B)[:, None, None], 20 | torch.arange(H)[None, :, None], 21 | index, :].to(device) 22 | self._mask = indicator.view(scores.shape).to(device) 23 | 24 | @property 25 | def mask(self): 26 | return self._mask -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def RSE(pred, true): 5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) 6 | 7 | 8 | def CORR(pred, true): 9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) 10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) 11 | return (u / d).mean(-1) 12 | 13 | 14 | def MAE(pred, true): 15 | return np.mean(np.abs(pred - true)) 16 | 17 | 18 | def MSE(pred, true): 19 | return np.mean((pred - true) ** 2) 20 | 21 | 22 | def RMSE(pred, true): 23 | return np.sqrt(MSE(pred, true)) 24 | 25 | 26 | def MAPE(pred, true): 27 | return np.mean(np.abs((pred - true) / true)) 28 | 29 | 30 | def MSPE(pred, true): 31 | return np.mean(np.square((pred - true) / true)) 32 | 33 | 34 | def metric(pred, true): 35 | mae = MAE(pred, true) 36 | mse = MSE(pred, true) 37 | rmse = RMSE(pred, true) 38 | mape = MAPE(pred, true) 39 | mspe = MSPE(pred, true) 40 | 41 | return mae, mse, rmse, mape, mspe 42 | -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/timefeatures.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.tseries import offsets 6 | from pandas.tseries.frequencies import to_offset 7 | 8 | 9 | class TimeFeature: 10 | def __init__(self): 11 | pass 12 | 13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 14 | pass 15 | 16 | def __repr__(self): 17 | 
return self.__class__.__name__ + "()"
18 | 
19 | 
20 | class SecondOfMinute(TimeFeature):
21 |     """Second of minute encoded as value between [-0.5, 0.5]"""
22 | 
23 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
24 |         return index.second / 59.0 - 0.5
25 | 
26 | 
27 | class MinuteOfHour(TimeFeature):
28 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
29 | 
30 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
31 |         return index.minute / 59.0 - 0.5
32 | 
33 | 
34 | class HourOfDay(TimeFeature):
35 |     """Hour of day encoded as value between [-0.5, 0.5]"""
36 | 
37 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
38 |         return index.hour / 23.0 - 0.5
39 | 
40 | 
41 | class DayOfWeek(TimeFeature):
42 |     """Day of week encoded as value between [-0.5, 0.5]"""
43 | 
44 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
45 |         return index.dayofweek / 6.0 - 0.5
46 | 
47 | 
48 | class DayOfMonth(TimeFeature):
49 |     """Day of month encoded as value between [-0.5, 0.5]"""
50 | 
51 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
52 |         return (index.day - 1) / 30.0 - 0.5
53 | 
54 | 
55 | class DayOfYear(TimeFeature):
56 |     """Day of year encoded as value between [-0.5, 0.5]"""
57 | 
58 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
59 |         return (index.dayofyear - 1) / 365.0 - 0.5
60 | 
61 | 
62 | class MonthOfYear(TimeFeature):
63 |     """Month of year encoded as value between [-0.5, 0.5]"""
64 | 
65 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
66 |         return (index.month - 1) / 11.0 - 0.5
67 | 
68 | 
69 | class WeekOfYear(TimeFeature):
70 |     """Week of year encoded as value between [-0.5, 0.5]"""
71 | 
72 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
73 |         return (index.isocalendar().week - 1) / 52.0 - 0.5
74 | 
75 | 
76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
77 |     """
78 |     Returns a list of time features that will be appropriate for the given frequency string.
79 |     Parameters
80 |     ----------
81 |     freq_str
82 |         Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) -------------------------------------------------------------------------------- /Short-term_Forecasting/utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import shutil 5 | 6 | from tqdm import tqdm 7 | 8 | plt.switch_backend('agg') 9 | 10 | 11 | def adjust_learning_rate(optimizer, scheduler, epoch, args, printout=True): 12 | if args.lradj == 'type1': 13 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} 14 | elif args.lradj == 'type2': 15 | lr_adjust = { 16 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 17 | 10: 5e-7, 15: 1e-7, 20: 5e-8 18 | } 19 | elif args.lradj == 'type3': 20 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 21 | elif args.lradj == 'PEMS': 22 | lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))} 23 | elif args.lradj == 'TST': 24 | lr_adjust = {epoch: scheduler.get_last_lr()[0]} 25 | elif args.lradj == 'constant': 26 | lr_adjust = {epoch: args.learning_rate} 27 | if epoch in lr_adjust.keys(): 28 | lr = lr_adjust[epoch] 29 | for param_group in optimizer.param_groups: 30 | param_group['lr'] = lr 31 | if printout: 32 | 33 | print('Updating learning rate to {}'.format(lr)) 34 | 35 | 36 | class EarlyStopping: 37 | def __init__(self, accelerator=None, patience=7, verbose=False, delta=0, save_mode=True): 38 | self.accelerator = accelerator 39 | self.patience = patience 40 | self.verbose = verbose 41 | self.counter = 0 42 | self.best_score = None 43 | self.early_stop = False 44 | self.val_loss_min = np.Inf 45 | self.delta = delta 46 | self.save_mode = save_mode 47 | 48 | def __call__(self, val_loss, model, path): 49 | score = -val_loss 50 | if self.best_score is None: 51 | self.best_score = score 52 | if self.save_mode: 53 | self.save_checkpoint(val_loss, model, path) 54 | elif score < self.best_score + self.delta: 55 | self.counter += 1 56 | if self.accelerator is None: 57 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 58 | else: 59 | self.accelerator.print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 
60 | if self.counter >= self.patience: 61 | self.early_stop = True 62 | else: 63 | self.best_score = score 64 | if self.save_mode: 65 | self.save_checkpoint(val_loss, model, path) 66 | self.counter = 0 67 | 68 | def save_checkpoint(self, val_loss, model, path): 69 | if self.verbose: 70 | if self.accelerator is not None: 71 | self.accelerator.print( 72 | f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 73 | else: 74 | print( 75 | f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 76 | 77 | if self.accelerator is not None: 78 | model = self.accelerator.unwrap_model(model) 79 | torch.save(model.state_dict(), path + '/' + 'checkpoint') 80 | else: 81 | torch.save(model.state_dict(), path + '/' + 'checkpoint') 82 | self.val_loss_min = val_loss 83 | 84 | 85 | class dotdict(dict): 86 | """dot.notation access to dictionary attributes""" 87 | __getattr__ = dict.get 88 | __setattr__ = dict.__setitem__ 89 | __delattr__ = dict.__delitem__ 90 | 91 | 92 | class StandardScaler(): 93 | def __init__(self, mean, std): 94 | self.mean = mean 95 | self.std = std 96 | 97 | def transform(self, data): 98 | return (data - self.mean) / self.std 99 | 100 | def inverse_transform(self, data): 101 | return (data * self.std) + self.mean 102 | 103 | def adjustment(gt, pred): 104 | anomaly_state = False 105 | for i in range(len(gt)): 106 | if gt[i] == 1 and pred[i] == 1 and not anomaly_state: 107 | anomaly_state = True 108 | for j in range(i, 0, -1): 109 | if gt[j] == 0: 110 | break 111 | else: 112 | if pred[j] == 0: 113 | pred[j] = 1 114 | for j in range(i, len(gt)): 115 | if gt[j] == 0: 116 | break 117 | else: 118 | if pred[j] == 0: 119 | pred[j] = 1 120 | elif gt[i] == 0: 121 | anomaly_state = False 122 | if anomaly_state: 123 | pred[i] = 1 124 | return gt, pred 125 | 126 | 127 | def cal_accuracy(y_pred, y_true): 128 | return np.mean(y_pred == y_true) 129 | 130 | 131 | def del_files(dir_path): 132 | shutil.rmtree(dir_path) 133 | 134 | 135 | def vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric): 136 | total_loss = [] 137 | total_mae_loss = [] 138 | model.eval() 139 | with torch.no_grad(): 140 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)): 141 | batch_x = batch_x.float().to(accelerator.device) 142 | batch_y = batch_y.float() 143 | 144 | batch_x_mark = batch_x_mark.float().to(accelerator.device) 145 | batch_y_mark = batch_y_mark.float().to(accelerator.device) 146 | 147 | # decoder input 148 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float() 149 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to( 150 | accelerator.device) 151 | # encoder - decoder 152 | if args.use_amp: 153 | with torch.cuda.amp.autocast(): 154 | if args.output_attention: 155 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 156 | else: 157 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 158 | else: 159 | if args.output_attention: 160 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 161 | else: 162 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 163 | 164 | outputs, batch_y = accelerator.gather_for_metrics((outputs, batch_y)) 165 | 166 | f_dim = -1 if args.features == 'MS' else 0 167 | outputs = outputs[:, -args.pred_len:, f_dim:] 168 | batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device) 169 | 170 | pred = outputs.detach() 171 | true = batch_y.detach() 172 | 173 | loss = 
criterion(pred, true) 174 | 175 | mae_loss = mae_metric(pred, true) 176 | 177 | total_loss.append(loss.item()) 178 | total_mae_loss.append(mae_loss.item()) 179 | 180 | total_loss = np.average(total_loss) 181 | total_mae_loss = np.average(total_mae_loss) 182 | 183 | model.train() 184 | return total_loss, total_mae_loss 185 | 186 | 187 | def test(args, model, train_loader, vali_loader, criterion,ii): 188 | x, _ = train_loader.dataset.last_insample_window() 189 | y = vali_loader.dataset.timeseries 190 | x = torch.tensor(x, dtype=torch.float32).cuda() 191 | x = x.unsqueeze(-1) 192 | 193 | model.eval() 194 | with torch.no_grad(): 195 | B, _, C = x.shape 196 | dec_inp = torch.zeros((B, args.pred_len, C)).float().cuda() 197 | dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1) 198 | outputs = torch.zeros((B, args.pred_len, C)).float().cuda() 199 | id_list = np.arange(0, B, args.eval_batch_size) 200 | id_list = np.append(id_list, B) 201 | for i in range(len(id_list) - 1): 202 | outputs[id_list[i]:id_list[i + 1], :, :],_ = model( 203 | x[id_list[i]:id_list[i + 1]],ii 204 | ) 205 | 206 | f_dim = -1 if args.features == 'MS' else 0 207 | outputs = outputs[:, -args.pred_len:, f_dim:] 208 | pred = outputs 209 | true = torch.from_numpy(np.array(y)).cuda() 210 | batch_y_mark = torch.ones(true.shape).cuda() 211 | 212 | loss = criterion(x[:, :, 0], args.frequency_map, pred[:, :, 0], true, batch_y_mark) 213 | 214 | model.train() 215 | return loss 216 | 217 | 218 | def load_content(args): 219 | if 'ETT' in args.data: 220 | file = 'ETT' 221 | else: 222 | file = args.data 223 | with open('./dataset/prompt_bank/{0}.txt'.format(file), 'r') as f: 224 | content = f.read() 225 | return content -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.2.2 2 | einops==0.7.0 3 | matplotlib==3.7.0 4 | numpy==1.23.5 5 | pandas==1.5.3 6 | scikit_learn==1.2.2 7 | scipy==1.12.0 8 | tqdm==4.65.0 9 | transformers==4.31.0 10 | statsmodels==0.14.2 --------------------------------------------------------------------------------
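
As a quick, illustrative sanity check of the utilities dumped above, the sketch below shows how the Short-term_Forecasting helpers might be exercised on their own. It is a minimal sketch, not part of the repository: it assumes the Short-term_Forecasting directory is on the import path, and the hourly index, random series, and checkpoint directory are placeholders rather than project data.

    import numpy as np
    import pandas as pd

    from utils.timefeatures import time_features            # utils/timefeatures.py above
    from utils.tools import StandardScaler, EarlyStopping    # utils/tools.py above

    # Hourly calendar covariates: HourOfDay, DayOfWeek, DayOfMonth, DayOfYear,
    # each scaled to [-0.5, 0.5]; the stacked result has shape (4, 96).
    index = pd.date_range("2024-01-01", periods=96, freq="H")
    marks = time_features(index, freq="h")

    # Lightweight per-series normalisation with the StandardScaler from tools.py.
    values = np.random.randn(96).astype(np.float32)
    scaler = StandardScaler(mean=values.mean(), std=values.std())
    scaled = scaler.transform(values)
    restored = scaler.inverse_transform(scaled)

    # Early stopping keyed on validation loss; save_checkpoint writes a file
    # named "checkpoint" into the given directory whenever the loss improves.
    # early_stopping = EarlyStopping(patience=3, verbose=True)
    # early_stopping(val_loss, model, "./checkpoints")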