├── .gitignore
├── README.md
├── Test.ipynb
├── Train.ipynb
├── config.py
├── data
│   ├── test_data.csv
│   └── train_data.csv
├── main_forecasting.py
├── models
│   ├── informer
│   │   ├── layers
│   │   │   ├── __init__.py
│   │   │   ├── attn.py
│   │   │   ├── decoder.py
│   │   │   ├── embed.py
│   │   │   ├── encoder.py
│   │   │   └── model.py
│   │   ├── trainer.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── masking.py
│   │       └── tools.py
│   ├── rnn
│   │   ├── model.py
│   │   └── trainer.py
│   └── scinet
│       ├── SCINet.py
│       ├── trainer.py
│       └── utils
│           └── tools.py
├── scaler
│   └── minmax_scaler.pkl
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 | __pycache__/
3 | ckpt/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # KUDataForecasting
2 | - Time series forecasting: takes a raw time series as input and predicts future values
3 | - Input data format: univariate time-series data
4 |
5 |
6 | **Values to set when using time series forecasting**
7 | * **model** : choose one of ['lstm', 'gru', 'informer', 'scinet']
8 | * **best_model_path** : path to save the trained model
9 |
10 | * **Time-series forecasting model hyperparameters:** described in detail below.
11 | * LSTM hyperparameters
12 | * GRU hyperparameters
13 | * Informer hyperparameters
14 | * SCINet hyperparameters
15 |
16 |
17 |
18 | #### Time-series forecasting model hyperparameters
19 |
20 | #### 1. LSTM
21 | - **input_size** : number of input variables, int
22 | - **window_size** : length of the input sequence, int
23 | - **forecast_step** : number of future time steps to forecast, int
24 | - **num_layers** : number of recurrent layers, int(default: 2, range: 1 or greater)
25 | - **hidden_size** : dimension of the hidden state, int(default: 64, range: 1 or greater)
26 | - **dropout** : dropout probability, float(default: 0.1, range: 0 to 1)
27 | - **bidirectional** : whether the model is bidirectional, bool(default: True)
28 | - **num_epochs** : number of training epochs, int(default: 150, range: 1 or greater)
29 | - **batch_size** : batch size, int(default: 64, range: 1 or greater, set according to available hardware)
30 | - **lr** : learning rate, float(default: 0.0001, range: 0.1 or less)
31 | - **device** : training device (default: 'cuda', choose one of ['cuda', 'cpu'])
32 |
33 |
34 |
35 | #### 2. GRU
36 | - **input_size** : number of input variables, int
37 | - **window_size** : length of the input sequence, int
38 | - **forecast_step** : number of future time steps to forecast, int
39 | - **num_layers** : number of recurrent layers, int(default: 2, range: 1 or greater)
40 | - **hidden_size** : dimension of the hidden state, int(default: 64, range: 1 or greater)
41 | - **dropout** : dropout probability, float(default: 0.1, range: 0 to 1)
42 | - **bidirectional** : whether the model is bidirectional, bool(default: True)
43 | - **num_epochs** : number of training epochs, int(default: 150, range: 1 or greater)
44 | - **batch_size** : batch size, int(default: 64, range: 1 or greater, set according to available hardware)
45 | - **lr** : learning rate, float(default: 0.0001, range: 0.1 or less)
46 | - **device** : training device (default: 'cuda', choose one of ['cuda', 'cpu'])
47 |
48 |
49 | #### 3. Informer
50 | - **input_size** : number of input variables, int
51 | - **window_size** : length of the input sequence, int
52 | - **forecast_step** : number of future time steps to forecast, int
53 | - **label_len** : length of the decoder start token, int(default: 12, range: 1 or greater)
54 | - **d_model** : hidden dimension of the model, int(default: 512, range: 1 or greater)
55 | - **e_layers** : number of encoder layers, int(default: 2, range: 1 or greater)
56 | - **d_layers** : number of decoder layers, int(default: 1, range: 1 or greater)
57 | - **d_ff** : hidden dimension of the fully connected layer, int(default: 2048, range: 1 or greater)
58 | - **factor** : ProbSparse attention factor, int(default: 5)
59 | - **dropout** : dropout ratio, float(default: 0.05)
60 | - **attn** : attention mechanism (default: 'prob', choose one of ['prob', 'full'])
61 | - **n_heads** : number of multi-head attention heads, int(default: 8)
62 | - **embed** : time features encoding method (default: 'timeF', choose one of ['timeF', 'fixed', 'learned'])
63 | - **num_epochs** : number of training epochs, int(default: 150, range: 1 or greater)
64 | - **batch_size** : batch size, int(default: 64, range: 1 or greater, set according to available hardware)
65 | - **lr** : learning rate, float(default: 0.0001, range: 0.1 or less)
66 | - **lradj** : learning rate adjustment scheme (default: 'type1', choose one of ['type1', 'type2'])
67 | - **device** : training device (default: 'cuda', choose one of ['cuda', 'cpu'])
68 |
69 |
70 | #### 4. SCINet
71 | - **input_size** : number of input variables, int
72 | - **window_size** : length of the input sequence, int; must be a power of 2 (e.g., 64, 128, 256, ...)
73 | - **forecast_step** : number of future time steps to forecast, int
74 | - **levels** : depth of the SCINet tree, int(default: 2, range: at most the log of the input sequence length; 2-4 recommended)
75 | - **stacks** : number of stacked SCINet blocks (default: 1; fixed to 1 in this implementation)
76 | - **num_epochs** : number of training epochs, int(default: 150, range: 1 or greater)
77 | - **batch_size** : batch size, int(default: 64, range: 1 or greater, set according to available hardware)
78 | - **lr** : learning rate, float(default: 0.0001, range: 0.1 or less)
79 | - **device** : training device (default: 'cuda', choose one of ['cuda', 'cpu'])
80 |
81 |
--------------------------------------------------------------------------------
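The SCINet constraints above (window_size must be a power of 2, and levels should not exceed the log of the window length, with 2-4 recommended) are easy to violate silently. A minimal sanity check, written here purely as an illustration and not part of the package, reading the constraint as log base 2:

import math

def check_scinet_params(window_size: int, levels: int) -> None:
    # window_size must be a power of 2 (e.g., 64, 128, 256, ...)
    if window_size <= 0 or (window_size & (window_size - 1)) != 0:
        raise ValueError(f"window_size={window_size} is not a power of 2")
    # levels should not exceed log2(window_size); values of 2-4 are recommended
    max_levels = int(math.log2(window_size))
    if not (1 <= levels <= max_levels):
        raise ValueError(f"levels={levels} must be in [1, {max_levels}]")

check_scinet_params(window_size=64, levels=2)    # passes
# check_scinet_params(window_size=100, levels=2) # raises: 100 is not a power of 2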
/Train.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import torch\n",
10 | "import random\n",
11 | "import numpy as np\n",
12 | "from sklearn.model_selection import train_test_split\n",
13 | "\n",
14 | "import config\n",
15 | "import utils\n",
16 | "import main_forecasting as mf"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# fix random seed\n",
26 | "random_seed = 42\n",
27 | "\n",
28 | "torch.manual_seed(random_seed)\n",
29 | "torch.cuda.manual_seed(random_seed)\n",
30 | "torch.backends.cudnn.deterministic = True\n",
31 | "torch.backends.cudnn.benchmark = False\n",
32 | "np.random.seed(random_seed)\n",
33 | "random.seed(random_seed)"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [
41 | {
42 | "name": "stdout",
43 | "output_type": "stream",
44 | "text": [
45 | "(21043,)\n",
46 | "(5261,)\n",
47 | "Save MinMaxScaler in path: ./scaler/minmax_scaler.pkl\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "# load raw data\n",
53 | "data_root_dir = 'data'\n",
54 | "train_data, test_data = utils.load_data(data_root_dir) # shape=(# time steps, 1)\n",
55 | "\n",
56 | "# split train data into train/validation data\n",
57 | "# split the train data chronologically into train/validation sets using test_size=split_ratio\n",
58 | "split_ratio = 0.2\n",
59 | "train_data, valid_data = train_test_split(train_data, test_size=split_ratio, shuffle=False)\n",
60 | "\n",
61 | "# normalization\n",
62 | "scaler_path = './scaler/minmax_scaler.pkl'\n",
63 | "train_data, valid_data = utils.get_train_val_data(train_data, valid_data, scaler_path)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 4,
69 | "metadata": {
70 | "scrolled": true
71 | },
72 | "outputs": [
73 | {
74 | "name": "stdout",
75 | "output_type": "stream",
76 | "text": [
77 | "Start training model: lstm\n",
78 | "\n",
79 | "Epoch 1/150\n",
80 | "train Loss: 0.0621 RMSE: 0.2493\n",
81 | "val Loss: 0.0251 RMSE: 0.1584\n",
82 | "\n",
83 | "Epoch 10/150\n",
84 | "train Loss: 0.0040 RMSE: 0.0629\n",
85 | "val Loss: 0.0052 RMSE: 0.0719\n",
86 | "\n",
87 | "Epoch 20/150\n",
88 | "train Loss: 0.0027 RMSE: 0.0517\n",
89 | "val Loss: 0.0036 RMSE: 0.0600\n",
90 | "\n",
91 | "Epoch 30/150\n",
92 | "train Loss: 0.0021 RMSE: 0.0453\n",
93 | "val Loss: 0.0027 RMSE: 0.0520\n",
94 | "\n",
95 | "Epoch 40/150\n",
96 | "train Loss: 0.0019 RMSE: 0.0432\n",
97 | "val Loss: 0.0024 RMSE: 0.0493\n",
98 | "\n",
99 | "Epoch 50/150\n",
100 | "train Loss: 0.0017 RMSE: 0.0417\n",
101 | "val Loss: 0.0024 RMSE: 0.0492\n",
102 | "\n",
103 | "Epoch 60/150\n",
104 | "train Loss: 0.0017 RMSE: 0.0410\n",
105 | "val Loss: 0.0028 RMSE: 0.0527\n",
106 | "\n",
107 | "Epoch 70/150\n",
108 | "train Loss: 0.0016 RMSE: 0.0405\n",
109 | "val Loss: 0.0023 RMSE: 0.0482\n",
110 | "\n",
111 | "Epoch 80/150\n",
112 | "train Loss: 0.0015 RMSE: 0.0393\n",
113 | "val Loss: 0.0021 RMSE: 0.0458\n",
114 | "\n",
115 | "Epoch 90/150\n",
116 | "train Loss: 0.0015 RMSE: 0.0392\n",
117 | "val Loss: 0.0021 RMSE: 0.0460\n",
118 | "\n",
119 | "Epoch 100/150\n",
120 | "train Loss: 0.0014 RMSE: 0.0379\n",
121 | "val Loss: 0.0020 RMSE: 0.0447\n",
122 | "\n",
123 | "Epoch 110/150\n",
124 | "train Loss: 0.0014 RMSE: 0.0379\n",
125 | "val Loss: 0.0019 RMSE: 0.0437\n",
126 | "\n",
127 | "Epoch 120/150\n",
128 | "train Loss: 0.0014 RMSE: 0.0372\n",
129 | "val Loss: 0.0020 RMSE: 0.0444\n",
130 | "\n",
131 | "Epoch 130/150\n",
132 | "train Loss: 0.0013 RMSE: 0.0366\n",
133 | "val Loss: 0.0019 RMSE: 0.0438\n",
134 | "\n",
135 | "Epoch 140/150\n",
136 | "train Loss: 0.0013 RMSE: 0.0362\n",
137 | "val Loss: 0.0018 RMSE: 0.0429\n",
138 | "\n",
139 | "Epoch 150/150\n",
140 | "train Loss: 0.0013 RMSE: 0.0362\n",
141 | "val Loss: 0.0019 RMSE: 0.0438\n",
142 | "\n",
143 | "Training complete in 3m 17s\n",
144 | "Best val MSE: 0.043848\n"
145 | ]
146 | }
147 | ],
148 | "source": [
149 | "# Case 1. model = lstm\n",
150 | "model_name = 'lstm'\n",
151 | "model_params = config.model_config[model_name]\n",
152 | "\n",
153 | "data_forecast = mf.Forecasting(model_params)\n",
154 | "best_model = data_forecast.train_model(train_data, valid_data) # train the model\n",
155 | "data_forecast.save_model(best_model, best_model_path=model_params[\"best_model_path\"]) # save the model"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 5,
161 | "metadata": {
162 | "scrolled": true
163 | },
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "Start training model: gru\n",
170 | "\n",
171 | "Epoch 1/150\n",
172 | "train Loss: 0.0354 RMSE: 0.1883\n",
173 | "val Loss: 0.0202 RMSE: 0.1420\n",
174 | "\n",
175 | "Epoch 10/150\n",
176 | "train Loss: 0.0046 RMSE: 0.0682\n",
177 | "val Loss: 0.0061 RMSE: 0.0778\n",
178 | "\n",
179 | "Epoch 20/150\n",
180 | "train Loss: 0.0040 RMSE: 0.0629\n",
181 | "val Loss: 0.0057 RMSE: 0.0756\n",
182 | "\n",
183 | "Epoch 30/150\n",
184 | "train Loss: 0.0033 RMSE: 0.0577\n",
185 | "val Loss: 0.0051 RMSE: 0.0717\n",
186 | "\n",
187 | "Epoch 40/150\n",
188 | "train Loss: 0.0026 RMSE: 0.0512\n",
189 | "val Loss: 0.0038 RMSE: 0.0618\n",
190 | "\n",
191 | "Epoch 50/150\n",
192 | "train Loss: 0.0020 RMSE: 0.0447\n",
193 | "val Loss: 0.0035 RMSE: 0.0591\n",
194 | "\n",
195 | "Epoch 60/150\n",
196 | "train Loss: 0.0018 RMSE: 0.0423\n",
197 | "val Loss: 0.0026 RMSE: 0.0511\n",
198 | "\n",
199 | "Epoch 70/150\n",
200 | "train Loss: 0.0017 RMSE: 0.0411\n",
201 | "val Loss: 0.0024 RMSE: 0.0494\n",
202 | "\n",
203 | "Epoch 80/150\n",
204 | "train Loss: 0.0016 RMSE: 0.0400\n",
205 | "val Loss: 0.0027 RMSE: 0.0516\n",
206 | "\n",
207 | "Epoch 90/150\n",
208 | "train Loss: 0.0015 RMSE: 0.0393\n",
209 | "val Loss: 0.0023 RMSE: 0.0475\n",
210 | "\n",
211 | "Epoch 100/150\n",
212 | "train Loss: 0.0035 RMSE: 0.0595\n",
213 | "val Loss: 0.0052 RMSE: 0.0721\n",
214 | "\n",
215 | "Epoch 110/150\n",
216 | "train Loss: 0.0022 RMSE: 0.0468\n",
217 | "val Loss: 0.0035 RMSE: 0.0588\n",
218 | "\n",
219 | "Epoch 120/150\n",
220 | "train Loss: 0.0019 RMSE: 0.0437\n",
221 | "val Loss: 0.0027 RMSE: 0.0519\n",
222 | "\n",
223 | "Epoch 130/150\n",
224 | "train Loss: 0.0017 RMSE: 0.0410\n",
225 | "val Loss: 0.0023 RMSE: 0.0476\n",
226 | "\n",
227 | "Epoch 140/150\n",
228 | "train Loss: 0.0015 RMSE: 0.0391\n",
229 | "val Loss: 0.0022 RMSE: 0.0470\n",
230 | "\n",
231 | "Epoch 150/150\n",
232 | "train Loss: 0.0014 RMSE: 0.0378\n",
233 | "val Loss: 0.0021 RMSE: 0.0457\n",
234 | "\n",
235 | "Training complete in 3m 16s\n",
236 | "Best val MSE: 0.045691\n"
237 | ]
238 | }
239 | ],
240 | "source": [
241 | "# Case 2. model = gru\n",
242 | "model_name = 'gru'\n",
243 | "model_params = config.model_config[model_name]\n",
244 | "\n",
245 | "data_forecast = mf.Forecasting(model_params)\n",
246 | "best_model = data_forecast.train_model(train_data, valid_data) # train the model\n",
247 | "data_forecast.save_model(best_model, best_model_path=model_params[\"best_model_path\"]) # save the model"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 6,
253 | "metadata": {
254 | "scrolled": true
255 | },
256 | "outputs": [
257 | {
258 | "name": "stdout",
259 | "output_type": "stream",
260 | "text": [
261 | "Start training model: informer\n",
262 | "\n",
263 | "Epoch 1/150\n",
264 | "train Loss: 0.0244 RMSE: 0.1562\n",
265 | "val Loss: 0.0002 RMSE: 0.0138\n",
266 | "\n",
267 | "Epoch 10/150\n",
268 | "train Loss: 0.0006 RMSE: 0.0254\n",
269 | "val Loss: 0.0001 RMSE: 0.0101\n",
270 | "\n",
271 | "Epoch 20/150\n",
272 | "train Loss: 0.0006 RMSE: 0.0253\n",
273 | "val Loss: 0.0001 RMSE: 0.0102\n",
274 | "\n",
275 | "Epoch 30/150\n",
276 | "train Loss: 0.0006 RMSE: 0.0252\n",
277 | "val Loss: 0.0001 RMSE: 0.0103\n",
278 | "\n",
279 | "Epoch 40/150\n",
280 | "train Loss: 0.0007 RMSE: 0.0256\n",
281 | "val Loss: 0.0001 RMSE: 0.0101\n",
282 | "\n",
283 | "Epoch 50/150\n",
284 | "train Loss: 0.0007 RMSE: 0.0256\n",
285 | "val Loss: 0.0001 RMSE: 0.0101\n",
286 | "\n",
287 | "Epoch 60/150\n",
288 | "train Loss: 0.0006 RMSE: 0.0252\n",
289 | "val Loss: 0.0001 RMSE: 0.0101\n",
290 | "\n",
291 | "Epoch 70/150\n",
292 | "train Loss: 0.0006 RMSE: 0.0253\n",
293 | "val Loss: 0.0001 RMSE: 0.0102\n",
294 | "\n",
295 | "Epoch 80/150\n",
296 | "train Loss: 0.0007 RMSE: 0.0256\n",
297 | "val Loss: 0.0001 RMSE: 0.0101\n",
298 | "\n",
299 | "Epoch 90/150\n",
300 | "train Loss: 0.0006 RMSE: 0.0254\n",
301 | "val Loss: 0.0001 RMSE: 0.0102\n",
302 | "\n",
303 | "Epoch 100/150\n",
304 | "train Loss: 0.0006 RMSE: 0.0253\n",
305 | "val Loss: 0.0001 RMSE: 0.0102\n",
306 | "\n",
307 | "Epoch 110/150\n",
308 | "train Loss: 0.0006 RMSE: 0.0254\n",
309 | "val Loss: 0.0001 RMSE: 0.0100\n",
310 | "\n",
311 | "Epoch 120/150\n",
312 | "train Loss: 0.0006 RMSE: 0.0252\n",
313 | "val Loss: 0.0001 RMSE: 0.0100\n",
314 | "\n",
315 | "Epoch 130/150\n",
316 | "train Loss: 0.0006 RMSE: 0.0251\n",
317 | "val Loss: 0.0001 RMSE: 0.0103\n",
318 | "\n",
319 | "Epoch 140/150\n",
320 | "train Loss: 0.0006 RMSE: 0.0254\n",
321 | "val Loss: 0.0001 RMSE: 0.0101\n",
322 | "\n",
323 | "Epoch 150/150\n",
324 | "train Loss: 0.0006 RMSE: 0.0255\n",
325 | "val Loss: 0.0001 RMSE: 0.0100\n",
326 | "\n",
327 | "Training complete in 14m 58s\n",
328 | "Best val MSE: 0.000044\n"
329 | ]
330 | }
331 | ],
332 | "source": [
333 | "# Case 3. model = informer\n",
334 | "model_name = 'informer'\n",
335 | "model_params = config.model_config[model_name]\n",
336 | "\n",
337 | "data_forecast = mf.Forecasting(model_params)\n",
338 | "best_model = data_forecast.train_model(train_data, valid_data) # train the model\n",
339 | "data_forecast.save_model(best_model, best_model_path=model_params[\"best_model_path\"]) # save the model"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 7,
345 | "metadata": {
346 | "scrolled": true
347 | },
348 | "outputs": [
349 | {
350 | "name": "stdout",
351 | "output_type": "stream",
352 | "text": [
353 | "Start training model: scinet\n",
354 | "\n",
355 | "Epoch 1/150\n",
356 | "train Loss: 0.2516 RMSE: 0.5016\n",
357 | "val Loss: 0.1055 RMSE: 0.3247\n",
358 | "\n",
359 | "Epoch 10/150\n",
360 | "train Loss: 0.0227 RMSE: 0.1505\n",
361 | "val Loss: 0.0202 RMSE: 0.1420\n",
362 | "\n",
363 | "Epoch 20/150\n",
364 | "train Loss: 0.0092 RMSE: 0.0957\n",
365 | "val Loss: 0.0096 RMSE: 0.0979\n",
366 | "\n",
367 | "Epoch 30/150\n",
368 | "train Loss: 0.0054 RMSE: 0.0732\n",
369 | "val Loss: 0.0066 RMSE: 0.0814\n",
370 | "\n",
371 | "Epoch 40/150\n",
372 | "train Loss: 0.0039 RMSE: 0.0627\n",
373 | "val Loss: 0.0052 RMSE: 0.0720\n",
374 | "\n",
375 | "Epoch 50/150\n",
376 | "train Loss: 0.0032 RMSE: 0.0563\n",
377 | "val Loss: 0.0044 RMSE: 0.0660\n",
378 | "\n",
379 | "Epoch 60/150\n",
380 | "train Loss: 0.0027 RMSE: 0.0519\n",
381 | "val Loss: 0.0038 RMSE: 0.0618\n",
382 | "\n",
383 | "Epoch 70/150\n",
384 | "train Loss: 0.0024 RMSE: 0.0490\n",
385 | "val Loss: 0.0035 RMSE: 0.0590\n",
386 | "\n",
387 | "Epoch 80/150\n",
388 | "train Loss: 0.0022 RMSE: 0.0468\n",
389 | "val Loss: 0.0032 RMSE: 0.0569\n",
390 | "\n",
391 | "Epoch 90/150\n",
392 | "train Loss: 0.0021 RMSE: 0.0454\n",
393 | "val Loss: 0.0030 RMSE: 0.0552\n",
394 | "\n",
395 | "Epoch 100/150\n",
396 | "train Loss: 0.0020 RMSE: 0.0443\n",
397 | "val Loss: 0.0029 RMSE: 0.0542\n",
398 | "\n",
399 | "Epoch 110/150\n",
400 | "train Loss: 0.0019 RMSE: 0.0434\n",
401 | "val Loss: 0.0028 RMSE: 0.0531\n",
402 | "\n",
403 | "Epoch 120/150\n",
404 | "train Loss: 0.0018 RMSE: 0.0428\n",
405 | "val Loss: 0.0027 RMSE: 0.0524\n",
406 | "\n",
407 | "Epoch 130/150\n",
408 | "train Loss: 0.0018 RMSE: 0.0422\n",
409 | "val Loss: 0.0026 RMSE: 0.0514\n",
410 | "\n",
411 | "Epoch 140/150\n",
412 | "train Loss: 0.0017 RMSE: 0.0416\n",
413 | "val Loss: 0.0026 RMSE: 0.0508\n",
414 | "\n",
415 | "Epoch 150/150\n",
416 | "train Loss: 0.0017 RMSE: 0.0414\n",
417 | "val Loss: 0.0025 RMSE: 0.0502\n",
418 | "\n",
419 | "Training complete in 63m 16s\n",
420 | "Best val MSE: 0.002523\n"
421 | ]
422 | }
423 | ],
424 | "source": [
425 | "# Case 4. model = scinet\n",
426 | "model_name = 'scinet'\n",
427 | "model_params = config.model_config[model_name]\n",
428 | "\n",
429 | "data_forecast = mf.Forecasting(model_params)\n",
430 | "best_model = data_forecast.train_model(train_data, valid_data) # train the model\n",
431 | "data_forecast.save_model(best_model, best_model_path=model_params[\"best_model_path\"]) # save the model"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": null,
437 | "metadata": {},
438 | "outputs": [],
439 | "source": []
440 | }
441 | ],
442 | "metadata": {
443 | "kernelspec": {
444 | "display_name": "iitp_time_serise",
445 | "language": "python",
446 | "name": "iitp"
447 | },
448 | "language_info": {
449 | "codemirror_mode": {
450 | "name": "ipython",
451 | "version": 3
452 | },
453 | "file_extension": ".py",
454 | "mimetype": "text/x-python",
455 | "name": "python",
456 | "nbconvert_exporter": "python",
457 | "pygments_lexer": "ipython3",
458 | "version": "3.7.11"
459 | },
460 | "vscode": {
461 | "interpreter": {
462 | "hash": "448579598765c533641c9414ab2d5f7dc2c763c0bd653033bda6f4c5cb841b26"
463 | }
464 | }
465 | },
466 | "nbformat": 4,
467 | "nbformat_minor": 5
468 | }
469 |
--------------------------------------------------------------------------------
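Test.ipynb is not reproduced in this section. Based on the pred_data docstring in main_forecasting.py, the evaluation side of the workflow would look roughly like the sketch below; the pickle-based scaler loading and the test-set normalization step are assumptions, since utils.py is not shown here.

import pickle

import config
import utils
import main_forecasting as mf

# load raw data and the MinMaxScaler fitted during training (path used in Train.ipynb)
train_data, test_data = utils.load_data('data')
with open('./scaler/minmax_scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)                                          # assumed: scaler saved via pickle
test_data = scaler.transform(test_data.reshape(-1, 1)).squeeze(-1)   # assumed normalization step

model_name = 'lstm'
model_params = config.model_config[model_name]

data_forecast = mf.Forecasting(model_params)
pred_df, mse, mae = data_forecast.pred_data(test_data, scaler,
                                            best_model_path=model_params["best_model_path"])
print(pred_df.head())
print(f"MSE: {mse:.6f}, MAE: {mae:.6f}")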
/config.py:
--------------------------------------------------------------------------------
1 | model_config = {
2 | "lstm": { # Case 1. model = lstm
3 | "model": 'lstm',
4 | "best_model_path": './ckpt/lstm.pt', # path to save the trained model
5 | "parameter": {
6 | "input_size" : 1, # number of input variables, int
7 | "window_size" : 48, # length of the input sequence, int
8 | "forecast_step" : 1, # number of future time steps to forecast, int
9 | "num_layers" : 2, # number of recurrent layers, int(default: 2, range: >= 1)
10 | "hidden_size" : 64, # dimension of the hidden state, int(default: 64, range: >= 1)
11 | "dropout" : 0.1, # dropout probability, float(default: 0.1, range: 0 to 1)
12 | "bidirectional" : True, # whether the model is bidirectional, bool(default: True)
13 | "num_epochs" : 150, # number of training epochs, int(default: 150, range: >= 1)
14 | "batch_size" : 64, # batch size, int(default: 64, range: >= 1, set according to available hardware)
15 | "lr" : 0.0001, # learning rate, float(default: 0.0001, range: <= 0.1)
16 | "device" : 'cuda' # training device (default: 'cuda', one of ['cuda', 'cpu'])
17 | }
18 | },
19 | 'gru': { # Case 2. model = gru
20 | "model": 'gru',
21 | "best_model_path": './ckpt/gru.pt', # path to save the trained model
22 | "parameter": {
23 | "input_size" : 1, # number of input variables, int
24 | "window_size" : 48, # length of the input sequence, int
25 | "forecast_step" : 1, # number of future time steps to forecast, int
26 | "num_layers" : 2, # number of recurrent layers, int(default: 2, range: >= 1)
27 | "hidden_size" : 64, # dimension of the hidden state, int(default: 64, range: >= 1)
28 | "dropout" : 0.1, # dropout probability, float(default: 0.1, range: 0 to 1)
29 | "bidirectional" : True, # whether the model is bidirectional, bool(default: True)
30 | "num_epochs" : 150, # number of training epochs, int(default: 150, range: >= 1)
31 | "batch_size" : 64, # batch size, int(default: 64, range: >= 1, set according to available hardware)
32 | "lr" : 0.0001, # learning rate, float(default: 0.0001, range: <= 0.1)
33 | "device" : 'cuda' # training device (default: 'cuda', one of ['cuda', 'cpu'])
34 | }
35 | },
36 | 'informer': { # Case 3. model = informer
37 | "model": 'informer',
38 | "best_model_path": './ckpt/informer.pt', # path to save the trained model
39 | "parameter": {
40 | "input_size" : 1, # number of input variables, int
41 | "window_size" : 48, # length of the input sequence, int
42 | "forecast_step" : 1, # number of future time steps to forecast, int
43 | "label_len" : 12, # length of the decoder start token, int(default: 12)
44 | "d_model" : 512, # hidden dimension of the model, int(default: 512)
45 | "e_layers" : 2, # number of encoder layers, int(default: 2)
46 | "d_layers" : 1, # number of decoder layers, int(default: 1)
47 | "d_ff" : 2048, # hidden dimension of the fully connected layer, int(default: 2048)
48 | "factor" : 5, # ProbSparse attention factor, int(default: 5)
49 | "dropout" : 0.05, # dropout ratio, float(default: 0.05)
50 | "attn" : 'prob', # attention mechanism (default: 'prob', one of ['prob', 'full'])
51 | "n_heads" : 8, # number of multi-head attention heads, int(default: 8)
52 | "embed" : 'timeF', # time features encoding method (default: 'timeF', one of ['timeF', 'fixed', 'learned'])
53 | "num_epochs" : 150, # number of training epochs, int(default: 150, range: >= 1)
54 | "batch_size" : 64, # batch size, int(default: 64, range: >= 1, set according to available hardware)
55 | "lr" : 0.0001, # learning rate, float(default: 0.0001, range: <= 0.1)
56 | "lradj" : 'type1', # learning rate adjustment scheme (default: 'type1', one of ['type1', 'type2'])
57 | "device" : 'cuda' # training device (default: 'cuda', one of ['cuda', 'cpu'])
58 | }
59 | },
60 | 'scinet': { # Case 4. model = scinet
61 | "model": 'scinet',
62 | "best_model_path": './ckpt/scinet.pt', # path to save the trained model
63 | "parameter": {
64 | "input_size" : 1, # number of input variables, int
65 | "window_size" : 48, # length of the input sequence, int
66 | "forecast_step" : 1, # number of future time steps to forecast, int
67 | "levels" : 2, # depth of the SCINet tree, int(default: 2, range: at most log2 of the input sequence length, 2-4 recommended)
68 | "stacks" : 1, # number of stacked SCINet blocks, int(default: 1, range: <= 3)
69 | "num_epochs" : 150, # number of training epochs, int(default: 150, range: >= 1)
70 | "batch_size" : 64, # batch size, int(default: 64, range: >= 1, set according to available hardware)
71 | "lr" : 0.00005, # learning rate, float(default: 0.0001, range: <= 0.1)
72 | "device" : 'cuda' # training device (default: 'cuda', one of ['cuda', 'cpu'])
73 | }
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
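The entries above are plain Python dictionaries, so individual settings can be adjusted before the Forecasting object is built; a small illustrative example (the checkpoint path './ckpt/gru_cpu.pt' is arbitrary):

import copy

import config

# deep-copy so the module-level defaults stay untouched
model_params = copy.deepcopy(config.model_config['gru'])
model_params['parameter']['device'] = 'cpu'      # train on CPU instead of CUDA
model_params['parameter']['num_epochs'] = 30     # shorter run for a quick experiment
model_params['best_model_path'] = './ckpt/gru_cpu.pt'
# model_params can now be passed to main_forecasting.Forecasting exactly as in Train.ipynb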
/main_forecasting.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import pandas as pd
4 | from sklearn.metrics import mean_absolute_error, mean_squared_error
5 |
6 | from models.rnn.trainer import Trainer_RNN
7 | from models.informer.trainer import Trainer_Informer
8 | from models.scinet.trainer import Trainer_SCINet
9 |
10 |
11 | class Forecasting():
12 | def __init__(self, config):
13 | """
14 | Initialize Forecasting class
15 |
16 | :param config: config
17 | :type config: dictionary
18 |
19 | example (training)
20 | >>> model_name = 'lstm'
21 | >>> model_params = config.model_config[model_name]
22 | >>> data_forecast = mf.Forecasting(model_params)
23 | >>> best_model = data_forecast.train_model(train_data, valid_data) # train the model
24 | >>> data_forecast.save_model(best_model, best_model_path=model_params["best_model_path"]) # save the model
25 |
26 | example (testing)
27 | >>> model_name = 'lstm'
28 | >>> model_params = config.model_config[model_name]
29 | >>> data_forecast = mf.Forecasting(model_params)
30 | >>> pred, mse, mae = data_forecast.pred_data(test_data, scaler, best_model_path=model_params["best_model_path"]) # predict
31 | """
32 |
33 | self.model_name = config['model']
34 | self.parameter = config['parameter']
35 |
36 | def build_model(self):
37 | """
38 | Build model and return initialized model for selected model_name
39 |
40 | :return: initialized model
41 | :rtype: model
42 | """
43 |
44 | # build initialized model
45 | if self.model_name == 'lstm':
46 | model = Trainer_RNN(self.parameter, model_name='lstm')
47 | elif self.model_name == 'gru':
48 | model = Trainer_RNN(self.parameter, model_name='gru')
49 | elif self.model_name == 'informer':
50 | model = Trainer_Informer(self.parameter)
51 | elif self.model_name == 'scinet':
52 | model = Trainer_SCINet(self.parameter)
53 | return model
54 |
55 | def train_model(self, train_data, valid_data):
56 | """
57 | Train model and return best model
58 |
59 | :param train_data: train data whose shape is (# time steps, 1)
60 | :type train_data: numpy array
61 |
62 | :param valid_data: validation data whose shape is (# time steps, 1)
63 | :type valid_data: numpy array
64 |
65 | :return: best trained model
66 | :rtype: model
67 | """
68 |
69 | print(f"Start training model: {self.model_name}")
70 |
71 | # build train/validation dataloaders
72 | train_loader = self.get_dataloader(train_data, self.parameter['window_size'],
73 | self.parameter['forecast_step'], self.parameter['batch_size'], shuffle=True)
74 | valid_loader = self.get_dataloader(valid_data, self.parameter['window_size'],
75 | self.parameter['forecast_step'], self.parameter['batch_size'], shuffle=False)
76 |
77 | # build initialized model
78 | init_model = self.build_model()
79 |
80 | # train model
81 | best_model = init_model.fit(train_loader, valid_loader)
82 | return best_model
83 |
84 | def save_model(self, best_model, best_model_path):
85 | """
86 | Save the best trained model
87 |
88 | :param best_model: best trained model
89 | :type best_model: model
90 |
91 | :param best_model_path: path for saving model
92 | :type best_model_path: str
93 | """
94 |
95 | # save model
96 | torch.save(best_model.state_dict(), best_model_path)
97 |
98 | def pred_data(self, test_data, scaler, best_model_path):
99 | """
100 | Predict future data for test dataset using the best trained model
101 |
102 | :param test_data: test data whose shape is (# time steps, 1)
103 | :type test_data: numpy array
104 |
105 | :param scaler: scaler fitted on train dataset
106 | :type: MinMaxScaler
107 |
108 | :param best_model_path: path for loading the best trained model
109 | :type best_model_path: str
110 |
111 | :return: true values and predicted values
112 | :rtype: DataFrame
113 |
114 | :return: test mse
115 | :rtype: float
116 |
117 | :return: test mae
118 | :rtype: float
119 | """
120 |
121 | print(f"Start testing model: {self.model_name}")
122 |
123 | # build test dataloader
124 | test_loader = self.get_dataloader(test_data, self.parameter['window_size'],
125 | self.parameter['forecast_step'], self.parameter['batch_size'], shuffle=False)
126 |
127 | # build initialized model
128 | init_model = self.build_model()
129 |
130 | # load best model
131 | init_model.model.load_state_dict(torch.load(best_model_path))
132 |
133 | # get prediction results
134 | # the number of predicted values = forecast_step * ((len(test_data)-window_size-forecast_step) // forecast_step + 1)
135 | # start time point of prediction = window_size
136 | # end time point of prediction = len(test_data) - (len(test_data)-window_size-forecast_step) % forecast_step - 1
137 | pred_data = init_model.test(test_loader) # shape=(the number of predicted values, 1)
138 |
139 | # select true data whose times match that of predicted values
140 | start_idx = self.parameter['window_size']
141 | end_idx = len(test_data) - (len(test_data)-self.parameter['window_size']-self.parameter['forecast_step']) % self.parameter['forecast_step'] - 1
142 | true_data = test_data[start_idx:end_idx+1]
143 |
144 | # inverse normalization to original scale
145 | true_data = scaler.inverse_transform(np.expand_dims(true_data, axis=-1))
146 | pred_data = scaler.inverse_transform(pred_data)
147 | true_data = true_data.squeeze(-1) # shape=(the number of predicted values, )
148 | pred_data = pred_data.squeeze(-1) # shape=(the number of predicted values, )
149 |
150 | # calculate performance metrics
151 | mse = mean_squared_error(true_data, pred_data)
152 | mae = mean_absolute_error(true_data, pred_data)
153 |
154 | # merge true value and predicted value
155 | pred_df = pd.DataFrame()
156 | pred_df['actual_value'] = true_data
157 | pred_df['predicted_value'] = pred_data
158 | return pred_df, mse, mae
159 |
160 | def get_dataloader(self, dataset, window_size, forecast_step, batch_size, shuffle):
161 | """
162 | Get DataLoader
163 |
164 | :param dataset: data whose shape is (# time steps, )
165 | :type dataset: numpy array
166 |
167 | :param window_size: window size
168 | :type window_size: int
169 |
170 | :param forecast_step: forecast step size
171 | :type forecast_step: int
172 |
173 | :param batch_size: batch size
174 | :type batch_size: int
175 |
176 | :param shuffle: shuffle for making batch
177 | :type shuffle: bool
178 |
179 | :return: dataloader
180 | :rtype: DataLoader
181 | """
182 |
183 | # check the data dimension and convert if needed => shape: (# time steps, 1)
184 | if len(dataset.shape) == 1:
185 | dataset = np.expand_dims(dataset, axis=-1)
186 |
187 | # input: time-series segments of length window_size
188 | # the full series is split into windows of length window_size with a sliding window (stride = forecast_step)
189 | T = dataset.shape[0]
190 | windows = [dataset[i : i+window_size] for i in range(0, T-window_size-forecast_step+1, forecast_step)]
191 |
192 | # target: the forecast_step future values immediately after the last time step of each input window (ground truth)
193 | targets = [dataset[i+window_size : i+window_size+forecast_step] for i in range(0, T-window_size-forecast_step+1, forecast_step)]
194 |
195 | # build torch dataset
196 | dataset = torch.utils.data.TensorDataset(torch.FloatTensor(np.array(windows)), torch.FloatTensor(np.array(targets)))
197 |
198 | # build DataLoader
199 | # windows: shape=(batch_size, window_size, 1) & targets: shape=(batch_size, forecast_step, 1)
200 | data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
201 | return data_loader
202 |
--------------------------------------------------------------------------------
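The index arithmetic in get_dataloader and pred_data is easiest to follow with concrete numbers. A stand-alone sketch using the default window_size=48, forecast_step=1 and a toy series of length 100 (numbers chosen only for illustration):

import numpy as np

T, window_size, forecast_step = 100, 48, 1
data = np.arange(T, dtype=float)

# same slicing as in Forecasting.get_dataloader
starts = range(0, T - window_size - forecast_step + 1, forecast_step)
windows = [data[i:i + window_size] for i in starts]
targets = [data[i + window_size:i + window_size + forecast_step] for i in starts]
print(len(windows), len(targets))          # 52 52

# same formulas as the comments in Forecasting.pred_data
n_pred = forecast_step * ((T - window_size - forecast_step) // forecast_step + 1)
start_idx = window_size                                               # first predicted time point
end_idx = T - (T - window_size - forecast_step) % forecast_step - 1   # last predicted time point
print(n_pred, start_idx, end_idx)          # 52 48 99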
/models/informer/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClustProject/KUDataForecasting/5e5720d5e4db46e404bd14206fef5673263cda7f/models/informer/layers/__init__.py
--------------------------------------------------------------------------------
/models/informer/layers/attn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | import numpy as np
6 |
7 | from math import sqrt
8 | from ..utils.masking import TriangularCausalMask, ProbMask
9 |
10 | class FullAttention(nn.Module):
11 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
12 | super(FullAttention, self).__init__()
13 | self.scale = scale
14 | self.mask_flag = mask_flag
15 | self.output_attention = output_attention
16 | self.dropout = nn.Dropout(attention_dropout)
17 |
18 | def forward(self, queries, keys, values, attn_mask):
19 | B, L, H, E = queries.shape
20 | _, S, _, D = values.shape
21 | scale = self.scale or 1./sqrt(E)
22 |
23 | scores = torch.einsum("blhe,bshe->bhls", queries, keys)
24 | if self.mask_flag:
25 | if attn_mask is None:
26 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
27 |
28 | scores.masked_fill_(attn_mask.mask, -np.inf)
29 |
30 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
31 | V = torch.einsum("bhls,bshd->blhd", A, values)
32 |
33 | if self.output_attention:
34 | return (V.contiguous(), A)
35 | else:
36 | return (V.contiguous(), None)
37 |
38 | class ProbAttention(nn.Module):
39 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
40 | super(ProbAttention, self).__init__()
41 | self.factor = factor
42 | self.scale = scale
43 | self.mask_flag = mask_flag
44 | self.output_attention = output_attention
45 | self.dropout = nn.Dropout(attention_dropout)
46 |
47 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
48 | # Q [B, H, L, D]
49 | B, H, L_K, E = K.shape
50 | _, _, L_Q, _ = Q.shape
51 |
52 | # calculate the sampled Q_K
53 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
54 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
55 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
56 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)
57 |
58 | # find the Top_k query with sparsity measurement
59 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
60 | M_top = M.topk(n_top, sorted=False)[1]
61 |
62 | # use the reduced Q to calculate Q_K
63 | Q_reduce = Q[torch.arange(B)[:, None, None],
64 | torch.arange(H)[None, :, None],
65 | M_top, :] # factor*ln(L_q)
66 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
67 |
68 | return Q_K, M_top
69 |
70 | def _get_initial_context(self, V, L_Q):
71 | B, H, L_V, D = V.shape
72 | if not self.mask_flag:
73 | # V_sum = V.sum(dim=-2)
74 | V_sum = V.mean(dim=-2)
75 | contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
76 | else: # use mask
77 | assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
78 | contex = V.cumsum(dim=-2)
79 | return contex
80 |
81 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
82 | B, H, L_V, D = V.shape
83 |
84 | if self.mask_flag:
85 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
86 | scores.masked_fill_(attn_mask.mask, -np.inf)
87 |
88 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
89 |
90 | context_in[torch.arange(B)[:, None, None],
91 | torch.arange(H)[None, :, None],
92 | index, :] = torch.matmul(attn, V).type_as(context_in)
93 | if self.output_attention:
94 | attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
95 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
96 | return (context_in, attns)
97 | else:
98 | return (context_in, None)
99 |
100 | def forward(self, queries, keys, values, attn_mask):
101 | B, L_Q, H, D = queries.shape
102 | _, L_K, _, _ = keys.shape
103 |
104 | queries = queries.transpose(2,1)
105 | keys = keys.transpose(2,1)
106 | values = values.transpose(2,1)
107 |
108 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
109 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
110 |
111 | U_part = U_part if U_part < L_K else L_K
--------------------------------------------------------------------------------
/models/informer/layers/embed.py:
--------------------------------------------------------------------------------
29 | padding = 1 if torch.__version__>='1.5.0' else 2
30 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
31 | kernel_size=3, padding=padding, padding_mode='circular')
32 | for m in self.modules():
33 | if isinstance(m, nn.Conv1d):
34 | nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='leaky_relu')
35 |
36 | def forward(self, x):
37 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
38 | return x
39 |
40 | class FixedEmbedding(nn.Module):
41 | def __init__(self, c_in, d_model):
42 | super(FixedEmbedding, self).__init__()
43 |
44 | w = torch.zeros(c_in, d_model).float()
45 | w.require_grad = False
46 |
47 | position = torch.arange(0, c_in).float().unsqueeze(1)
48 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
49 |
50 | w[:, 0::2] = torch.sin(position * div_term)
51 | w[:, 1::2] = torch.cos(position * div_term)
52 |
53 | self.emb = nn.Embedding(c_in, d_model)
54 | self.emb.weight = nn.Parameter(w, requires_grad=False)
55 |
56 | def forward(self, x):
57 | return self.emb(x).detach()
58 |
59 | class TemporalEmbedding(nn.Module):
60 | def __init__(self, d_model, embed_type='fixed', freq='h'):
61 | super(TemporalEmbedding, self).__init__()
62 |
63 | minute_size = 4; hour_size = 24
64 | weekday_size = 7; day_size = 32; month_size = 13
65 |
66 | Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
67 | if freq=='t':
68 | self.minute_embed = Embed(minute_size, d_model)
69 | self.hour_embed = Embed(hour_size, d_model)
70 | self.weekday_embed = Embed(weekday_size, d_model)
71 | self.day_embed = Embed(day_size, d_model)
72 | self.month_embed = Embed(month_size, d_model)
73 |
74 | def forward(self, x):
75 | x = x.long()
76 |
77 | minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0.
78 | hour_x = self.hour_embed(x[:,:,3])
79 | weekday_x = self.weekday_embed(x[:,:,2])
80 | day_x = self.day_embed(x[:,:,1])
81 | month_x = self.month_embed(x[:,:,0])
82 |
83 | return hour_x + weekday_x + day_x + month_x + minute_x
84 |
85 | class TimeFeatureEmbedding(nn.Module):
86 | def __init__(self, d_model, embed_type='timeF', freq='h'):
87 | super(TimeFeatureEmbedding, self).__init__()
88 |
89 | freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
90 | d_inp = freq_map[freq]
91 | self.embed = nn.Linear(d_inp, d_model)
92 |
93 | def forward(self, x):
94 | return self.embed(x)
95 |
96 | class DataEmbedding(nn.Module):
97 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
98 | super(DataEmbedding, self).__init__()
99 |
100 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
101 | self.position_embedding = PositionalEmbedding(d_model=d_model)
102 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
103 |
104 | self.dropout = nn.Dropout(p=dropout)
105 |
106 | def forward(self, x, x_mark):
107 | # x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark)
108 | x = self.value_embedding(x) + self.position_embedding(x)
109 | return self.dropout(x)
--------------------------------------------------------------------------------
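As the commented-out line in DataEmbedding.forward shows, this fork sums only the value and positional embeddings, so x_mark can be None. A shape check with the repository defaults (c_in=1, d_model=512, window_size=48; batch size 64 chosen for illustration):

import torch
# from models.informer.layers.embed import DataEmbedding

emb = DataEmbedding(c_in=1, d_model=512, dropout=0.05)
x = torch.randn(64, 48, 1)        # (batch, window_size, input_size)
out = emb(x, None)                # temporal embedding is unused in this fork
print(out.shape)                  # torch.Size([64, 48, 512])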
/models/informer/layers/encoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | class ConvLayer(nn.Module):
6 | def __init__(self, c_in):
7 | super(ConvLayer, self).__init__()
8 | padding = 1 if torch.__version__>='1.5.0' else 2
9 | self.downConv = nn.Conv1d(in_channels=c_in,
10 | out_channels=c_in,
11 | kernel_size=3,
12 | padding=padding,
13 | padding_mode='circular')
14 | self.norm = nn.BatchNorm1d(c_in)
15 | self.activation = nn.ELU()
16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
17 |
18 | def forward(self, x):
19 | x = self.downConv(x.permute(0, 2, 1))
20 | x = self.norm(x)
21 | x = self.activation(x)
22 | x = self.maxPool(x)
23 | x = x.transpose(1,2)
24 | return x
25 |
26 | class EncoderLayer(nn.Module):
27 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
28 | super(EncoderLayer, self).__init__()
29 | d_ff = d_ff or 4*d_model
30 | self.attention = attention
31 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
32 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
33 | self.norm1 = nn.LayerNorm(d_model)
34 | self.norm2 = nn.LayerNorm(d_model)
35 | self.dropout = nn.Dropout(dropout)
36 | self.activation = F.relu if activation == "relu" else F.gelu
37 |
38 | def forward(self, x, attn_mask=None):
39 | # x [B, L, D]
40 | # x = x + self.dropout(self.attention(
41 | # x, x, x,
42 | # attn_mask = attn_mask
43 | # ))
44 | new_x, attn = self.attention(
45 | x, x, x,
46 | attn_mask = attn_mask
47 | )
48 | x = x + self.dropout(new_x)
49 |
50 | y = x = self.norm1(x)
51 | y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
52 | y = self.dropout(self.conv2(y).transpose(-1,1))
53 |
54 | return self.norm2(x+y), attn
55 |
56 | class Encoder(nn.Module):
57 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
58 | super(Encoder, self).__init__()
59 | self.attn_layers = nn.ModuleList(attn_layers)
60 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
61 | self.norm = norm_layer
62 |
63 | def forward(self, x, attn_mask=None):
64 | # x [B, L, D]
65 | attns = []
66 | if self.conv_layers is not None:
67 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
68 | x, attn = attn_layer(x, attn_mask=attn_mask)
69 | x = conv_layer(x)
70 | attns.append(attn)
71 | x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
72 | attns.append(attn)
73 | else:
74 | for attn_layer in self.attn_layers:
75 | x, attn = attn_layer(x, attn_mask=attn_mask)
76 | attns.append(attn)
77 |
78 | if self.norm is not None:
79 | x = self.norm(x)
80 |
81 | return x, attns
82 |
--------------------------------------------------------------------------------
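ConvLayer is Informer's self-attention distilling step: the max-pool with stride 2 roughly halves the sequence length between encoder layers. A quick shape check, assuming d_model=512 and window_size=48 from the config:

import torch
# from models.informer.layers.encoder import ConvLayer

conv = ConvLayer(c_in=512)
x = torch.randn(64, 48, 512)      # (batch, seq_len, d_model)
print(conv(x).shape)              # torch.Size([64, 24, 512]) -- seq_len halved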
/models/informer/layers/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from ..utils.masking import TriangularCausalMask, ProbMask
6 | from ..layers.encoder import Encoder, EncoderLayer, ConvLayer
7 | from ..layers.decoder import Decoder, DecoderLayer
8 | from ..layers.attn import FullAttention, ProbAttention, AttentionLayer
9 | from ..layers.embed import DataEmbedding
10 |
11 | class Informer(nn.Module):
12 | def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len,
13 | factor=5, d_model=512, n_heads=8, e_layers=3, d_layers=2, d_ff=512,
14 | dropout=0.0, attn='prob', embed='fixed', freq='h', activation='gelu',
15 | output_attention = False, distil=True, mix=True,
16 | device=torch.device('cuda:0')):
17 | super(Informer, self).__init__()
18 | self.pred_len = out_len
19 | self.attn = attn
20 | self.output_attention = output_attention
21 |
22 | # Encoding
23 | self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq, dropout)
24 | self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq, dropout)
25 | # Attention
26 | Attn = ProbAttention if attn=='prob' else FullAttention
27 | # Encoder
28 | self.encoder = Encoder(
29 | [
30 | EncoderLayer(
31 | AttentionLayer(Attn(False, factor, attention_dropout=dropout, output_attention=output_attention),
32 | d_model, n_heads, mix=False),
33 | d_model,
34 | d_ff,
35 | dropout=dropout,
36 | activation=activation
37 | ) for l in range(e_layers)
38 | ],
39 | [
40 | ConvLayer(
41 | d_model
42 | ) for l in range(e_layers-1)
43 | ] if distil else None,
44 | norm_layer=torch.nn.LayerNorm(d_model)
45 | )
46 | # Decoder
47 | self.decoder = Decoder(
48 | [
49 | DecoderLayer(
50 | AttentionLayer(Attn(True, factor, attention_dropout=dropout, output_attention=False),
51 | d_model, n_heads, mix=mix),
52 | AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False),
53 | d_model, n_heads, mix=False),
54 | d_model,
55 | d_ff,
56 | dropout=dropout,
57 | activation=activation,
58 | )
59 | for l in range(d_layers)
60 | ],
61 | norm_layer=torch.nn.LayerNorm(d_model)
62 | )
63 | # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
64 | # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
65 | self.projection = nn.Linear(d_model, c_out, bias=True)
66 |
67 | def forward(self, x_enc, x_dec, x_mark_enc=None, x_mark_dec=None,
68 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
69 | enc_out = self.enc_embedding(x_enc, x_mark_enc)
70 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
71 |
72 | dec_out = self.dec_embedding(x_dec, x_mark_dec)
73 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
74 | dec_out = self.projection(dec_out)
75 |
76 | # dec_out = self.end_conv1(dec_out)
77 | # dec_out = self.end_conv2(dec_out.transpose(2,1)).transpose(1,2)
78 | if self.output_attention:
79 | return dec_out[:,-self.pred_len:,:], attns
80 | else:
81 | return dec_out[:,-self.pred_len:,:] # [B, L, D]
82 |
--------------------------------------------------------------------------------
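A minimal smoke test of the model with the repository's Informer defaults. The decoder input below follows the usual Informer convention of label_len + forecast_step steps; this is only a shape sketch, not a reproduction of how Trainer_Informer builds its batches:

import torch
# from models.informer.layers.model import Informer

model = Informer(enc_in=1, dec_in=1, c_out=1,
                 seq_len=48, label_len=12, out_len=1,
                 factor=5, d_model=512, n_heads=8,
                 e_layers=2, d_layers=1, d_ff=2048,
                 dropout=0.05, attn='prob', embed='timeF',
                 device=torch.device('cpu'))

x_enc = torch.randn(4, 48, 1)        # (batch, window_size, input_size)
x_dec = torch.randn(4, 12 + 1, 1)    # (batch, label_len + forecast_step, input_size)
out = model(x_enc, x_dec)
print(out.shape)                     # torch.Size([4, 1, 1]) -- forecast_step future values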
/models/informer/trainer.py:
--------------------------------------------------------------------------------
1 | from .layers.model import Informer
2 |
3 | from .utils.tools import adjust_learning_rate
4 |
5 | import torch
6 | import torch.nn as nn
7 | from torch import optim
8 |
9 | import os
10 | import time
11 | import copy
12 | import numpy as np
13 |
14 | import warnings
15 | warnings.filterwarnings('ignore')
16 |
17 |
18 | class Trainer_Informer:
19 | def __init__(self, config):
20 | """
21 | Initialize class
22 |
23 | :param config: configuration
24 | :type config: dictionary
25 | """
26 |
27 | self.config = config
28 | self.model = Informer(
29 | self.config['input_size'],
30 | self.config['input_size'],
31 | self.config['input_size'],
32 | self.config['window_size'],
33 | self.config['label_len'],
34 | self.config['forecast_step'],
35 | self.config['factor'],
36 | self.config['d_model'],
37 | self.config['n_heads'],
38 | self.config['e_layers'],
39 | self.config['d_layers'],
40 | self.config['d_ff'],
41 | self.config['dropout'],
42 | self.config['attn'],
43 | self.config['embed'],
44 | device = self.config['device']
45 | ).float().to(self.config['device'])
46 |
47 | def fit(self, train_loader, valid_loader):
48 | """
49 | Train the model and return the best trained model
50 |
51 | :param train_loader: train dataloader
52 | :type train_loader: DataLoader
53 |
54 | :param valid_loader: validation dataloader
55 | :type valid_loader: DataLoader
56 |
57 | :return: trained model
58 | :rtype: model
59 | """
60 |
61 | since = time.time()
62 |
63 | model_optim = optim.Adam(self.model.parameters(), lr=self.config['lr'])
64 | criterion = nn.MSELoss()
65 |
66 | best_model_wts = copy.deepcopy(self.model.state_dict())
67 | best_val_loss = 100000000
68 |
69 | for epoch in range(self.config['num_epochs']):
70 | train_loss = []
71 |
72 | self.model.train()
73 | for i, (batch_x, batch_y) in enumerate(train_loader):
74 | model_optim.zero_grad()
75 |
76 | pred, true = self._process_one_batch(batch_x, batch_y)
77 |
78 | loss = criterion(pred, true)
79 | train_loss.append(loss.item())
80 |
81 | loss.backward()
82 | model_optim.step()
83 |
84 | train_loss = np.average(train_loss)
85 | valid_loss = self.valid(valid_loader, criterion)
86 |
87 | if epoch == 0 or (epoch + 1) % 10 == 0:
88 | print()
89 | print('Epoch {}/{}'.format(epoch + 1, self.config['num_epochs']))
90 | print('train Loss: {:.4f} RMSE: {:.4f}'.format(train_loss, np.sqrt(train_loss)))
91 | print('val Loss: {:.4f} RMSE: {:.4f}'.format(valid_loss, np.sqrt(valid_loss)))
92 |
93 | if valid_loss < best_val_loss:
94 | best_val_loss = valid_loss
95 | best_model_wts = copy.deepcopy(self.model.state_dict())
96 |
97 | adjust_learning_rate(model_optim, epoch + 1, self.config)
98 |
99 | time_elapsed = time.time() - since
100 | print('\nTraining complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
101 | print('Best val MSE: {:4f}'.format(best_val_loss))
102 |
103 | self.model.load_state_dict(best_model_wts)
104 | return self.model
105 |
106 | def _process_one_batch(self, batch_x, batch_y):
107 | """
108 | Train the model for one batch
109 |
110 | :param batch_x: batch data for input
111 | :type batch_x: Tensor
112 |
113 | :param batch_y: batch data for target label
114 | :type batch_y: Tensor
115 |
116 | :return: outputs from the model and target label
117 | :rtype: Tensor
118 | """
119 |
120 | batch_x = batch_x.float().to(self.config['device'])
121 | batch_y = batch_y.float()
122 |
123 | # decoder input
124 | # zero padding
125 | dec_inp = torch.zeros([batch_y.shape[0], self.config['forecast_step'], batch_y.shape[-1]]).float()
126 | dec_inp = torch.cat([batch_y[:,:self.config['label_len'],:], dec_inp], dim=1).float().to(self.config['device'])
127 |
128 | # encoder - decoder
129 | outputs = self.model(batch_x, dec_inp)
130 | batch_y = batch_y[:,-self.config['forecast_step']:,:].to(self.config['device'])
131 | return outputs, batch_y
132 |
133 | def valid(self, valid_loader, criterion):
134 | """
135 | Evaluate the model in training step
136 |
137 | :param valid_loader: validation dataloader
138 | :type valid_loader: DataLoader
139 |
140 | :param criterion: criterion for calculating validation loss
141 | :type criterion: Class
142 |
143 | :return: average validation loss for all validation dataset
144 | :rtype: Tensor
145 | """
146 |
147 | self.model.eval()
148 |
149 | total_loss = []
150 | for i, (batch_x, batch_y) in enumerate(valid_loader):
151 | pred, true = self._process_one_batch(batch_x, batch_y)
152 |
153 | loss = criterion(pred.detach().cpu(), true.detach().cpu())
154 | total_loss.append(loss)
155 |
156 | total_loss = np.average(total_loss)
157 | return total_loss
158 |
159 | def test(self, test_loader):
160 | """
161 | Predict future values based on the best trained model
162 |
163 | :param test_loader: test dataloader
164 | :type test_loader: DataLoader
165 |
166 | :return: predicted values
167 | :rtype: numpy array
168 | """
169 |
170 | self.model.eval()
171 |
172 | preds = []
173 | for i, (batch_x, batch_y) in enumerate(test_loader):
174 | pred, true = self._process_one_batch(batch_x, batch_y)
175 | preds.append(pred.detach().cpu().numpy())
176 |
177 | preds = np.concatenate(preds)
178 | preds = preds.reshape(-1, preds.shape[-1])
179 | return preds
--------------------------------------------------------------------------------
/models/informer/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClustProject/KUDataForecasting/5e5720d5e4db46e404bd14206fef5673263cda7f/models/informer/utils/__init__.py
--------------------------------------------------------------------------------
/models/informer/utils/masking.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class TriangularCausalMask():
4 | def __init__(self, B, L, device="cpu"):
5 | mask_shape = [B, 1, L, L]
6 | with torch.no_grad():
7 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
8 |
9 | @property
10 | def mask(self):
11 | return self._mask
12 |
13 | class ProbMask():
14 | def __init__(self, B, H, L, index, scores, device="cpu"):
15 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
16 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
17 | indicator = _mask_ex[torch.arange(B)[:, None, None],
18 | torch.arange(H)[None, :, None],
19 | index, :].to(device)
20 | self._mask = indicator.view(scores.shape).to(device)
21 |
22 | @property
23 | def mask(self):
24 | return self._mask
--------------------------------------------------------------------------------
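TriangularCausalMask builds an upper-triangular boolean tensor; positions marked True are filled with -inf in FullAttention so no query can attend to future keys. A tiny example with L=4:

import torch
# from models.informer.utils.masking import TriangularCausalMask

m = TriangularCausalMask(B=1, L=4)
print(m.mask[0, 0].int())
# tensor([[0, 1, 1, 1],
#         [0, 0, 1, 1],
#         [0, 0, 0, 1],
#         [0, 0, 0, 0]], dtype=torch.int32)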
/models/informer/utils/tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | def adjust_learning_rate(optimizer, epoch, config):
5 | # lr = args.learning_rate * (0.2 ** (epoch // 2))
6 | if config['lradj']=='type1':
7 | lr_adjust = {epoch: config['lr'] * (0.5 ** ((epoch-1) // 1))}
8 | elif config['lradj']=='type2':
9 | lr_adjust = {
10 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
11 | 10: 5e-7, 15: 1e-7, 20: 5e-8
12 | }
13 | if epoch in lr_adjust.keys():
14 | lr = lr_adjust[epoch]
15 | for param_group in optimizer.param_groups:
16 | param_group['lr'] = lr
17 |
18 | class EarlyStopping:
19 | def __init__(self, patience=7, verbose=False, delta=0):
20 | self.patience = patience
21 | self.verbose = verbose
22 | self.counter = 0
23 | self.best_score = None
24 | self.early_stop = False
25 | self.val_loss_min = np.Inf
26 | self.delta = delta
27 |
28 | def __call__(self, val_loss, model):
29 | score = -val_loss
30 | if self.best_score is None:
31 | self.best_score = score
32 | self.logging(val_loss, model)
33 | elif score < self.best_score + self.delta:
34 | self.counter += 1
35 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
36 | if self.counter >= self.patience:
37 | self.early_stop = True
38 | else:
39 | self.best_score = score
40 | self.logging(val_loss, model)
41 | self.counter = 0
42 |
43 | def logging(self, val_loss, model):
44 | if self.verbose:
45 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f})')
46 | self.val_loss_min = val_loss
47 |
--------------------------------------------------------------------------------
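With the default lr of 0.0001 and lradj='type1', the learning rate is halved after every epoch, while 'type2' switches to the fixed values listed above at specific epochs. A short trace of the 'type1' schedule, computed directly from the formula above:

lr0 = 0.0001
for epoch in range(1, 6):
    print(epoch, lr0 * (0.5 ** ((epoch - 1) // 1)))
# 1 0.0001
# 2 5e-05
# 3 2.5e-05
# 4 1.25e-05
# 5 6.25e-06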
/models/rnn/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class RNN(nn.Module):
6 | def __init__(self, input_size, hidden_size, num_layers, bidirectional, rnn_type, forecast_step, device):
7 | super(RNN, self).__init__()
8 | self.hidden_size = hidden_size
9 | self.num_layers = num_layers
10 | self.rnn_type = rnn_type
11 | self.num_directions = 2 if bidirectional == True else 1
12 | self.device = device
13 |
14 | # select the recurrent layer according to rnn_type
15 | if self.rnn_type == 'rnn':
16 | self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
17 | elif self.rnn_type == 'lstm':
18 | self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
19 | elif self.rnn_type == 'gru':
20 | self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
21 |
22 | # build the fc layer according to the bidirectional setting
23 | # the hidden state dimension depends on bidirectional (True: 2 * hidden_size, False: hidden_size)
24 | self.fc = nn.Linear(self.num_directions * hidden_size, forecast_step)
25 |
26 | def forward(self, x): # (batch_size x seq_len x input_size)
27 | # set up the initial hidden states
28 | h0 = torch.zeros(self.num_directions * self.num_layers, x.size(0), self.hidden_size).to(self.device)
29 |
30 | # compute the output from the RNN of the selected rnn_type
31 | if self.rnn_type in ['rnn', 'gru']:
32 | out, _ = self.rnn(x, h0) # out: tensor of shape (batch_size, seq_length, hidden_size)
33 | else:
34 | # set up the initial cell states
35 | c0 = torch.zeros(self.num_directions * self.num_layers, x.size(0), self.hidden_size).to(self.device)
36 | out, _ = self.rnn(x, (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size)
37 |
38 | out = self.fc(out[:, -1, :])
39 | return out
--------------------------------------------------------------------------------
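A shape walk-through with the LSTM defaults (input_size=1, hidden_size=64, num_layers=2, bidirectional=True, forecast_step=1): the bidirectional output doubles the feature dimension to 2 * hidden_size before the final linear layer.

import torch
# from models.rnn.model import RNN

model = RNN(input_size=1, hidden_size=64, num_layers=2, bidirectional=True,
            rnn_type='lstm', forecast_step=1, device='cpu')
x = torch.randn(64, 48, 1)        # (batch, window_size, input_size)
out = model(x)                    # internally: LSTM output (64, 48, 128) -> last step -> fc
print(out.shape)                  # torch.Size([64, 1]) -- one value per forecast step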
/models/rnn/trainer.py:
--------------------------------------------------------------------------------
1 | import time
2 | import copy
3 | import numpy as np
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.optim as optim
8 |
9 | from models.rnn.model import RNN
10 |
11 |
12 | class Trainer_RNN:
13 | def __init__(self, config, model_name):
14 | """
15 | Initialize class
16 |
17 | :param config: configuration
18 | :type config: dictionary
19 | """
20 |
21 | self.num_epochs = config['num_epochs']
22 | self.dropout = config['dropout']
23 | self.lr = config['lr']
24 | self.device = config['device']
25 |
26 | self.model_name = model_name
27 | self.model = RNN(
28 | config['input_size'],
29 | config['hidden_size'],
30 | config['num_layers'],
31 | config['bidirectional'],
32 | self.model_name,
33 | config['forecast_step'],
34 | config['device']).to(self.device)
35 |
36 | def fit(self, train_loader, valid_loader):
37 | """
38 | Train the model and return the best trained model
39 |
40 | :param train_loader: train dataloader
41 | :type train_loader: DataLoader
42 |
43 | :param valid_loader: validation dataloader
44 | :type valid_loader: DataLoader
45 |
46 | :return: trained model
47 | :rtype: model
48 | """
49 |
50 | since = time.time()
51 |
52 | best_model_wts = copy.deepcopy(self.model.state_dict())
53 | best_val_loss = 10000000
54 |
55 | optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
56 | criterion = nn.MSELoss()
57 |
58 | for epoch in range(self.num_epochs):
59 | if epoch == 0 or (epoch + 1) % 10 == 0:
60 | print()
61 | print('Epoch {}/{}'.format(epoch + 1, self.num_epochs))
62 |
63 | # run the training and validation phases in order for each epoch
64 | for phase in ['train', 'val']:
65 | if phase == 'train':
66 | self.model.train() # set the model to training mode
67 | dataloader = train_loader
68 | else:
69 | self.model.eval() # set the model to evaluation mode
70 | dataloader = valid_loader
71 | running_loss = 0.0
72 | running_total = 0
73 |
74 | # run training/validation over the dataloader for the current phase
75 | for inputs, labels in dataloader:
76 | inputs = inputs.to(self.device)
77 | labels = labels.to(self.device)
78 |
79 | # zero the parameter gradients
80 | optimizer.zero_grad()
81 |
82 | # forward
83 | # enable gradient tracking only in the training phase
84 | with torch.set_grad_enabled(phase == 'train'):
85 | # feed the input to the model, compute the output, then compute the loss
86 | outputs = self.model(inputs)
87 | loss = criterion(outputs.unsqueeze(2), labels)
88 |
89 | # backward (optimize): performed only in the training phase
90 | if phase == 'train':
91 | loss.backward()
92 | optimizer.step()
93 |
94 | # accumulate the loss over batches
95 | running_loss += loss.item() * inputs.size(0)
96 | running_total += labels.size(0)
97 |
98 |                 # compute the loss and RMSE of the epoch
99 | epoch_loss = running_loss / running_total
100 | epoch_rmse = np.sqrt(running_loss / running_total)
101 |
102 | if epoch == 0 or (epoch + 1) % 10 == 0:
103 | print('{} Loss: {:.4f} RMSE: {:.4f}'.format(phase, epoch_loss, epoch_rmse))
104 |
105 |                 # in the validation phase, update the best model weights whenever the validation loss decreases
106 |                 if phase == 'val' and epoch_loss < best_val_loss:
107 |                     best_val_loss = epoch_loss
108 | best_model_wts = copy.deepcopy(self.model.state_dict())
109 |
110 |         # compute the total training time
111 | time_elapsed = time.time() - since
112 | print('\nTraining complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
113 |         print('Best val MSE: {:.4f}'.format(best_val_loss))
114 |
115 |         # load the best model weights, i.e., the weights with the lowest validation loss
116 | self.model.load_state_dict(best_model_wts)
117 | return self.model
118 |
119 | def test(self, test_loader):
120 | """
121 | Predict future values based on the best trained model
122 |
123 | :param test_loader: test dataloader
124 | :type test_loader: DataLoader
125 |
126 | :return: predicted values
127 | :rtype: numpy array
128 | """
129 |
130 | self.model.eval()
131 |
132 | preds = []
133 | with torch.no_grad():
134 | for inputs, _ in test_loader:
135 | inputs = inputs.to(self.device)
136 | outputs = self.model(inputs)
137 | preds.append(outputs.detach().cpu().numpy())
138 |
139 | preds = np.concatenate(preds)
140 | preds = preds.reshape(-1, 1)
141 | return preds
--------------------------------------------------------------------------------
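A hedged end-to-end sketch of how Trainer_RNN might be driven. The config keys mirror the ones the class reads; the tensors and loaders are toy stand-ins for real windowed data, and the same loader is reused as the train and validation loader only for brevity.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from models.rnn.trainer import Trainer_RNN

# hypothetical config; values are illustrative, not repository defaults
config = {'input_size': 1, 'hidden_size': 64, 'num_layers': 2, 'bidirectional': True,
          'forecast_step': 6, 'dropout': 0.1, 'num_epochs': 2, 'lr': 0.0001, 'device': 'cpu'}

# toy windowed data: inputs (N, window_size, input_size), targets (N, forecast_step, 1)
x = torch.randn(128, 48, 1)
y = torch.randn(128, 6, 1)
loader = DataLoader(TensorDataset(x, y), batch_size=64)

trainer = Trainer_RNN(config, 'lstm')      # 'gru' works the same way
best_model = trainer.fit(loader, loader)   # returns the weights with the lowest validation loss
preds = trainer.test(loader)               # numpy array of shape (N * forecast_step, 1)
```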
/models/scinet/SCINet.py:
--------------------------------------------------------------------------------
1 |
2 | import math
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | from torch import nn
6 | import torch
7 | import argparse
8 | import numpy as np
9 |
10 | class Splitting(nn.Module):
11 | def __init__(self):
12 | super(Splitting, self).__init__()
13 |
14 | def even(self, x):
15 | return x[:, ::2, :]
16 |
17 | def odd(self, x):
18 | return x[:, 1::2, :]
19 |
20 | def forward(self, x):
21 |         '''Returns the even and odd parts of the sequence'''
22 | return (self.even(x), self.odd(x))
23 |
24 |
25 | class Interactor(nn.Module):
26 | def __init__(self, in_planes, splitting=True,
27 | kernel = 5, dropout=0.5, groups = 1, hidden_size = 1, INN = True):
28 | super(Interactor, self).__init__()
29 | self.modified = INN
30 | self.kernel_size = kernel
31 | self.dilation = 1
32 | self.dropout = dropout
33 | self.hidden_size = hidden_size
34 | self.groups = groups
35 | if self.kernel_size % 2 == 0:
36 | pad_l = self.dilation * (self.kernel_size - 2) // 2 + 1 #by default: stride==1
37 | pad_r = self.dilation * (self.kernel_size) // 2 + 1 #by default: stride==1
38 |
39 | else:
40 | pad_l = self.dilation * (self.kernel_size - 1) // 2 + 1 # we fix the kernel size of the second layer as 3.
41 | pad_r = self.dilation * (self.kernel_size - 1) // 2 + 1
42 | self.splitting = splitting
43 | self.split = Splitting()
44 |
45 | modules_P = []
46 | modules_U = []
47 | modules_psi = []
48 | modules_phi = []
49 | prev_size = 1
50 |
51 | size_hidden = self.hidden_size
52 | modules_P += [
53 | nn.ReplicationPad1d((pad_l, pad_r)),
54 |
55 | nn.Conv1d(in_planes * prev_size, int(in_planes * size_hidden),
56 | kernel_size=self.kernel_size, dilation=self.dilation, stride=1, groups= self.groups),
57 | nn.LeakyReLU(negative_slope=0.01, inplace=True),
58 |
59 | nn.Dropout(self.dropout),
60 | nn.Conv1d(int(in_planes * size_hidden), in_planes,
61 | kernel_size=3, stride=1, groups= self.groups),
62 | nn.Tanh()
63 | ]
64 | modules_U += [
65 | nn.ReplicationPad1d((pad_l, pad_r)),
66 | nn.Conv1d(in_planes * prev_size, int(in_planes * size_hidden),
67 | kernel_size=self.kernel_size, dilation=self.dilation, stride=1, groups= self.groups),
68 | nn.LeakyReLU(negative_slope=0.01, inplace=True),
69 | nn.Dropout(self.dropout),
70 | nn.Conv1d(int(in_planes * size_hidden), in_planes,
71 | kernel_size=3, stride=1, groups= self.groups),
72 | nn.Tanh()
73 | ]
74 |
75 | modules_phi += [
76 | nn.ReplicationPad1d((pad_l, pad_r)),
77 | nn.Conv1d(in_planes * prev_size, int(in_planes * size_hidden),
78 | kernel_size=self.kernel_size, dilation=self.dilation, stride=1, groups= self.groups),
79 | nn.LeakyReLU(negative_slope=0.01, inplace=True),
80 | nn.Dropout(self.dropout),
81 | nn.Conv1d(int(in_planes * size_hidden), in_planes,
82 | kernel_size=3, stride=1, groups= self.groups),
83 | nn.Tanh()
84 | ]
85 | modules_psi += [
86 | nn.ReplicationPad1d((pad_l, pad_r)),
87 | nn.Conv1d(in_planes * prev_size, int(in_planes * size_hidden),
88 | kernel_size=self.kernel_size, dilation=self.dilation, stride=1, groups= self.groups),
89 | nn.LeakyReLU(negative_slope=0.01, inplace=True),
90 | nn.Dropout(self.dropout),
91 | nn.Conv1d(int(in_planes * size_hidden), in_planes,
92 | kernel_size=3, stride=1, groups= self.groups),
93 | nn.Tanh()
94 | ]
95 | self.phi = nn.Sequential(*modules_phi)
96 | self.psi = nn.Sequential(*modules_psi)
97 | self.P = nn.Sequential(*modules_P)
98 | self.U = nn.Sequential(*modules_U)
99 |
100 | def forward(self, x):
101 | if self.splitting:
102 | (x_even, x_odd) = self.split(x)
103 | else:
104 | (x_even, x_odd) = x
105 |
106 | if self.modified:
107 | x_even = x_even.permute(0, 2, 1)
108 | x_odd = x_odd.permute(0, 2, 1)
109 |
110 | d = x_odd.mul(torch.exp(self.phi(x_even)))
111 | c = x_even.mul(torch.exp(self.psi(x_odd)))
112 |
113 | x_even_update = c + self.U(d)
114 | x_odd_update = d - self.P(c)
115 |
116 | return (x_even_update, x_odd_update)
117 |
118 | else:
119 | x_even = x_even.permute(0, 2, 1)
120 | x_odd = x_odd.permute(0, 2, 1)
121 |
122 | d = x_odd - self.P(x_even)
123 | c = x_even + self.U(d)
124 | return (c, d)
125 |
126 | class InteractorLevel(nn.Module):
127 | def __init__(self, in_planes, kernel, dropout, groups , hidden_size, INN):
128 | super(InteractorLevel, self).__init__()
129 | self.level = Interactor(in_planes = in_planes, splitting=True,
130 | kernel = kernel, dropout=dropout, groups = groups, hidden_size = hidden_size, INN = INN)
131 |
132 | def forward(self, x):
133 | (x_even_update, x_odd_update) = self.level(x)
134 | return (x_even_update, x_odd_update)
135 |
136 | class LevelSCINet(nn.Module):
137 | def __init__(self,in_planes, kernel_size, dropout, groups, hidden_size, INN):
138 | super(LevelSCINet, self).__init__()
139 | self.interact = InteractorLevel(in_planes= in_planes, kernel = kernel_size, dropout = dropout, groups =groups , hidden_size = hidden_size, INN = INN)
140 |
141 | def forward(self, x):
142 | (x_even_update, x_odd_update) = self.interact(x)
143 | return x_even_update.permute(0, 2, 1), x_odd_update.permute(0, 2, 1) #even: B, T, D odd: B, T, D
144 |
145 | class SCINet_Tree(nn.Module):
146 | def __init__(self, in_planes, current_level, kernel_size, dropout, groups, hidden_size, INN):
147 | super().__init__()
148 | self.current_level = current_level
149 |
150 | self.workingblock = LevelSCINet(
151 | in_planes = in_planes,
152 | kernel_size = kernel_size,
153 | dropout = dropout,
154 | groups= groups,
155 | hidden_size = hidden_size,
156 | INN = INN)
157 |
158 | if current_level!=0:
159 | self.SCINet_Tree_odd=SCINet_Tree(in_planes, current_level-1, kernel_size, dropout, groups, hidden_size, INN)
160 | self.SCINet_Tree_even=SCINet_Tree(in_planes, current_level-1, kernel_size, dropout, groups, hidden_size, INN)
161 |
162 | def zip_up_the_pants(self, even, odd):
163 | even = even.permute(1, 0, 2)
164 | odd = odd.permute(1, 0, 2) #L, B, D
165 | even_len = even.shape[0]
166 | odd_len = odd.shape[0]
167 | mlen = min((odd_len, even_len))
168 |         merged = []
169 |         for i in range(mlen):
170 |             merged.append(even[i].unsqueeze(0))
171 |             merged.append(odd[i].unsqueeze(0))
172 |         if odd_len < even_len:
173 |             merged.append(even[-1].unsqueeze(0))
174 |         return torch.cat(merged, 0).permute(1, 0, 2) #B, L, D
175 |
176 | def forward(self, x):
177 | x_even_update, x_odd_update= self.workingblock(x)
178 | # We recursively reordered these sub-series. You can run the ./utils/recursive_demo.py to emulate this procedure.
179 | if self.current_level ==0:
180 | return self.zip_up_the_pants(x_even_update, x_odd_update)
181 | else:
182 | return self.zip_up_the_pants(self.SCINet_Tree_even(x_even_update), self.SCINet_Tree_odd(x_odd_update))
183 |
184 | class EncoderTree(nn.Module):
185 | def __init__(self, in_planes, num_levels, kernel_size, dropout, groups, hidden_size, INN):
186 | super().__init__()
187 | self.levels=num_levels
188 | self.SCINet_Tree = SCINet_Tree(
189 | in_planes = in_planes,
190 | current_level = num_levels-1,
191 | kernel_size = kernel_size,
192 | dropout =dropout ,
193 | groups = groups,
194 | hidden_size = hidden_size,
195 | INN = INN)
196 |
197 | def forward(self, x):
198 | x= self.SCINet_Tree(x)
199 | return x
200 |
201 | class SCINet(nn.Module):
202 | def __init__(self, output_len, input_len, input_dim = 9, hid_size = 1, num_stacks = 1,
203 | num_levels = 3, num_decoder_layer = 1, concat_len = 0, groups = 1, kernel = 5, dropout = 0.5,
204 | single_step_output_One = 0, input_len_seg = 0, positionalE = False, modified = True, RIN=False):
205 | super(SCINet, self).__init__()
206 |
207 | self.input_dim = input_dim
208 | self.input_len = input_len
209 | self.output_len = output_len
210 | self.hidden_size = hid_size
211 | self.num_levels = num_levels
212 | self.groups = groups
213 | self.modified = modified
214 | self.kernel_size = kernel
215 | self.dropout = dropout
216 | self.single_step_output_One = single_step_output_One
217 | self.concat_len = concat_len
218 | self.pe = positionalE
219 | self.RIN=RIN
220 | self.num_decoder_layer = num_decoder_layer
221 |
222 | self.blocks1 = EncoderTree(
223 | in_planes=self.input_dim,
224 | num_levels = self.num_levels,
225 | kernel_size = self.kernel_size,
226 | dropout = self.dropout,
227 | groups = self.groups,
228 | hidden_size = self.hidden_size,
229 | INN = modified)
230 |
231 | if num_stacks == 2: # we only implement two stacks at most.
232 | self.blocks2 = EncoderTree(
233 | in_planes=self.input_dim,
234 | num_levels = self.num_levels,
235 | kernel_size = self.kernel_size,
236 | dropout = self.dropout,
237 | groups = self.groups,
238 | hidden_size = self.hidden_size,
239 | INN = modified)
240 |
241 | self.stacks = num_stacks
242 |
243 | for m in self.modules():
244 | if isinstance(m, nn.Conv2d):
245 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
246 | m.weight.data.normal_(0, math.sqrt(2. / n))
247 | elif isinstance(m, nn.BatchNorm2d):
248 | m.weight.data.fill_(1)
249 | m.bias.data.zero_()
250 | elif isinstance(m, nn.Linear):
251 | m.bias.data.zero_()
252 | self.projection1 = nn.Conv1d(self.input_len, self.output_len, kernel_size=1, stride=1, bias=False)
253 | self.div_projection = nn.ModuleList()
254 | self.overlap_len = self.input_len//4
255 | self.div_len = self.input_len//6
256 |
257 | if self.num_decoder_layer > 1:
258 | self.projection1 = nn.Linear(self.input_len, self.output_len)
259 | for layer_idx in range(self.num_decoder_layer-1):
260 | div_projection = nn.ModuleList()
261 | for i in range(6):
262 | lens = min(i*self.div_len+self.overlap_len,self.input_len) - i*self.div_len
263 | div_projection.append(nn.Linear(lens, self.div_len))
264 | self.div_projection.append(div_projection)
265 |
266 | if self.single_step_output_One: # only output the N_th timestep.
267 | if self.stacks == 2:
268 | if self.concat_len:
269 | self.projection2 = nn.Conv1d(self.concat_len + self.output_len, 1,
270 | kernel_size = 1, bias = False)
271 | else:
272 | self.projection2 = nn.Conv1d(self.input_len + self.output_len, 1,
273 | kernel_size = 1, bias = False)
274 | else: # output the N timesteps.
275 | if self.stacks == 2:
276 | if self.concat_len:
277 | self.projection2 = nn.Conv1d(self.concat_len + self.output_len, self.output_len,
278 | kernel_size = 1, bias = False)
279 | else:
280 | self.projection2 = nn.Conv1d(self.input_len + self.output_len, self.output_len,
281 | kernel_size = 1, bias = False)
282 |
283 | # For positional encoding
284 | self.pe_hidden_size = input_dim
285 | if self.pe_hidden_size % 2 == 1:
286 | self.pe_hidden_size += 1
287 |
288 | num_timescales = self.pe_hidden_size // 2
289 | max_timescale = 10000.0
290 | min_timescale = 1.0
291 |
292 | log_timescale_increment = (
293 | math.log(float(max_timescale) / float(min_timescale)) /
294 | max(num_timescales - 1, 1))
295 |         timescales = torch.arange(num_timescales, dtype=torch.float32)
296 |         inv_timescales = min_timescale * torch.exp(
297 |             timescales *
298 |             -log_timescale_increment)
299 | self.register_buffer('inv_timescales', inv_timescales)
300 |
301 | ### RIN Parameters ###
302 | if self.RIN:
303 | self.affine_weight = nn.Parameter(torch.ones(1, 1, input_dim))
304 | self.affine_bias = nn.Parameter(torch.zeros(1, 1, input_dim))
305 |
306 | def get_position_encoding(self, x):
307 | max_length = x.size()[1]
308 |         position = torch.arange(max_length, dtype=torch.float32, device=x.device) # (max_length, )
309 |         temp1 = position.unsqueeze(1)                 # (max_length, 1)
310 |         temp2 = self.inv_timescales.unsqueeze(0)      # (1, num_timescales)
311 |         scaled_time = temp1 * temp2                   # (max_length, num_timescales)
312 | signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1) #[T, C]
313 | signal = F.pad(signal, (0, 0, 0, self.pe_hidden_size % 2))
314 | signal = signal.view(1, max_length, self.pe_hidden_size)
315 | return signal
316 |
317 | def forward(self, x):
318 |         assert self.input_len % (np.power(2, self.num_levels)) == 0 # the input length must be divisible by 2**num_levels so it can be halved at every level (e.g., 32 -> 16 -> 8 -> 4 for 3 levels)
319 | if self.pe:
320 | pe = self.get_position_encoding(x)
321 | if pe.shape[2] > x.shape[2]:
322 | x += pe[:, :, :-1]
323 | else:
324 | x += self.get_position_encoding(x)
325 |
326 | ### activated when RIN flag is set ###
327 | if self.RIN:
328 | print('/// RIN ACTIVATED ///\r',end='')
329 | means = x.mean(1, keepdim=True).detach()
330 | #mean
331 | x = x - means
332 | #var
333 | stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False) + 1e-5)
334 | x /= stdev
335 | # affine
336 | # print(x.shape,self.affine_weight.shape,self.affine_bias.shape)
337 | x = x * self.affine_weight + self.affine_bias
338 |
339 | # the first stack
340 | res1 = x
341 | x = self.blocks1(x)
342 | x += res1
343 | if self.num_decoder_layer == 1:
344 | x = self.projection1(x)
345 | else:
346 | x = x.permute(0,2,1)
347 | for div_projection in self.div_projection:
348 |                 output = torch.zeros(x.shape, dtype=x.dtype, device=x.device)  # allocate on the same device as x instead of hardcoding .cuda()
349 | for i, div_layer in enumerate(div_projection):
350 | div_x = x[:,:,i*self.div_len:min(i*self.div_len+self.overlap_len,self.input_len)]
351 | output[:,:,i*self.div_len:(i+1)*self.div_len] = div_layer(div_x)
352 | x = output
353 | x = self.projection1(x)
354 | x = x.permute(0,2,1)
355 |
356 | if self.stacks == 1:
357 | ### reverse RIN ###
358 | if self.RIN:
359 | x = x - self.affine_bias
360 | x = x / (self.affine_weight + 1e-10)
361 | x = x * stdev
362 | x = x + means
363 | return x
364 |
365 | elif self.stacks == 2:
366 | MidOutPut = x
367 | if self.concat_len:
368 | x = torch.cat((res1[:, -self.concat_len:,:], x), dim=1)
369 | else:
370 | x = torch.cat((res1, x), dim=1)
371 |
372 | # the second stack
373 | res2 = x
374 | x = self.blocks2(x)
375 | x += res2
376 | x = self.projection2(x)
377 |
378 | ### Reverse RIN ###
379 | if self.RIN:
380 | MidOutPut = MidOutPut - self.affine_bias
381 | MidOutPut = MidOutPut / (self.affine_weight + 1e-10)
382 | MidOutPut = MidOutPut * stdev
383 | MidOutPut = MidOutPut + means
384 |
385 | if self.RIN:
386 | x = x - self.affine_bias
387 | x = x / (self.affine_weight + 1e-10)
388 | x = x * stdev
389 | x = x + means
390 | return x, MidOutPut
391 |
392 | def get_variable(x):
393 | x = Variable(x)
394 | return x.cuda() if torch.cuda.is_available() else x
395 |
--------------------------------------------------------------------------------
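A minimal forward-pass sketch for SCINet above, mainly to make the assert in `forward` concrete: the input length must be divisible by 2**num_levels (8 with the default `num_levels = 3`) so the sequence can be split in half at every level. All sizes below are illustrative.

```python
import torch
from models.scinet.SCINet import SCINet

# window_size=48 satisfies 48 % 2**3 == 0 for the default num_levels=3
model = SCINet(output_len=6, input_len=48, input_dim=1)

x = torch.randn(16, 48, 1)   # (batch_size, input_len, input_dim)
y = model(x)                 # single stack (default) returns one output tensor
print(y.shape)               # torch.Size([16, 6, 1])
```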
/models/scinet/trainer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import copy
4 | import torch.nn as nn
5 | import numpy as np
6 | from models.scinet.SCINet import SCINet
7 | from models.scinet.utils.tools import EarlyStopping
8 | from torch import optim
9 |
10 |
11 | class Trainer_SCINet:
12 | def __init__(self, config):
13 | """
14 | Initialize class
15 |
16 |         :param config: configuration (train config)
17 | :type config: dictionary
18 | """
19 |
20 | self.output_len = config['forecast_step']
21 | self.input_len = config['window_size']
22 | self.input_dim = config['input_size']
23 | self.lr = config['lr']
24 | self.num_epochs = config['num_epochs']
25 |
26 | self.model = SCINet(self.output_len, self.input_len, self.input_dim)
27 |
28 | def fit(self, train_loader, valid_loader):
29 | """
30 | Train the model and return the best trained model
31 |
32 | :param train_loader: train dataloader
33 | :type train_loader: DataLoader
34 |
35 | :param valid_loader: validation dataloader
36 | :type valid_loader: DataLoader
37 |
38 | :return: trained model
39 | :rtype: model
40 | """
41 |
42 | since = time.time()
43 |
44 | model_optim = optim.Adam(self.model.parameters(), lr=self.lr)
45 | criterion = nn.MSELoss()
46 |
47 | best_model_wts = copy.deepcopy(self.model.state_dict())
48 | best_val_loss = 100000
49 |
50 | for epoch in range(self.num_epochs):
51 | train_loss = []
52 |
53 | self.model.train()
54 |
55 | for i, (batch_x,batch_y) in enumerate(train_loader):
56 | model_optim.zero_grad()
57 |
58 | pred = self.model(batch_x)
59 | true = batch_y
60 |
61 | loss = criterion(pred, true)
62 | train_loss.append(loss.item())
63 |
64 | loss.backward()
65 | model_optim.step()
66 |
67 | train_loss = np.average(train_loss)
68 | valid_loss = self.valid(valid_loader, criterion)
69 |
70 | if epoch == 0 or (epoch + 1) % 10 == 0:
71 | print()
72 | print('Epoch {}/{}'.format(epoch + 1, self.num_epochs))
73 | print('train Loss: {:.4f} RMSE: {:.4f}'.format(train_loss, np.sqrt(train_loss)))
74 | print('val Loss: {:.4f} RMSE: {:.4f}'.format(valid_loss, np.sqrt(valid_loss)))
75 |
76 | if valid_loss < best_val_loss:
77 | best_val_loss = valid_loss
78 | best_model_wts = copy.deepcopy(self.model.state_dict())
79 |
80 | time_elapsed = time.time() - since
81 | print('\nTraining complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
82 |         print('Best val MSE: {:.4f}'.format(best_val_loss))
83 |
84 | self.model.load_state_dict(best_model_wts)
85 | return self.model
86 |
87 | def valid(self, valid_loader, criterion):
88 | """
89 | Evaluate the model in training step
90 |
91 | :param valid_loader: validation dataloader
92 | :type valid_loader: DataLoader
93 |
94 |         :param criterion: criterion for calculating validation loss
95 | :type criterion: Class
96 |
97 | :return: average validation loss for all validation dataset
98 | :rtype: Tensor
99 | """
100 |
101 | self.model.eval()
102 |
103 | total_loss = []
104 | for i, (batch_x, batch_y) in enumerate(valid_loader):
105 | pred = self.model(batch_x)
106 | true = batch_y
107 |
108 | loss = criterion(pred.detach().cpu(), true.detach().cpu())
109 |             total_loss.append(loss.item())  # store a float rather than a zero-dim tensor
110 |
111 | total_loss = np.average(total_loss)
112 | return total_loss
113 |
114 | def test(self, test_loader):
115 | """
116 | Predict future values based on the best trained model
117 |
118 | :param test_loader: test dataloader
119 | :type test_loader: DataLoader
120 |
121 | :return: predicted values
122 | :rtype: numpy array
123 | """
124 |
125 | self.model.eval()
126 |
127 | preds, trues = [], []
128 |
129 | for i, (batch_x, batch_y) in enumerate(test_loader):
130 | pred = self.model(batch_x)
131 | true = batch_y
132 |
133 | preds.extend(pred.detach().cpu().numpy())
134 | trues.extend(true.detach().cpu().numpy())
135 |
136 | preds = np.array(preds)
137 | preds = np.squeeze(preds, axis=2).ravel()
138 | preds = np.expand_dims(preds, axis=-1)
139 | return preds
--------------------------------------------------------------------------------
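Trainer_SCINet exposes the same fit/test interface as Trainer_RNN above; only the config keys it reads differ. A hypothetical config (values illustrative, not repository defaults) might look like the sketch below.

```python
from models.scinet.trainer import Trainer_SCINet

# keys read in Trainer_SCINet.__init__; window_size must be divisible by 2**num_levels (8 by default)
config = {'forecast_step': 6, 'window_size': 48, 'input_size': 1,
          'lr': 0.0001, 'num_epochs': 2}

trainer = Trainer_SCINet(config)
# trainer.fit(train_loader, valid_loader) and trainer.test(test_loader)
# take the same (batch_x, batch_y) loaders as Trainer_RNN above.
```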
/models/scinet/utils/tools.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import numpy as np
4 | import torch
5 |
6 | def save_model(epoch, lr, model, model_dir, model_name='pems08', horizon=12):
7 | if model_dir is None:
8 | return
9 | if not os.path.exists(model_dir):
10 | os.makedirs(model_dir)
11 | file_name = os.path.join(model_dir, model_name+str(horizon)+'.bin')
12 | torch.save(
13 | {
14 | 'epoch': epoch,
15 | 'lr': lr,
16 | 'model': model.state_dict(),
17 | }, file_name)
18 |     print('saved model to', file_name)
19 |
20 |
21 | def load_model(model, model_dir, model_name='pems08', horizon=12):
22 | if not model_dir:
23 | return
24 | file_name = os.path.join(model_dir, model_name+str(horizon)+'.bin')
25 |
26 | if not os.path.exists(file_name):
27 | return
28 | with open(file_name, 'rb') as f:
29 | checkpoint = torch.load(f, map_location=lambda storage, loc: storage)
30 | print('This model was trained for {} epochs'.format(checkpoint['epoch']))
31 | model.load_state_dict(checkpoint['model'])
32 | epoch = checkpoint['epoch']
33 | lr = checkpoint['lr']
34 | print('loaded the model...', file_name, 'now lr:', lr, 'now epoch:', epoch)
35 | return model, lr, epoch
36 |
37 | def adjust_learning_rate(optimizer, epoch, args):
38 |     lr_adjust = {}  # no scheduled adjustment unless args.lradj is 1 or 2
39 |     if args.lradj == 1:
40 |         lr_adjust = {epoch: args.lr * (0.95 ** epoch)}
41 |     elif args.lradj == 2:
42 | lr_adjust = {
43 | 0: 0.0001, 5: 0.0005, 10:0.001, 20: 0.0001, 30: 0.00005, 40: 0.00001
44 | , 70: 0.000001
45 | }
46 |
47 | if epoch in lr_adjust.keys():
48 | lr = lr_adjust[epoch]
49 | for param_group in optimizer.param_groups:
50 | param_group['lr'] = lr
51 | print('Updating learning rate to {}'.format(lr))
52 | else:
53 | for param_group in optimizer.param_groups:
54 | lr = param_group['lr']
55 | return lr
56 |
57 | class EarlyStopping:
58 | def __init__(self, patience=7, verbose=False, delta=0.01):
59 | self.patience = patience
60 | self.verbose = verbose
61 | self.counter = 0
62 | self.best_score = None
63 | self.early_stop = False
64 |         self.val_loss_min = np.inf
65 | self.delta = delta
66 |
67 | def __call__(self, val_loss, model, path):
68 | score = -val_loss
69 | if self.best_score is None:
70 | self.best_score = score
71 | if path is not None :
72 | self.save_checkpoint(val_loss, model, path)
73 | elif score < self.best_score + self.delta:
74 | self.counter += 1
75 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
76 | if self.counter >= self.patience:
77 | self.early_stop = True
78 | else:
79 | self.best_score = score
80 | if path is not None :
81 | self.save_checkpoint(val_loss, model, path)
82 | self.counter = 0
83 |
84 | def save_checkpoint(self, val_loss, model, path):
85 | if self.verbose:
86 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
87 | torch.save(model.state_dict(), path+'/'+'checkpoint.pth')
88 | self.val_loss_min = val_loss
89 |
90 | class dotdict(dict):
91 | """dot.notation access to dictionary attributes"""
92 | __getattr__ = dict.get
93 | __setattr__ = dict.__setitem__
94 | __delattr__ = dict.__delitem__
95 |
96 | class StandardScaler():
97 | def __init__(self):
98 | self.mean = 0.
99 | self.std = 1.
100 |
101 | def fit(self, data):
102 | self.mean = data.mean(0)
103 | self.std = data.std(0)
104 |
105 | def transform(self, data):
106 | mean = torch.from_numpy(self.mean).type_as(data).to(data.device) if torch.is_tensor(data) else self.mean
107 | std = torch.from_numpy(self.std).type_as(data).to(data.device) if torch.is_tensor(data) else self.std
108 | return (data - mean) / std
109 |
110 | def inverse_transform(self, data):
111 | mean = torch.from_numpy(self.mean).type_as(data).to(data.device) if torch.is_tensor(data) else self.mean
112 | std = torch.from_numpy(self.std).type_as(data).to(data.device) if torch.is_tensor(data) else self.std
113 | return (data * std) + mean
114 |
--------------------------------------------------------------------------------
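The EarlyStopping helper above is imported by the SCINet trainer but not wired into its loop. A small self-contained sketch of how it could be used; the stand-in model, loss sequence, and checkpoint directory are assumptions made for illustration.

```python
import os
import torch.nn as nn
from models.scinet.utils.tools import EarlyStopping

model = nn.Linear(4, 1)                   # stand-in model for the sketch
os.makedirs('./ckpt', exist_ok=True)      # the path passed to save_checkpoint must already exist

early_stopping = EarlyStopping(patience=3, verbose=True)
fake_val_losses = [1.0, 0.8, 0.81, 0.82, 0.83]   # one improvement, then stagnation

for epoch, val_loss in enumerate(fake_val_losses):
    early_stopping(val_loss, model, './ckpt')    # saves ./ckpt/checkpoint.pth whenever the loss improves
    if early_stopping.early_stop:
        print(f'early stopping at epoch {epoch}')
        break
```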
/scaler/minmax_scaler.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClustProject/KUDataForecasting/5e5720d5e4db46e404bd14206fef5673263cda7f/scaler/minmax_scaler.pkl
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import numpy as np
4 | import pandas as pd
5 | import matplotlib.pyplot as plt
6 | from sklearn.preprocessing import MinMaxScaler
7 |
8 |
9 | def load_data(forder_dir):
10 | train_data = pd.read_csv(os.path.join(forder_dir, 'train_data.csv'))
11 | train_data = train_data["MT_320"].values
12 |
13 | test_data = pd.read_csv(os.path.join(forder_dir, 'test_data.csv'))
14 | test_data = test_data["MT_320"].values
15 |
16 | print(train_data.shape)
17 | print(test_data.shape)
18 | return train_data, test_data
19 |
20 |
21 | def get_train_val_data(train_data, valid_data, scaler_path):
22 | # normalization
23 | scaler = MinMaxScaler()
24 |
25 | if len(train_data.shape) == 1: # shape=(time_steps, )
26 | scaler = scaler.fit(np.expand_dims(train_data, axis=-1))
27 | elif len(train_data.shape) < 3: # shape=(num_of_instance, input_dims)
28 | scaler = scaler.fit(train_data)
29 | else: # shape=(num_of_instance, input_dims, time_steps)
30 | origin_shape = train_data.shape
31 | scaler = scaler.fit(np.transpose(train_data, (0, 2, 1)).reshape(-1, origin_shape[1]))
32 |
33 | scaled_data = []
34 | for data in [train_data, valid_data]:
35 |         if len(data.shape) == 1: # shape=(time_steps, )
36 | data = scaler.transform(np.expand_dims(data, axis=-1))
37 | data = data.flatten()
38 | elif len(data.shape) < 3: # shape=(num_of_instance, input_dims)
39 | data = scaler.transform(data)
40 | else: # shape=(num_of_instance, input_dims, time_steps)
41 | data = scaler.transform(np.transpose(data, (0, 2, 1)).reshape(-1, origin_shape[1]))
42 | data = np.transpose(data.reshape(-1, origin_shape[2], origin_shape[1]), (0, 2, 1))
43 | scaled_data.append(data)
44 |
45 | # save scaler
46 | print(f"Save MinMaxScaler in path: {scaler_path}")
47 | pickle.dump(scaler, open(scaler_path, 'wb'))
48 | return scaled_data
49 |
50 |
51 | def get_test_data(test_data, scaler_path):
52 | # load scaler
53 | scaler = pickle.load(open(scaler_path, 'rb'))
54 |
55 | # normalization
56 | if len(test_data.shape) == 1: # shape=(time_steps, )
57 | scaled_test_data = scaler.transform(np.expand_dims(test_data, axis=-1))
58 | scaled_test_data = scaled_test_data.flatten()
59 | elif len(test_data.shape) < 3: # shape=(num_of_instance, input_dims)
60 | scaled_test_data = scaler.transform(test_data)
61 | else: # shape=(num_of_instance, input_dims, time_steps)
62 | origin_shape = test_data.shape
63 | scaled_test_data = scaler.transform(np.transpose(test_data, (0, 2, 1)).reshape(-1, origin_shape[1]))
64 | scaled_test_data = np.transpose(scaled_test_data.reshape(-1, origin_shape[2], origin_shape[1]), (0, 2, 1))
65 | return scaled_test_data, scaler
66 |
67 |
68 | def get_plot(result_df):
69 |     # set number of subplots (visualize 2000 data points per subplot)
70 | num_fig = len(result_df) // 2000 + int(len(result_df) % 2000 != 0)
71 | fig, ax = plt.subplots(num_fig, 1, figsize=(24, 6 * num_fig))
72 | ax = [ax] if num_fig == 1 else ax
73 |
74 | for i in range(num_fig):
75 | # set true/predicted values for each subplot
76 | true_data = result_df.iloc[i*2000:(i+1)*2000].loc[:, 'actual_value']
77 | pred_data = result_df.iloc[i*2000:(i+1)*2000].loc[:, 'predicted_value']
78 |
79 | # plot true/predicted values
80 | ax[i].plot(true_data.index, true_data.values, alpha=0.5, label='Actual')
81 | ax[i].plot(pred_data.index, pred_data.values, alpha=0.5, label='Predicted')
82 |
83 | # set range of x and y axis
84 | min_x = i * 2000 if num_fig > 1 else 0
85 | max_x = (i + 1) * 2000 if num_fig > 1 else len(result_df)
86 | min_y = min(result_df['actual_value'].min(), result_df['predicted_value'].min())
87 | max_y = max(result_df['actual_value'].max(), result_df['predicted_value'].max())
88 |
89 | ax[i].set_xlim(min_x, max_x)
90 | ax[i].set_ylim(min_y, max_y)
91 | ax[i].set_xlabel('Index')
92 | ax[i].set_ylabel('Value')
93 | ax[i].legend()
94 |
95 |     fig.suptitle('Actual Values vs. Predicted Values')
96 | plt.show()
--------------------------------------------------------------------------------
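A hedged sketch of the preprocessing and plotting helpers above on synthetic data. In the real pipeline, load_data reads the MT_320 column and the scaled series feeds one of the trainers; the synthetic series, scaler path handling, and the toy 'predictions' below are illustrative assumptions.

```python
import os
import numpy as np
import pandas as pd
from utils import get_train_val_data, get_test_data, get_plot

# synthetic univariate series standing in for the MT_320 column
train_series = np.sin(np.linspace(0, 50, 1000))
valid_series = np.sin(np.linspace(50, 60, 200))
test_series = np.sin(np.linspace(60, 70, 200))

os.makedirs('./scaler', exist_ok=True)
scaler_path = './scaler/minmax_scaler.pkl'

# fit a MinMaxScaler on train, scale train/valid, and persist the scaler
scaled_train, scaled_valid = get_train_val_data(train_series, valid_series, scaler_path)

# reload the saved scaler and scale the test series with it
scaled_test, scaler = get_test_data(test_series, scaler_path)

# get_plot expects 'actual_value' and 'predicted_value' columns
result_df = pd.DataFrame({'actual_value': test_series,
                          'predicted_value': test_series + 0.05})
get_plot(result_df)
```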