├── .gitignore ├── requirements.txt ├── layers ├── einops_modules.py ├── RevIN.py └── Embed.py ├── exp ├── exp_basic.py ├── exp_supervised_finetuning.py └── exp_long_term_forecasting.py ├── LICENSE ├── exp_settings_and_results ├── few_shot_learning_5% │ ├── ETTh2.json │ ├── ETTh1.json │ ├── Traffic.json │ ├── ECL.json │ ├── ETTm1.json │ ├── ETTm2.json │ └── Weather.json ├── few_shot_learning_10% │ ├── ECL.json │ ├── ETTm1.json │ ├── ETTh2.json │ ├── ETTm2.json │ ├── Traffic.json │ ├── Weather.json │ └── ETTh1.json ├── long_term_forecasting │ ├── ETTm2.json │ ├── Weather.json │ ├── ECL.json │ ├── ETTh2.json │ ├── ETTm1.json │ ├── Traffic.json │ └── ETTh1.json ├── ablation_study │ ├── ETTh1_10% │ │ ├── with_FT.json │ │ ├── with_LP.json │ │ ├── LLM4TS.json │ │ ├── without_SFT.json │ │ ├── without_Temp.json │ │ └── without_PEFT.json │ └── ETTh1_full │ │ ├── with_FT.json │ │ ├── with_LP.json │ │ ├── LLM4TS.json │ │ ├── without_SFT.json │ │ ├── without_Temp.json │ │ └── without_PEFT.json └── self_supervised_learning │ └── ETTh1.json ├── data_provider ├── data_factory.py └── data_loader.py ├── README.md ├── utils ├── timefeatures.py └── tools.py ├── models └── LLM4TS.py └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | /dataset 2 | /checkpoints 3 | /results 4 | /LLM 5 | *.pyc 6 | __pycache__ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.13.1 2 | transformers==4.30.2 3 | peft==0.4.0 4 | matplotlib==3.7.0 5 | numpy==1.23.5 6 | pandas==1.5.3 7 | scikit-learn==1.2.2 8 | tqdm==4.64.1 9 | einops==0.6.1 10 | patool==1.12 11 | sktime==0.16.1 12 | scipy==1.10.1 13 | ray[tune]==2.2.0 -------------------------------------------------------------------------------- /layers/einops_modules.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import torch.nn as nn 3 | from einops import rearrange 4 | 5 | 6 | class RearrangeModule(nn.Module): 7 | def __init__(self, pattern, **shapes): 8 | super().__init__() 9 | self.pattern = pattern 10 | self.shapes = shapes 11 | 12 | def forward(self, x): 13 | return rearrange(x, self.pattern, **self.shapes) 14 | -------------------------------------------------------------------------------- /exp/exp_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from models import LLM4TS 4 | 5 | 6 | class Exp_Basic(object): 7 | def __init__(self, args): 8 | self.args = args 9 | self.model_dict = { 10 | "LLM4TS": LLM4TS, 11 | } 12 | self.device = self._acquire_device() 13 | self.model = self._build_model().to(self.device) 14 | 15 | def _build_model(self): 16 | raise NotImplementedError 17 | 18 | def _acquire_device(self): 19 | if self.args.use_gpu: 20 | device = torch.device( 21 | "cuda" if torch.cuda.is_available() else "cpu" 22 | ) # Assume we only use 1 gpu at most 23 | print("Use GPU") 24 | else: 25 | device = torch.device("cpu") 26 | print("Use CPU") 27 | return device 28 | 29 | def _get_data(self): 30 | pass 31 | 32 | def train(self): 33 | pass 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 blacksnail789521 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/ETTh2.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.3593347850311559, 3 | "avg_mae": 0.40542498352437456, 4 | "96_mse": 0.3140943417521684, 5 | "96_mae": 0.3756436850679548, 6 | "192_mse": 0.3658379280851001, 7 | "192_mae": 0.408174221106127, 8 | "336_mse": 0.3980720852561992, 9 | "336_mae": 0.4324570443990419, 10 | "config": { 11 | "enable_supervised_finetuning": true, 12 | "first_k_layers": 6, 13 | "patch_len": 16, 14 | "stride": 8, 15 | "seq_len": 512, 16 | "ft_mode": "lp_ft", 17 | "dropout": 0.05, 18 | "token_embed_type": "conv", 19 | "token_embed_kernel_size": 3, 20 | "temporal_embed_type": "learned", 21 | "freq": "h", 22 | "peft_method": "lora", 23 | "sft_optim": "AdamW", 24 | "sft_learning_rate": 0.00463439458692088, 25 | "sft_lradj": "type3", 26 | "sft_weight_decay": 0.0029934177891999094, 27 | "sft_train_epochs": 10, 28 | "dft_optim": "AdamW", 29 | "dft_learning_rate": 0.0008428217323636276, 30 | "dft_lradj": "type3", 31 | "dft_weight_decay": 0.00023078648948777515, 32 | "dft_train_epochs": 5, 33 | "peft_params_r": 8, 34 | "peft_params_lora_alpha": 32, 35 | "peft_params_lora_dropout": 0.2 36 | } 37 | } 
-------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/ETTh1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.6517747566218635, 3 | "avg_mae": 0.552009147490299, 4 | "96_mse": 0.5091448755640733, 5 | "96_mae": 0.4848005669681649, 6 | "192_mse": 0.717340866927387, 7 | "192_mae": 0.5817653703040817, 8 | "336_mse": 0.7288385273741304, 9 | "336_mae": 0.5894615051986503, 10 | "config": { 11 | "enable_supervised_finetuning": true, 12 | "first_k_layers": 6, 13 | "patch_len": 16, 14 | "stride": 8, 15 | "seq_len": 512, 16 | "ft_mode": "lp_ft", 17 | "dropout": 0.2, 18 | "token_embed_type": "conv", 19 | "token_embed_kernel_size": 7, 20 | "temporal_embed_type": "learned", 21 | "freq": "t", 22 | "peft_method": "adalora", 23 | "sft_optim": "AdamW", 24 | "sft_learning_rate": 0.0008977618904234359, 25 | "sft_lradj": "type3", 26 | "sft_weight_decay": 9.00736499514127e-05, 27 | "sft_train_epochs": 5, 28 | "dft_optim": "AdamW", 29 | "dft_learning_rate": 0.00037183848720322897, 30 | "dft_lradj": "warmup", 31 | "dft_weight_decay": 0.0005167239790985688, 32 | "dft_train_epochs": 15, 33 | "peft_params_r": 16, 34 | "peft_params_lora_alpha": 16, 35 | "peft_params_lora_dropout": 0.01 36 | } 37 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/Traffic.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.4189952636989563, 3 | "avg_mae": 0.2956442037135309, 4 | "96_mse": 0.4018342090304455, 5 | "96_mae": 0.2852768277145826, 6 | "192_mse": 0.41833095912979035, 7 | "192_mae": 0.2934198833169432, 8 | "336_mse": 0.43682062293663315, 9 | "336_mae": 0.3082359001090669, 10 | "config": { 11 | "enable_supervised_finetuning": true, 12 | "first_k_layers": 6, 13 | "patch_len": 16, 14 | "stride": 8, 15 | "seq_len": 512, 16 | "ft_mode": 
"lp_ft", 17 | "dropout": 0.05, 18 | "token_embed_type": "conv", 19 | "token_embed_kernel_size": 7, 20 | "temporal_embed_type": "learned", 21 | "freq": "h", 22 | "peft_method": "lora", 23 | "sft_optim": "AdamW", 24 | "sft_learning_rate": 0.002961258205026751, 25 | "sft_lradj": "type3", 26 | "sft_weight_decay": 0.0024954851483474424, 27 | "sft_train_epochs": 5, 28 | "dft_optim": "AdamW", 29 | "dft_learning_rate": 8.593348739882271e-05, 30 | "dft_lradj": "constant", 31 | "dft_weight_decay": 8.720060884710044e-05, 32 | "dft_train_epochs": 5, 33 | "peft_params_r": 32, 34 | "peft_params_lora_alpha": 32, 35 | "peft_params_lora_dropout": 0.1 36 | } 37 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_10%/ECL.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.17266129841405617, 3 | "avg_mae": 0.2644907359507301, 4 | "96_mse": 0.13574501204317627, 5 | "96_mae": 0.23112047354588913, 6 | "192_mse": 0.1525547295349285, 7 | "192_mae": 0.24648193730412976, 8 | "336_mse": 0.1730688690082093, 9 | "336_mae": 0.26790672887730915, 10 | "720_mse": 0.22927658306991058, 11 | "720_mae": 0.31245380407559226, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 
32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_10%/ETTm1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.4082063059743731, 3 | "avg_mae": 0.4132635266439843, 4 | "96_mse": 0.3605705441859288, 5 | "96_mae": 0.3883078470826149, 6 | "192_mse": 0.3862779987613102, 7 | "192_mae": 0.40188549275135066, 8 | "336_mse": 0.415191201591178, 9 | "336_mae": 0.4178434151567911, 10 | "720_mse": 0.47078547935907533, 11 | "720_mae": 0.4450173515851806, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 5, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.00396907444116491, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.008463485336373476, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 5.910555052997684e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 0.0020646620154687358, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/ECL.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.1730417352691438, 3 | "avg_mae": 0.26637881421880655, 4 | "96_mse": 0.13985414700174198, 5 | "96_mae": 0.2359702680133389, 6 | "192_mse": 0.15532581101870896, 7 | "192_mae": 0.24974770207705926, 8 | "336_mse": 0.17468783538396357, 9 | "336_mae": 0.26935303123359505, 10 | "720_mse": 
0.2222991476721606, 11 | "720_mae": 0.31044425555123295, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/ETTm2.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.2512851421192321, 3 | "avg_mae": 0.3135132541831298, 4 | "96_mse": 0.16537583956065086, 5 | "96_mae": 0.2546601102997859, 6 | "192_mse": 0.2205615916422435, 7 | "192_mae": 0.29232204299081455, 8 | "336_mse": 0.26833962195699934, 9 | "336_mae": 0.3261761643776768, 10 | "720_mse": 0.3508635153170346, 11 | "720_mae": 0.3808946990642418, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": 
"AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/Weather.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.22367991229100498, 3 | "avg_mae": 0.2604551366416404, 4 | "96_mse": 0.14768949380388188, 5 | "96_mae": 0.19669907371226314, 6 | "192_mse": 0.19161255181029016, 7 | "192_mae": 0.238091575596833, 8 | "336_mse": 0.24188374050638892, 9 | "336_mae": 0.2777469868473174, 10 | "720_mse": 0.3135338630434589, 11 | "720_mae": 0.329282910410148, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_10%/ETTh2.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.3668148093975354, 3 | "avg_mae": 0.40734422475099386, 4 | "96_mse": 
0.28289095775567386, 5 | "96_mae": 0.3512332376680876, 6 | "192_mse": 0.3643847509813147, 7 | "192_mae": 0.4003806918048534, 8 | "336_mse": 0.3740609099003051, 9 | "336_mae": 0.4160220239231055, 10 | "720_mse": 0.4459226189528481, 11 | "720_mae": 0.4617409456079289, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.2, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 1.761386023445074e-05, 27 | "sft_lradj": "type1", 28 | "sft_weight_decay": 0.0014878023301189934, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.5915355196803626e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 7.12744911069106e-05, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 16, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_10%/ETTm2.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.2765491245946339, 3 | "avg_mae": 0.32448810714084464, 4 | "96_mse": 0.18423963612757432, 5 | "96_mae": 0.26550369685850084, 6 | "192_mse": 0.2405183758320553, 7 | "192_mae": 0.30138234039405726, 8 | "336_mse": 0.2949767461732814, 9 | "336_mae": 0.3376855989427943, 10 | "720_mse": 0.38646174024562446, 11 | "720_mae": 0.39338079236802603, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.15, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 5, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | 
"peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 8.009961304276983e-05, 27 | "sft_lradj": "type1", 28 | "sft_weight_decay": 0.0007526261836224936, 29 | "sft_train_epochs": 3, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 0.00016236413919079034, 32 | "dft_lradj": "warmup", 33 | "dft_weight_decay": 0.00017136935532693613, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_10%/Traffic.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.4323859544502553, 3 | "avg_mae": 0.30291283019966153, 4 | "96_mse": 0.40288596942611077, 5 | "96_mae": 0.28837625268720973, 6 | "192_mse": 0.41671515921168134, 7 | "192_mae": 0.29422074904329115, 8 | "336_mse": 0.42980134337638387, 9 | "336_mae": 0.3022993756970722, 10 | "720_mse": 0.4801413457868452, 11 | "720_mae": 0.326754943371073, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- 
/exp_settings_and_results/few_shot_learning_10%/Weather.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.23497123109867896, 3 | "avg_mae": 0.27083866676820795, 4 | "96_mse": 0.15871047432237004, 5 | "96_mae": 0.20795829865539972, 6 | "192_mse": 0.2045661680066698, 7 | "192_mae": 0.24988744428527307, 8 | "336_mse": 0.2543834175318124, 9 | "336_mae": 0.28867301886084545, 10 | "720_mse": 0.3222248645338636, 11 | "720_mae": 0.33683590527131346, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/ETTm1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.4129955238929678, 3 | "avg_mae": 0.41757456760264083, 4 | "96_mse": 0.3492803602742079, 5 | "96_mae": 0.37922345233173704, 6 | "192_mse": 0.3744124431819061, 7 | "192_mae": 0.39420419057563355, 8 | "336_mse": 0.4119301780497384, 9 | "336_mae": 0.41723850627967846, 10 | "720_mse": 0.5163591140660189, 11 | "720_mae": 0.47963212122351434, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | 
"patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.15, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014254794512505988, 27 | "sft_lradj": "type3", 28 | "sft_weight_decay": 0.0013882401969567406, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.456048810138906e-05, 32 | "dft_lradj": "warmup", 33 | "dft_weight_decay": 0.00012182825071187053, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/ETTm2.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.2862211482064806, 3 | "avg_mae": 0.33263397352829893, 4 | "96_mse": 0.19228354337601325, 5 | "96_mae": 0.273645885145435, 6 | "192_mse": 0.24913822762764892, 7 | "192_mae": 0.3093143456748554, 8 | "336_mse": 0.3011840900994445, 9 | "336_mae": 0.3420091030236922, 10 | "720_mse": 0.40227873172281553, 11 | "720_mae": 0.40556656026921306, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.2, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 9.103983326841708e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 1.9523009641050137e-05, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 0.0015298792920727755, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 0.0007239045524346014, 34 | 
"dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.2 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_5%/Weather.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.25624072842739404, 3 | "avg_mae": 0.2925444164325979, 4 | "96_mse": 0.17372342500720397, 5 | "96_mae": 0.22787300341583733, 6 | "192_mse": 0.21881220967420495, 7 | "192_mae": 0.2657165223788569, 8 | "336_mse": 0.2767625668819375, 9 | "336_mae": 0.3103517231797374, 10 | "720_mse": 0.3556647121462298, 11 | "720_mae": 0.3662364167559598, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.15, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014254794512505988, 27 | "sft_lradj": "type3", 28 | "sft_weight_decay": 0.0013882401969567406, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.456048810138906e-05, 32 | "dft_lradj": "warmup", 33 | "dft_weight_decay": 0.00012182825071187053, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/ECL.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.1596046181603027, 3 | "avg_mae": 0.2536694026603774, 4 | "96_mse": 0.12813662682959626, 5 | "96_mae": 0.22362288404681788, 6 | "192_mse": 0.14616833755463982, 7 | "192_mae": 0.24063030127236645, 8 | "336_mse": 
0.16334813465414055, 9 | "336_mae": 0.2584188626321883, 10 | "720_mse": 0.2007653736028341, 11 | "720_mae": 0.29200556269013705, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.002961258205026751, 27 | "sft_lradj": "type3", 28 | "sft_weight_decay": 0.0024954851483474424, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 8.593348739882271e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 8.720060884710044e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/ETTh2.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.3336903495190831, 3 | "avg_mae": 0.3831483608868698, 4 | "96_mse": 0.26913441163732815, 5 | "96_mae": 0.33234352138089507, 6 | "192_mse": 0.3289847359010557, 7 | "192_mae": 0.37771206597487134, 8 | "336_mse": 0.3534982433314804, 9 | "336_mae": 0.39661516045494905, 10 | "720_mse": 0.38314400720646824, 11 | "720_mae": 0.42592269573676383, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 5, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0004329665954491725, 27 | "sft_lradj": "type1", 28 | 
"sft_weight_decay": 0.00023829134317392904, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.227864012823814e-05, 32 | "dft_lradj": "type3", 33 | "dft_weight_decay": 0.008795726505385702, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/ETTm1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.34293574750920347, 3 | "avg_mae": 0.37871368049012616, 4 | "96_mse": 0.2859586432385139, 5 | "96_mae": 0.34372167088664496, 6 | "192_mse": 0.32440984418446367, 7 | "192_mae": 0.36641655010836466, 8 | "336_mse": 0.35325440019369125, 9 | "336_mae": 0.38526901122378676, 10 | "720_mse": 0.4081201024201451, 11 | "720_mae": 0.41944748974170815, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0006587912666243346, 27 | "sft_lradj": "warmup", 28 | "sft_weight_decay": 0.004152985020883332, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.8559477084934125e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 1.163853061418773e-05, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.01 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/Traffic.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"avg_mse": 0.4016558447044133, 3 | "avg_mae": 0.27340921193739504, 4 | "96_mse": 0.3726588024615495, 5 | "96_mae": 0.25952800016057975, 6 | "192_mse": 0.39105753566706847, 7 | "192_mae": 0.2659919295582739, 8 | "336_mse": 0.40546211632661094, 9 | "336_mae": 0.2752185870242315, 10 | "720_mse": 0.4374449243624243, 11 | "720_mae": 0.2928983310064949, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 5, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 9.246234676674871e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0002080276016465926, 29 | "sft_train_epochs": 3, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 2.397663767855072e-05, 32 | "dft_lradj": "type1", 33 | "dft_weight_decay": 0.001996259510442195, 34 | "dft_train_epochs": 5, 35 | "peft_params_r": 32, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/few_shot_learning_10%/ETTh1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.5253690779324471, 3 | "avg_mae": 0.4933016270103202, 4 | "96_mse": 0.4175363998664053, 5 | "96_mae": 0.4325748729078393, 6 | "192_mse": 0.46993525326251984, 7 | "192_mae": 0.46875807642936707, 8 | "336_mse": 0.5052978624315823, 9 | "336_mae": 0.4994736629373887, 10 | "720_mse": 0.708706796169281, 11 | "720_mae": 0.5723998957666857, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | 
"token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014838277734676884, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0013331487231159165, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 9.776360476482794e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 2.8696003883523563e-05, 34 | "dft_train_epochs": 10, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/long_term_forecasting/ETTh1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.404595475105504, 3 | "avg_mae": 0.41837801291208365, 4 | "96_mse": 0.37162686217772334, 5 | "96_mae": 0.39451182829706294, 6 | "192_mse": 0.4038821678194735, 7 | "192_mae": 0.41222142345375484, 8 | "336_mse": 0.42068295750547857, 9 | "336_mae": 0.4224090891725877, 10 | "720_mse": 0.42218991291934044, 11 | "720_mae": 0.44436971072492926, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 7.912045141879411e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0005542494992024964, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.8257759510439175e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 0.0014555863788252605, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 
| } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_10%/with_FT.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/ FT/avg_mse": 0.547840612445316, 3 | "w/ FT/avg_mae": 0.5064623653192499, 4 | "w/ FT/96_mse": 0.42390422444594533, 5 | "w/ FT/96_mae": 0.43688264724455383, 6 | "w/ FT/192_mse": 0.4776424343387286, 7 | "w/ FT/192_mae": 0.4743239175942209, 8 | "w/ FT/336_mse": 0.5459943694226882, 9 | "w/ FT/336_mae": 0.5249937658800798, 10 | "w/ FT/720_mse": 0.743821421573902, 11 | "w/ FT/720_mae": 0.5896491305581455, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "ft", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014838277734676884, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0013331487231159165, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 9.776360476482794e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 2.8696003883523563e-05, 34 | "dft_train_epochs": 10, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_10%/with_LP.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/ LP/avg_mse": 0.5309157789549867, 3 | "w/ LP/avg_mae": 0.49628133409933134, 4 | "w/ LP/96_mse": 0.4200222045183182, 5 | "w/ LP/96_mae": 0.433258698174828, 6 | "w/ LP/192_mse": 0.455323186599546, 7 | "w/ LP/192_mae": 0.4549119240707821, 8 | "w/ LP/336_mse": 0.5113077102338567, 9 | "w/ 
LP/336_mae": 0.507017978850533, 10 | "w/ LP/720_mse": 0.7370100144682259, 11 | "w/ LP/720_mae": 0.5899367353011822, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014838277734676884, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0013331487231159165, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 9.776360476482794e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 2.8696003883523563e-05, 34 | "dft_train_epochs": 10, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_full/with_FT.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/ FT/avg_mse": 0.4040784662690934, 3 | "w/ FT/avg_mae": 0.4189475368814515, 4 | "w/ FT/96_mse": 0.3710692760191466, 5 | "w/ FT/96_mae": 0.3944097710283179, 6 | "w/ FT/192_mse": 0.40437455682290924, 7 | "w/ FT/192_mae": 0.4134441771441036, 8 | "w/ FT/336_mse": 0.4202580320484498, 9 | "w/ FT/336_mae": 0.4231886907535441, 10 | "w/ FT/720_mse": 0.42061200018586786, 11 | "w/ FT/720_mae": 0.44474750859984036, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 
7.912045141879411e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0005542494992024964, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.8257759510439175e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 0.0014555863788252605, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_full/with_LP.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/ LP/avg_mse": 0.4106562138135688, 3 | "w/ LP/avg_mae": 0.4219597279435555, 4 | "w/ LP/96_mse": 0.3778376238126504, 5 | "w/ LP/96_mae": 0.39833269229060725, 6 | "w/ LP/192_mse": 0.41086429730057716, 7 | "w/ LP/192_mae": 0.41685105363527936, 8 | "w/ LP/336_mse": 0.42610518765800137, 9 | "w/ LP/336_mae": 0.4248685941976659, 10 | "w/ LP/720_mse": 0.42781774648304643, 11 | "w/ LP/720_mae": 0.4477865716506695, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 7.912045141879411e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0005542494992024964, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.8257759510439175e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 0.0014555863788252605, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 | } -------------------------------------------------------------------------------- 
/exp_settings_and_results/self_supervised_learning/ETTh1.json: -------------------------------------------------------------------------------- 1 | { 2 | "avg_mse": 0.38127220511946375, 3 | "avg_mae": 0.4079239417326668, 4 | "24_mse": 0.31505577724713546, 5 | "24_mae": 0.3651582293021373, 6 | "48_mse": 0.34210289112831416, 7 | "48_mae": 0.38413793005441366, 8 | "168_mse": 0.4010437006080473, 9 | "168_mae": 0.4153244745086979, 10 | "336_mse": 0.4216286653981489, 11 | "336_mae": 0.4277173894293168, 12 | "720_mse": 0.426529991215673, 13 | "720_mae": 0.4472816853687681, 14 | "config": { 15 | "enable_supervised_finetuning": true, 16 | "first_k_layers": 6, 17 | "patch_len": 12, 18 | "stride": 12, 19 | "seq_len": 512, 20 | "ft_mode": "lp", 21 | "dropout": 0, 22 | "token_embed_type": "conv", 23 | "token_embed_kernel_size": 3, 24 | "temporal_embed_type": "learned", 25 | "freq": "t", 26 | "peft_method": "lora", 27 | "sft_optim": "AdamW", 28 | "sft_learning_rate": 0.001067795624664354, 29 | "sft_lradj": "constant", 30 | "sft_weight_decay": 0.0095510333777999, 31 | "sft_train_epochs": 5, 32 | "dft_optim": "AdamW", 33 | "dft_learning_rate": 0.0011039568093674122, 34 | "dft_lradj": "warmup", 35 | "dft_weight_decay": 0.0012836785599665656, 36 | "dft_train_epochs": 10, 37 | "peft_params_r": 32, 38 | "peft_params_lora_alpha": 16, 39 | "peft_params_lora_dropout": 0 40 | } 41 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_10%/LLM4TS.json: -------------------------------------------------------------------------------- 1 | { 2 | "LLM4TS/avg_mse": 0.5253690779324471, 3 | "LLM4TS/avg_mae": 0.4933016270103202, 4 | "LLM4TS/96_mse": 0.4175363998664053, 5 | "LLM4TS/96_mae": 0.4325748729078393, 6 | "LLM4TS/192_mse": 0.46993525326251984, 7 | "LLM4TS/192_mae": 0.46875807642936707, 8 | "LLM4TS/336_mse": 0.5052978624315823, 9 | "LLM4TS/336_mae": 0.4994736629373887, 10 | "LLM4TS/720_mse": 0.708706796169281, 11 | 
"LLM4TS/720_mae": 0.5723998957666857, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014838277734676884, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0013331487231159165, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 9.776360476482794e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 2.8696003883523563e-05, 34 | "dft_train_epochs": 10, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_full/LLM4TS.json: -------------------------------------------------------------------------------- 1 | { 2 | "LLM4TS/avg_mse": 0.404595475105504, 3 | "LLM4TS/avg_mae": 0.41837801291208365, 4 | "LLM4TS/96_mse": 0.37162686217772334, 5 | "LLM4TS/96_mae": 0.39451182829706294, 6 | "LLM4TS/192_mse": 0.4038821678194735, 7 | "LLM4TS/192_mae": 0.41222142345375484, 8 | "LLM4TS/336_mse": 0.42068295750547857, 9 | "LLM4TS/336_mae": 0.4224090891725877, 10 | "LLM4TS/720_mse": 0.42218991291934044, 11 | "LLM4TS/720_mae": 0.44436971072492926, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 7.912045141879411e-05, 27 | "sft_lradj": "constant", 28 | 
"sft_weight_decay": 0.0005542494992024964, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.8257759510439175e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 0.0014555863788252605, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_full/without_SFT.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/o SFT/avg_mse": 0.4059136984196724, 3 | "w/o SFT/avg_mae": 0.4196452624467826, 4 | "w/o SFT/96_mse": 0.37221683797083405, 5 | "w/o SFT/96_mae": 0.39510199427604675, 6 | "w/o SFT/192_mse": 0.40428175777196884, 7 | "w/o SFT/192_mae": 0.411007531815105, 8 | "w/o SFT/336_mse": 0.4228590573458111, 9 | "w/o SFT/336_mae": 0.42390817403793335, 10 | "w/o SFT/720_mse": 0.4242971405900758, 11 | "w/o SFT/720_mae": 0.4485633496580453, 12 | "config": { 13 | "enable_supervised_finetuning": false, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 7.912045141879411e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0005542494992024964, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.8257759510439175e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 0.0014555863788252605, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 | } -------------------------------------------------------------------------------- 
/exp_settings_and_results/ablation_study/ETTh1_10%/without_SFT.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/o SFT/avg_mse": 0.5549458877034458, 3 | "w/o SFT/avg_mae": 0.5024389364001858, 4 | "w/o SFT/96_mse": 0.43092299919379384, 5 | "w/o SFT/96_mae": 0.4386895604823765, 6 | "w/o SFT/192_mse": 0.48811978846788406, 7 | "w/o SFT/192_mae": 0.47489838384919697, 8 | "w/o SFT/336_mse": 0.5383511238238391, 9 | "w/o SFT/336_mae": 0.5066761523485184, 10 | "w/o SFT/720_mse": 0.762389639328266, 11 | "w/o SFT/720_mae": 0.5894916489206511, 12 | "config": { 13 | "enable_supervised_finetuning": false, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 0.0014838277734676884, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0013331487231159165, 29 | "sft_train_epochs": 10, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 9.776360476482794e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 2.8696003883523563e-05, 34 | "dft_train_epochs": 10, 35 | "peft_params_r": 16, 36 | "peft_params_lora_alpha": 32, 37 | "peft_params_lora_dropout": 0.1 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_full/without_Temp.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/o Temp/avg_mse": 0.41659387801132713, 3 | "w/o Temp/avg_mae": 0.42553263869106833, 4 | "w/o Temp/96_mse": 0.37862489450919, 5 | "w/o Temp/96_mae": 0.39735841202108485, 6 | "w/o Temp/192_mse": 0.4111339474717776, 7 | "w/o Temp/192_mae": 0.4165892435444726, 8 | "w/o Temp/336_mse": 0.433856383404311, 9 | "w/o Temp/336_mae": 
0.43068647296989665, 10 | "w/o Temp/720_mse": 0.44276028666003, 11 | "w/o Temp/720_mae": 0.4574964262288192, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "none", 23 | "freq": "h", 24 | "peft_method": "lora", 25 | "sft_optim": "AdamW", 26 | "sft_learning_rate": 7.912045141879411e-05, 27 | "sft_lradj": "constant", 28 | "sft_weight_decay": 0.0005542494992024964, 29 | "sft_train_epochs": 5, 30 | "dft_optim": "AdamW", 31 | "dft_learning_rate": 1.8257759510439175e-05, 32 | "dft_lradj": "constant", 33 | "dft_weight_decay": 0.0014555863788252605, 34 | "dft_train_epochs": 15, 35 | "peft_params_r": 8, 36 | "peft_params_lora_alpha": 64, 37 | "peft_params_lora_dropout": 0 38 | } 39 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_10%/without_Temp.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/o Temp/avg_mse": 0.5291693180891133, 3 | "w/o Temp/avg_mae": 0.49808378676335785, 4 | "w/o Temp/96_mse": 0.4224777017769061, 5 | "w/o Temp/96_mae": 0.4347051021299864, 6 | "w/o Temp/192_mse": 0.4632568326261308, 7 | "w/o Temp/192_mae": 0.4653799376553959, 8 | "w/o Temp/336_mse": 0.5166722387075424, 9 | "w/o Temp/336_mae": 0.5081914032206816, 10 | "w/o Temp/720_mse": 0.7142704992458738, 11 | "w/o Temp/720_mae": 0.5840587040473675, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "none", 23 | "freq": "t", 24 | "peft_method": "adalora", 25 | "sft_optim": "AdamW", 26 | 
def data_provider(args, flag):
    """Instantiate the dataset for the given split and wrap it in a DataLoader.

    :param args: experiment namespace (data/root_path/seq_len/... fields)
    :param flag: split name; "test" disables shuffling, anything else shuffles
    :return: (dataset, dataloader) pair for the requested split
    """
    Data = data_dict[args.data]

    # Time-feature encoding is enabled only for the "timeF" embedding style.
    timeenc = 1 if args.embed == "timeF" else 0

    # Every split uses the configured batch size and frequency, and drops the
    # last incomplete batch; only the shuffling behavior differs per split.
    shuffle_flag = flag != "test"
    drop_last = True
    batch_size = args.batch_size
    freq = args.freq

    # Build the forecasting/imputation dataset for this split.
    data_set = Data(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq,
        percent=args.percent,
        return_single_feature=args.return_single_feature,
    )
    print(flag, len(data_set))

    # num_workers is intentionally left at its default (0): worker processes
    # were noted as "not very stable" in this project.
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        drop_last=drop_last,
    )

    return data_set, data_loader
40 | 41 | # Acknowledgement 42 | 43 | This library was built upon the following repositories: 44 | 45 | * Time Series Library (TSlib): [https://github.com/thuml/Time-Series-Library](https://github.com/thuml/Time-Series-Library) 46 | -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_10%/without_PEFT.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/o PEFT/avg_mse": 0.6002152191327375, 3 | "w/o PEFT/avg_mae": 0.5180479845580993, 4 | "w/o PEFT/96_mse": 0.42279261389845296, 5 | "w/o PEFT/96_mae": 0.4333449311946568, 6 | "w/o PEFT/192_mse": 0.47179754657877815, 7 | "w/o PEFT/192_mae": 0.4626004348198573, 8 | "w/o PEFT/336_mse": 0.5256991377648186, 9 | "w/o PEFT/336_mae": 0.5041476663421182, 10 | "w/o PEFT/720_mse": 0.9805715782889004, 11 | "w/o PEFT/720_mae": 0.672098905875765, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 512, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.1, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 7, 22 | "temporal_embed_type": "learned", 23 | "freq": "t", 24 | "peft_method": "none", 25 | "peft_params": { 26 | "lora": { 27 | "r": 16, 28 | "lora_alpha": 32, 29 | "lora_dropout": 0 30 | }, 31 | "adalora": { 32 | "r": 16, 33 | "lora_alpha": 32, 34 | "lora_dropout": 0.1, 35 | "task_type": "FEATURE_EXTRACTION", 36 | "target_modules": [ 37 | "c_attn" 38 | ], 39 | "fan_in_fan_out": true 40 | }, 41 | "ia3": { 42 | "target_modules": [ 43 | "c_attn" 44 | ], 45 | "feedforward_modules": [ 46 | "mlp" 47 | ] 48 | } 49 | }, 50 | "sft_optim": "AdamW", 51 | "sft_learning_rate": 0.0014838277734676884, 52 | "sft_lradj": "constant", 53 | "sft_weight_decay": 0.0013331487231159165, 54 | "sft_train_epochs": 10, 55 | "dft_optim": "AdamW", 56 | "dft_learning_rate": 9.776360476482794e-05, 57 | "dft_lradj": "constant", 58 | 
"dft_weight_decay": 2.8696003883523563e-05, 59 | "dft_train_epochs": 10, 60 | "peft_params_r": 16, 61 | "peft_params_lora_alpha": 32, 62 | "peft_params_lora_dropout": 0 63 | } 64 | } -------------------------------------------------------------------------------- /exp_settings_and_results/ablation_study/ETTh1_full/without_PEFT.json: -------------------------------------------------------------------------------- 1 | { 2 | "w/o PEFT/avg_mse": 0.4079580057688392, 3 | "w/o PEFT/avg_mae": 0.4194214855356688, 4 | "w/o PEFT/96_mse": 0.3738920904303852, 5 | "w/o PEFT/96_mae": 0.39341700390765544, 6 | "w/o PEFT/192_mse": 0.4080122858285904, 7 | "w/o PEFT/192_mae": 0.41289205021328396, 8 | "w/o PEFT/336_mse": 0.42045845704920154, 9 | "w/o PEFT/336_mae": 0.42135421318166394, 10 | "w/o PEFT/720_mse": 0.4294691897671798, 11 | "w/o PEFT/720_mae": 0.4500226748400721, 12 | "config": { 13 | "enable_supervised_finetuning": true, 14 | "first_k_layers": 6, 15 | "patch_len": 16, 16 | "stride": 8, 17 | "seq_len": 336, 18 | "ft_mode": "lp_ft", 19 | "dropout": 0.05, 20 | "token_embed_type": "conv", 21 | "token_embed_kernel_size": 3, 22 | "temporal_embed_type": "learned", 23 | "freq": "h", 24 | "peft_method": "none", 25 | "peft_params": { 26 | "lora": { 27 | "r": 8, 28 | "lora_alpha": 64, 29 | "lora_dropout": 0, 30 | "task_type": "FEATURE_EXTRACTION", 31 | "target_modules": [ 32 | "c_attn" 33 | ], 34 | "fan_in_fan_out": true 35 | }, 36 | "adalora": { 37 | "r": 16, 38 | "lora_alpha": 64, 39 | "lora_dropout": 0.001 40 | }, 41 | "ia3": { 42 | "target_modules": [ 43 | "c_attn" 44 | ], 45 | "feedforward_modules": [ 46 | "mlp" 47 | ] 48 | } 49 | }, 50 | "sft_optim": "AdamW", 51 | "sft_learning_rate": 7.912045141879411e-05, 52 | "sft_lradj": "constant", 53 | "sft_weight_decay": 0.0005542494992024964, 54 | "sft_train_epochs": 5, 55 | "dft_optim": "AdamW", 56 | "dft_learning_rate": 1.8257759510439175e-05, 57 | "dft_lradj": "constant", 58 | "dft_weight_decay": 0.0014555863788252605, 59 | 
class RevIN(nn.Module):
    # Reversible instance normalization: "norm" mode computes and stores
    # per-instance mean/stdev, then normalizes; a later "denorm" call inverts
    # the transform using the stored statistics. Optional learnable per-channel
    # affine parameters are applied after normalization.
    def __init__(self, num_features: int, eps=1e-5, affine=True):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        """
        super(RevIN, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        if self.affine:
            self._init_params()

    def forward(self, x, mode: str, mask=None):
        """Apply normalization ("norm") or its inverse ("denorm") to x.

        :param x: input tensor; statistics are reduced over all dims except
            the first (batch) and last (channel) dims
        :param mode: "norm" computes+stores statistics and normalizes;
            "denorm" reverses a previous "norm" using the stored statistics
        :param mask: optional 0/1 tensor; zeros are excluded from the
            statistics and zeroed in the normalized output ("norm" only)
        :raises NotImplementedError: for any other mode string
        """
        if mode == "norm":
            self._get_statistics(x, mask)  # get mean and std
            x = self._normalize(x, mask)
        elif mode == "denorm":
            x = self._denormalize(x)
        else:
            raise NotImplementedError
        return x

    def _init_params(self):
        # initialize RevIN params: (C,)
        # trainable
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x, mask):
        # Reduce over every dim except batch (0) and channel (last),
        # keeping dims so the stats broadcast against x.
        dim2reduce = tuple(range(1, x.ndim - 1))
        if mask is None:
            self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
            self.stdev = torch.sqrt(
                torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps
            ).detach()
        else:
            # Masked statistics: sums divided by the number of unmasked
            # entries per reduced slice.
            # NOTE(review): unlike the unmasked branch, only the mask count is
            # detached here, so self.mean stays in the autograd graph —
            # confirm this asymmetry is intended.
            self.mean = (
                torch.sum(x, dim=dim2reduce, keepdim=True)
                / torch.sum(mask, dim=dim2reduce, keepdim=True).detach()
            )
            x_cent = x - self.mean
            x_cent = x_cent.masked_fill(mask == 0, 0)  # reset the masked values to 0
            self.stdev = torch.sqrt(
                torch.sum(x_cent * x_cent, dim=dim2reduce, keepdim=True)
                / torch.sum(mask, dim=dim2reduce, keepdim=True)
                + self.eps
            ).detach()

    def _normalize(self, x, mask):
        # Center, re-zero masked positions, scale to unit stdev, then apply
        # the optional learnable affine transform.
        x = x - self.mean
        if mask is not None:
            x = x.masked_fill(mask == 0, 0)  # reset the masked values to 0
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        # Exact inverse of _normalize, using the statistics stored by the
        # most recent "norm" call.
        if self.affine:
            x = x - self.affine_bias
            # eps*eps (~1e-10 at the default eps) guards against a zero
            # affine weight in the division.
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        x = x + self.mean
        return x
from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


class TimeFeature:
    """Base class: a callable mapping a DatetimeIndex to a scaled feature array."""

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.isocalendar().week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.
    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Raises a RuntimeError listing the supported frequencies when `freq_str`
    does not map to any known pandas offset type.
    """

    # Finer granularities include all coarser calendar features
    # (e.g. Minute gets minute/hour/day-of-week/day-of-month/day-of-year).
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    offset = to_offset(freq_str)

    # The first offset class the parsed offset is an instance of wins.
    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, freq='h'):
    """Stack all features for `freq` over `dates`: shape (n_features, len(dates))."""
    return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])
-------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import torch 4 | import matplotlib.pyplot as plt 5 | import random 6 | 7 | plt.switch_backend("agg") 8 | 9 | 10 | def adjust_learning_rate(optimizer, epoch, learning_rate, lradj): 11 | # Setup lr_adjust 12 | if lradj == "type1": 13 | lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch - 1) // 1))} 14 | elif lradj == "type2": 15 | lr_adjust = {2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 10: 5e-7, 15: 1e-7, 20: 5e-8} 16 | elif lradj == "type3": 17 | lr_adjust = { 18 | epoch: learning_rate 19 | if epoch < 3 20 | else learning_rate * (0.9 ** ((epoch - 3) // 1)) 21 | } 22 | elif lradj == "constant": 23 | lr_adjust = {epoch: learning_rate} 24 | elif lradj == "warmup": 25 | if epoch < 5: # increase lr for first 5 epochs 26 | lr_adjust = {epoch: learning_rate * (epoch + 1) / 5} 27 | else: # decrease lr for the rest 28 | lr_adjust = {epoch: learning_rate * (0.9 ** ((epoch - 5) // 1))} 29 | 30 | # Use lr_adjust to update learning rate on certain epochs 31 | if epoch in lr_adjust.keys(): 32 | lr = lr_adjust[epoch] 33 | for param_group in optimizer.param_groups: 34 | param_group["lr"] = lr 35 | print("Updating learning rate to {}".format(lr)) 36 | 37 | 38 | class EarlyStopping: 39 | def __init__(self, patience=7, verbose=False, delta=0): 40 | self.patience = patience 41 | self.verbose = verbose 42 | self.counter = 0 43 | self.best_score = None 44 | self.early_stop = False 45 | self.val_loss_min = np.Inf 46 | self.delta = delta 47 | 48 | def __call__(self, val_loss, model, path): 49 | score = -val_loss 50 | if self.best_score is None: 51 | self.best_score = score 52 | self.save_checkpoint(val_loss, model, path) 53 | elif score < self.best_score + self.delta: 54 | self.counter += 1 55 | print(f"EarlyStopping counter: {self.counter} out of {self.patience}") 56 | if self.counter >= self.patience: 57 | self.early_stop = True 58 | else: 59 | self.best_score = score 
def adjustment(gt, pred):
    """Point-adjust anomaly predictions against ground-truth segments.

    Standard "point adjustment" evaluation protocol: once any point inside a
    ground-truth anomaly segment is detected, the rest of that segment (as far
    as the two scans reach) is also marked as detected. Mutates `pred` (and
    returns both lists for convenience).

    Note: the backward scan stops before index 0 — mirroring the original
    implementation — so a segment starting at position 0 is not back-filled
    from a later detection.
    """
    in_anomaly = False
    for idx, label in enumerate(gt):
        if label == 1 and pred[idx] == 1 and not in_anomaly:
            # First detection inside this segment: flood-fill the segment.
            in_anomaly = True
            back = idx
            while back > 0:  # deliberately never inspects index 0
                if gt[back] == 0:
                    break
                if pred[back] == 0:
                    pred[back] = 1
                back -= 1
            ahead = idx
            while ahead < len(gt):
                if gt[ahead] == 0:
                    break
                if pred[ahead] == 0:
                    pred[ahead] = 1
                ahead += 1
        elif label == 0:
            in_anomaly = False
        if in_anomaly:
            pred[idx] = 1
    return gt, pred
def set_seed(seed: int) -> None:
    """Seed every RNG the project uses (python, numpy, torch, CUDA).

    Bug fix: the CUDA call was previously commented out, so multi-GPU /
    GPU runs were not reproducible. `manual_seed_all` is a no-op on
    CPU-only machines, so this is backward compatible.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
__init__(self, args): 20 | super().__init__() 21 | self.args = args 22 | 23 | self.C = args.enc_in if args.return_single_feature == False else 1 24 | 25 | # RevIN 26 | self.revin = RevIN(self.C, affine=True) 27 | 28 | # Input layer 29 | self._set_input_layer() 30 | 31 | # Output layer 32 | self._set_output_layer() 33 | 34 | # Model 35 | self._set_model() 36 | 37 | def _set_input_layer(self): 38 | if self.args.LLM == "gpt2": 39 | pos_embed_type = "none" 40 | else: 41 | # elif args.LLM == "llama": 42 | pos_embed_type = "learned" 43 | token_embed_type = ( 44 | "linear" 45 | if getattr(self.args, "token_embed_type", None) is None 46 | else self.args.token_embed_type 47 | ) 48 | temporal_embed_type = ( 49 | "learned" 50 | if getattr(self.args, "temporal_embed_type", None) is None 51 | else self.args.temporal_embed_type 52 | ) 53 | token_embed_kernel_size = ( 54 | 3 55 | if getattr(self.args, "token_embed_kernel_size", None) is None 56 | else self.args.token_embed_kernel_size 57 | ) 58 | self.input_layer = PatchEmbedding_temp( 59 | self.args.C_t, 60 | self.args.d_model, 61 | self.args.patch_len, 62 | self.args.stride, 63 | self.args.dropout, 64 | pos_embed_type=pos_embed_type, 65 | token_embed_type=token_embed_type, 66 | kernel_size=token_embed_kernel_size, 67 | temporal_embed_type=temporal_embed_type, 68 | freq=self.args.freq, 69 | ) # (B * C, T_p, D) 70 | 71 | def _set_output_layer(self): 72 | if self.args.task_name == "supervised_finetuning": 73 | self.output_layer = nn.Sequential( 74 | OrderedDict( 75 | [ 76 | ( 77 | "linear", 78 | nn.Linear( 79 | self.args.d_model, self.args.patch_len, bias=False 80 | ), 81 | ), 82 | ("dropout", nn.Dropout(self.args.dropout)), 83 | # (B * C, T_p, P) 84 | ] 85 | ) 86 | ) 87 | else: 88 | T_p = int((self.args.seq_len - self.args.patch_len) / self.args.stride + 2) 89 | T_out = self.args.pred_len 90 | self.output_layer = nn.Sequential( 91 | OrderedDict( 92 | [ 93 | ("flatten", nn.Flatten(start_dim=1)), 94 | # (B * C, T_p * D) 95 | ( 96 | 
    def _set_model(self):
        """Load the backbone LLM, truncate it, and set up PEFT / freezing.

        The requires_grad bookkeeping is order-sensitive: get_peft_model
        freezes everything first, and the loops below then selectively
        re-enable layernorm / positional-embedding parameters.
        """
        # Load LLM
        if self.args.no_pretrain:
            # Architecture only, randomly initialized — no pretrained weights.
            config = AutoConfig.from_pretrained(self.args.LLM_path / "config.json")
            self.model = AutoModel.from_config(config)
            # A randomly initialized backbone must be fully trainable.
            assert (
                self.args.no_freeze
            ), "If no_pretrain is True, no_freeze must be True."
        else:
            self.model = AutoModel.from_pretrained(self.args.LLM_path)

        # Only choose the first K layers
        # NOTE(review): `.h` is the GPT-2 block list; assumes a GPT-2-style
        # backbone — confirm for other LLMs.
        self.model.h = self.model.h[: self.args.first_k_layers]

        # Apply PEFT
        peft_method = self.args.peft_method
        if peft_method == "none":
            print("No PEFT applied.")

            if not self.args.no_freeze:  # we should freeze
                # (self.model) Unfreeze the parameters of wpe, and freeze the others
                for name, param in self.model.named_parameters():
                    if any(term in name for term in ["wpe"]):
                        # if any(term in name for term in ["ln", "wpe"]):
                        param.requires_grad = True
                    else:
                        param.requires_grad = False

        else:
            print(f"Apply PEFT: {peft_method}")

            # Set peft_params
            if peft_method == "adalora" and self.args.peft_params_lora_dropout == 0:
                # adalora with dropout=0 is problematic
                self.args.peft_params_lora_dropout = 0.01
            peft_params = {
                "r": self.args.peft_params_r,
                "lora_alpha": self.args.peft_params_lora_alpha,
                "lora_dropout": self.args.peft_params_lora_dropout,
            }
            peft_params["task_type"] = TaskType.FEATURE_EXTRACTION
            # GPT-2 fuses q/k/v into a single Conv1D named "c_attn";
            # fan_in_fan_out is required because Conv1D stores W transposed.
            peft_params["target_modules"] = ["c_attn"]
            peft_params["fan_in_fan_out"] = True

            # Use peft_params to get peft_config
            if peft_method == "lora":
                peft_config = LoraConfig(**peft_params)
            elif peft_method == "adalora":
                peft_config = AdaLoraConfig(**peft_params)
            else:
                raise NotImplementedError

            # Apply PEFT to model (all weights in self.model are frozen)
            self.model = get_peft_model(self.model, peft_config)

            if not self.args.no_freeze:  # we should freeze
                # (self.model) Only unfreeze the parameters of ln and wpe
                # (LoRA adapters are already trainable after get_peft_model).
                for name, param in self.model.named_parameters():
                    if any(term in name for term in ["ln", "wpe"]):
                        param.requires_grad = True
            else:
                # Unfreeze all parameters
                for param in self.model.parameters():
                    param.requires_grad = True

        # Linear probing
        if "lp" in self.args.ft_mode:  # "lp" or "lp_ft"
            print("Apply linear probing.")
            # Freeze revin, input_layer and model — only the output layer
            # stays trainable until linear_probe_to_fine_tuning() is called.
            for param in self.revin.parameters():
                param.requires_grad = False
            for param in self.input_layer.parameters():
                param.requires_grad = False
            for param in self.model.parameters():
                param.requires_grad = False
# (B * C, T_p, D) 210 | y = self.model(inputs_embeds=x).last_hidden_state # (B * C, T_p, D) 211 | y = self.output_layer(y) # (B, T_out, C) 212 | y = self.revin(y, "denorm") 213 | return y 214 | 215 | def imputation(self, x, x_mark, mask): # (B, T_in, C) 216 | x = self.revin(x, "norm", mask) # (B, T_in, C) with mask 217 | x = self.input_layer(x, x_mark) # (B * C, T_p, D) 218 | y = self.model(inputs_embeds=x).last_hidden_state # (B * C, T_p, D) 219 | y = self.output_layer(y) # (B, T_in, C) 220 | y = self.revin(y, "denorm") 221 | 222 | return y 223 | 224 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): 225 | if self.args.task_name == "supervised_finetuning": 226 | dec_out = self.supervised_finetuning(x_enc, x_mark_enc) 227 | return dec_out # (B, T_out, C), T_out = T_in 228 | elif self.args.task_name == "long_term_forecast": 229 | dec_out = self.forecast(x_enc, x_mark_enc) 230 | return dec_out # (B, T_out, C) 231 | else: 232 | raise NotImplementedError 233 | return None 234 | -------------------------------------------------------------------------------- /exp/exp_supervised_finetuning.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_factory import data_provider 2 | from data_provider.data_factory import data_provider 3 | from exp.exp_basic import Exp_Basic 4 | from utils.tools import EarlyStopping, adjust_learning_rate, visual 5 | import torch 6 | import torch.nn as nn 7 | from torch import optim 8 | import os 9 | import time 10 | import warnings 11 | import numpy as np 12 | from tqdm import tqdm 13 | import shutil 14 | from einops import rearrange 15 | import time 16 | from utils.tools import print_params 17 | 18 | warnings.filterwarnings("ignore") 19 | 20 | 21 | def patching_ci(x, patch_len, stride): 22 | # Prepare padding 23 | padding_patch_layer = nn.ReplicationPad1d((0, stride)) 24 | 25 | # do patching and embedding on tokens 26 | x = rearrange(x, "B T C -> B C T") # (B, C, T) 27 
def update_batch_x_y(batch_x, batch_y, args, enable_y_patching=True):
    """Jointly instance-normalize an (x, y) pair, optionally patching y.

    The input window and the last `args.stride` steps of the target are
    concatenated so both sides are normalized with the same per-sample,
    per-channel statistics; the target is then optionally converted into
    channel-independent patches via `patching_ci`.

    Returns:
        (batch_x, batch_y): normalized input (B, seq_len, C) and normalized
        target — (B, label_len + pred_len, C), or (B * C, T_p, P) when
        `enable_y_patching` is True.
    """
    # Stitch input + the final `stride` target steps into one sequence.
    merged = torch.cat((batch_x, batch_y[:, -args.stride :, :]), dim=1)

    # Reduce over every axis except batch (0) and channel (last).
    reduce_dims = tuple(range(1, merged.ndim - 1))
    mu = torch.mean(merged, dim=reduce_dims, keepdim=True).detach()
    sigma = torch.sqrt(
        torch.var(merged, dim=reduce_dims, keepdim=True, unbiased=False) + 1e-5
    ).detach()
    merged = (merged - mu) / sigma

    # Split the normalized sequence back into input and target windows.
    batch_x = merged[:, : args.seq_len, :]
    batch_y = merged[:, -(args.label_len + args.pred_len) :, :]

    # Channel-independent patching of the target (for the SFT objective).
    if enable_y_patching:
        batch_y = patching_ci(batch_y, args.patch_len, args.stride)

    return batch_x, batch_y
build model 58 | 59 | def _build_model(self): 60 | if getattr(self, "model", None) is not None: 61 | raise ValueError("Model already exists!") 62 | 63 | # Try to save C_t into args 64 | train_data, train_loader = self._get_data(flag="train") 65 | batch_x, batch_y, batch_x_mark, batch_y_mark = next(iter(train_loader)) 66 | self.args.C_t = batch_x_mark.shape[2] 67 | 68 | # Build model 69 | model = self.model_dict[self.args.model].Model(self.args).float() # Feed `args` 70 | print_params(model) 71 | 72 | return model 73 | 74 | def _get_data(self, flag): 75 | data_set, data_loader = data_provider(self.args, flag) 76 | return data_set, data_loader 77 | 78 | def _select_optimizer(self): 79 | model_optim = getattr(optim, self.args.sft_optim)( 80 | self.model.parameters(), 81 | lr=self.args.sft_learning_rate, 82 | weight_decay=self.args.sft_weight_decay, 83 | ) 84 | return model_optim 85 | 86 | def _select_criterion(self): 87 | criterion = nn.MSELoss() 88 | return criterion 89 | 90 | def train(self, use_tqdm=False): 91 | print( 92 | ">>>>> start training (supervised finetuning) : {}>>>>>".format( 93 | self.args.setting 94 | ) 95 | ) 96 | 97 | # Get data 98 | train_data, train_loader = self._get_data(flag="train") 99 | vali_data, vali_loader = self._get_data(flag="val") 100 | test_data, test_loader = self._get_data(flag="test") 101 | assert self.args.seq_len == self.args.label_len + self.args.pred_len, ( 102 | "seq_len should be equal to label_len + pred_len, but got " 103 | f"seq_len:{self.args.seq_len}, label_len:{self.args.label_len}, " 104 | f"pred_len:{self.args.pred_len}" 105 | ) 106 | assert self.args.pred_len == self.args.stride 107 | assert len(train_loader) > 0, "The train_loader is empty!" 108 | assert len(vali_loader) > 0, "The vali_loader is empty!" 109 | assert len(test_loader) > 0, "The test_loader is empty!" 
110 | 111 | path = os.path.join( 112 | self.args.checkpoints, self.args.setting 113 | ) # `setting` is just a path storing config 114 | if not os.path.exists(path): 115 | os.makedirs(path) 116 | 117 | start_time = time.time() 118 | time_now = time.time() 119 | 120 | train_steps = len(train_loader) 121 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 122 | 123 | model_optim = self._select_optimizer() 124 | criterion = self._select_criterion() 125 | 126 | # Automatic Mixed Precision (some op. are fp32, some are fp16) 127 | scaler = torch.cuda.amp.GradScaler(enabled=self.args.use_amp) # type: ignore 128 | 129 | for epoch in range(self.args.sft_train_epochs): 130 | iter_count = 0 131 | train_loss = [] 132 | 133 | self.model.train() 134 | epoch_time = time.time() 135 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in ( 136 | tqdm(enumerate(train_loader), total=len(train_loader)) 137 | if use_tqdm 138 | else enumerate(train_loader) 139 | ): 140 | batch_x, batch_y = update_batch_x_y(batch_x, batch_y, self.args) 141 | batch_y_shape = batch_y.shape 142 | iter_count += 1 143 | model_optim.zero_grad() 144 | batch_x = batch_x.float().to(self.device) 145 | batch_y = batch_y.float().to(self.device) 146 | batch_x_mark = batch_x_mark.float().to(self.device) 147 | batch_y_mark = batch_y_mark.float().to(self.device) 148 | 149 | # encoder - decoder 150 | with torch.cuda.amp.autocast(enabled=self.args.use_amp): # type: ignore 151 | outputs = self.model( 152 | batch_x, batch_x_mark, None, batch_y_mark 153 | ) # embedding + encoder + decoder 154 | 155 | assert ( 156 | batch_y_shape == batch_y.shape 157 | ), f"batch_y_shape: {batch_y_shape}, batch_y.shape: {batch_y.shape}" 158 | loss = criterion(outputs, batch_y) 159 | train_loss.append(loss.item()) 160 | 161 | # Show loss 162 | if (i + 1) % 100 == 0: 163 | print( 164 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format( 165 | i + 1, epoch + 1, loss.item() 166 | ) 167 | ) 168 | speed = (time.time() - 
time_now) / iter_count 169 | left_time = speed * ( 170 | (self.args.sft_train_epochs - epoch) * train_steps - i 171 | ) 172 | print( 173 | "\tspeed: {:.4f}s/iter; left time: {:.4f}s".format( 174 | speed, left_time 175 | ) 176 | ) 177 | iter_count = 0 178 | time_now = time.time() 179 | 180 | # Backward 181 | scaler.scale(loss).backward() # type: ignore 182 | scaler.step(model_optim) 183 | scaler.update() 184 | 185 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 186 | train_loss = np.average(train_loss) 187 | # At the end of each epoch, we evaluate the validation set and test set 188 | print(">>>>> start validation >>>>>") 189 | vali_loss, vali_mae = self.get_metrics(vali_loader) 190 | # print(">>>>> start testing >>>>>") 191 | # test_loss, test_mae = self.get_metrics(test_loader) 192 | 193 | print( 194 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f}".format( 195 | epoch + 1, train_steps, train_loss, vali_loss 196 | ) 197 | ) 198 | early_stopping(vali_loss, self.model, path) 199 | if early_stopping.early_stop: 200 | print("Early stopping") 201 | break 202 | 203 | adjust_learning_rate( 204 | model_optim, epoch + 1, self.args.sft_learning_rate, self.args.sft_lradj 205 | ) 206 | print("------------------------------------------------------------------") 207 | 208 | best_model_path = path + "/" + "checkpoint.pth" 209 | self.model.load_state_dict(torch.load(best_model_path), strict=False) 210 | # shutil.rmtree(path, ignore_errors=True) # delete the checkpoint folder 211 | 212 | return None 213 | 214 | metrics = {} # loss = mse 215 | print("### Calculating metrics for train ###") 216 | metrics["train_loss"], metrics["train_mae"] = self.get_metrics(train_loader) 217 | print("### Calculating metrics for vali ###") 218 | metrics["val_loss"], metrics["val_mae"] = self.get_metrics(vali_loader) 219 | print("### Calculating metrics for test ###") 220 | metrics["test_loss"], metrics["test_mae"] = self.get_metrics(test_loader) 221 | 
print("===============================") 222 | print(metrics) 223 | print("===============================") 224 | 225 | end_time = time.time() 226 | self.spent_time = end_time - start_time 227 | 228 | return metrics 229 | 230 | def get_metrics(self, data_loader, use_tqdm=False): 231 | total_mse = 0 232 | total_mae = 0 233 | total_samples = 0 234 | 235 | self.model.eval() 236 | with torch.no_grad(): 237 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in ( 238 | tqdm(enumerate(data_loader), total=len(data_loader)) 239 | if use_tqdm 240 | else enumerate(data_loader) 241 | ): 242 | batch_x, batch_y = update_batch_x_y(batch_x, batch_y, self.args) 243 | batch_x = batch_x.float().to(self.device) 244 | batch_y = batch_y.float().to(self.device) 245 | 246 | batch_x_mark = batch_x_mark.float().to(self.device) 247 | batch_y_mark = batch_y_mark.float().to(self.device) 248 | 249 | # decoder input 250 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() 251 | dec_inp = ( 252 | torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) 253 | .float() 254 | .to(self.device) 255 | ) 256 | # encoder - decoder 257 | with torch.cuda.amp.autocast(enabled=self.args.use_amp): # type: ignore 258 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 259 | 260 | pred = outputs.detach() 261 | true = batch_y.detach() 262 | 263 | batch_mse = torch.mean((pred - true) ** 2).item() 264 | batch_mae = torch.mean(torch.abs(pred - true)).item() 265 | 266 | total_mse += batch_mse * len(batch_x) 267 | total_mae += batch_mae * len(batch_x) 268 | total_samples += len(batch_x) 269 | 270 | mse = total_mse / total_samples 271 | mae = total_mae / total_samples 272 | 273 | return mse, mae 274 | -------------------------------------------------------------------------------- /exp/exp_long_term_forecasting.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_factory import data_provider 2 | from 
exp.exp_basic import Exp_Basic 3 | from utils.tools import EarlyStopping, adjust_learning_rate, visual 4 | import torch 5 | import torch.nn as nn 6 | from torch import optim 7 | import os 8 | import time 9 | import warnings 10 | import numpy as np 11 | from tqdm import tqdm 12 | import shutil 13 | from utils.tools import print_params 14 | 15 | warnings.filterwarnings("ignore") 16 | 17 | 18 | class Exp_Long_Term_Forecast(Exp_Basic): 19 | def __init__(self, args): 20 | super(Exp_Long_Term_Forecast, self).__init__(args) 21 | # 1. set args, model_dict, device into self 22 | # 2. build model 23 | 24 | def _build_model(self): 25 | if getattr(self, "model", None) is not None: 26 | raise ValueError("Model already exists!") 27 | 28 | # Try to save C_t into args 29 | train_data, train_loader = self._get_data(flag="train") 30 | batch_x, batch_y, batch_x_mark, batch_y_mark = next(iter(train_loader)) 31 | self.args.C_t = batch_x_mark.shape[2] 32 | 33 | # Build model 34 | model = ( 35 | self.model_dict[self.args.model].Model(self.args).float() 36 | ) # Feed `args` 37 | print_params(model) 38 | 39 | return model 40 | 41 | def _get_data(self, flag): 42 | data_set, data_loader = data_provider(self.args, flag) 43 | return data_set, data_loader 44 | 45 | def _select_optimizer(self): 46 | model_optim = getattr(optim, self.args.dft_optim)( 47 | self.model.parameters(), 48 | lr=self.args.dft_learning_rate, 49 | weight_decay=self.args.dft_weight_decay, 50 | ) 51 | return model_optim 52 | 53 | def _select_criterion(self): 54 | criterion = nn.MSELoss() 55 | return criterion 56 | 57 | def train(self, use_tqdm=False): 58 | print( 59 | f">>>>> start training (long-term forecasting: {self.args.pred_len}) : {self.args.setting}>>>>>" 60 | ) 61 | 62 | # Load the model (if we have already trained it with sft) 63 | if self.args.enable_supervised_finetuning: 64 | checkpoint = torch.load( 65 | os.path.join("./checkpoints/sft_" + self.args.setting, "checkpoint.pth") 66 | ) 67 | # Get a list of keys 
related to the output layer to delete 68 | keys_related_to_output_layer = [ 69 | k for k in checkpoint.keys() if "output_layer" in k 70 | ] 71 | for key in keys_related_to_output_layer: 72 | del checkpoint[key] 73 | 74 | # Load the modified state dict 75 | self.model.load_state_dict(checkpoint, strict=False) 76 | print("### Successfully loaded the model trained with sft ###") 77 | 78 | # Get data 79 | train_data, train_loader = self._get_data(flag="train") 80 | vali_data, vali_loader = self._get_data(flag="val") 81 | test_data, test_loader = self._get_data(flag="test") 82 | assert len(train_loader) > 0, "The train_loader is empty!" 83 | assert len(vali_loader) > 0, "The vali_loader is empty!" 84 | assert len(test_loader) > 0, "The test_loader is empty!" 85 | 86 | path = os.path.join( 87 | self.args.checkpoints, self.args.setting 88 | ) # `setting` is just a path storing config 89 | if not os.path.exists(path): 90 | os.makedirs(path) 91 | 92 | start_time = time.time() 93 | time_now = time.time() 94 | 95 | train_steps = len(train_loader) 96 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 97 | 98 | model_optim = self._select_optimizer() 99 | criterion = self._select_criterion() 100 | 101 | # Automatic Mixed Precision (some op. 
are fp32, some are fp16) 102 | scaler = torch.cuda.amp.GradScaler(enabled=self.args.use_amp) # type: ignore 103 | 104 | best_test_loss, best_test_mae, best_epoch = ( 105 | np.inf, 106 | np.inf, 107 | 0, 108 | ) # for capturing the best test loss during training 109 | for epoch in range(self.args.dft_train_epochs): 110 | iter_count = 0 111 | train_loss = [] 112 | 113 | self.model.train() 114 | epoch_time = time.time() 115 | 116 | # Change from linear probing to fine-tuning in the middle of training 117 | # (Only happens in the downstream task, not in the supervised fine-tuning) 118 | if ( 119 | self.args.ft_mode == "lp_ft" 120 | and epoch == self.args.dft_train_epochs // 2 121 | ): 122 | self.model.linear_probe_to_fine_tuning() 123 | print_params(self.model) 124 | 125 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in ( 126 | tqdm(enumerate(train_loader), total=len(train_loader)) 127 | if use_tqdm 128 | else enumerate(train_loader) 129 | ): 130 | batch_y_shape = batch_y.shape 131 | iter_count += 1 132 | model_optim.zero_grad() 133 | batch_x = batch_x.float().to(self.device) 134 | batch_y = batch_y.float().to(self.device) 135 | batch_x_mark = batch_x_mark.float().to(self.device) 136 | batch_y_mark = batch_y_mark.float().to(self.device) 137 | 138 | # encoder - decoder 139 | with torch.cuda.amp.autocast(enabled=self.args.use_amp): # type: ignore 140 | outputs = self.model( 141 | batch_x, batch_x_mark, None, batch_y_mark 142 | ) # embedding + encoder + decoder 143 | 144 | # M: multivariate predict multivariate, S: univariate predict univariate, MS: multivariate predict univariate 145 | f_dim = -1 if self.args.features == "MS" else 0 146 | outputs = outputs[:, -self.args.pred_len :, f_dim:] 147 | batch_y = batch_y[:, -self.args.pred_len :, f_dim:] 148 | assert ( 149 | batch_y_shape == batch_y.shape 150 | ), f"batch_y_shape: {batch_y_shape}, batch_y.shape: {batch_y.shape}" 151 | loss = criterion(outputs, batch_y) 152 | train_loss.append(loss.item()) 153 | 154 | # 
Show loss 155 | if (i + 1) % 100 == 0: 156 | print( 157 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format( 158 | i + 1, epoch + 1, loss.item() 159 | ) 160 | ) 161 | speed = (time.time() - time_now) / iter_count 162 | left_time = speed * ( 163 | (self.args.dft_train_epochs - epoch) * train_steps - i 164 | ) 165 | print( 166 | "\tspeed: {:.4f}s/iter; left time: {:.4f}s".format( 167 | speed, left_time 168 | ) 169 | ) 170 | iter_count = 0 171 | time_now = time.time() 172 | 173 | # Backward 174 | scaler.scale(loss).backward() # type: ignore 175 | scaler.step(model_optim) 176 | scaler.update() 177 | 178 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 179 | train_loss = np.average(train_loss) 180 | # At the end of each epoch, we evaluate the validation set and test set 181 | print(">>>>> start validation >>>>>") 182 | vali_loss, vali_mae = self.get_metrics(vali_loader) 183 | print(">>>>> start testing >>>>>") 184 | test_loss, test_mae = self.get_metrics(test_loader) 185 | if test_loss < best_test_loss: 186 | best_test_loss = test_loss 187 | best_test_mae = test_mae 188 | best_epoch = epoch + 1 189 | 190 | print( 191 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 192 | epoch + 1, train_steps, train_loss, vali_loss, test_loss 193 | ) 194 | ) 195 | early_stopping(vali_loss, self.model, path) 196 | if early_stopping.early_stop: 197 | print("Early stopping") 198 | break 199 | 200 | adjust_learning_rate( 201 | model_optim, epoch + 1, self.args.dft_learning_rate, self.args.dft_lradj 202 | ) 203 | print("------------------------------------------------------------------") 204 | 205 | best_model_path = path + "/" + "checkpoint.pth" 206 | self.model.load_state_dict(torch.load(best_model_path)) 207 | # shutil.rmtree(path, ignore_errors=True) # delete the checkpoint folder 208 | 209 | metrics = {} # loss = mse 210 | # print("### Calculating metrics for train ###") 211 | # metrics["train_loss"], 
metrics["train_mae"] = self.get_metrics(train_loader) 212 | # print("### Calculating metrics for vali ###") 213 | # metrics["val_loss"], metrics["val_mae"] = self.get_metrics(vali_loader) 214 | # print("### Calculating metrics for test ###") 215 | # metrics["test_loss"], metrics["test_mae"] = self.get_metrics(test_loader) 216 | metrics["best_test_loss"], metrics["best_test_mae"], metrics["best_epoch"] = ( 217 | best_test_loss, 218 | best_test_mae, 219 | best_epoch, 220 | ) 221 | print("===============================") 222 | print(metrics) 223 | print("===============================") 224 | 225 | end_time = time.time() 226 | self.spent_time = end_time - start_time 227 | 228 | return metrics 229 | 230 | def get_metrics(self, data_loader, use_tqdm=False): 231 | total_mse = 0 232 | total_mae = 0 233 | total_samples = 0 234 | 235 | self.model.eval() 236 | with torch.no_grad(): 237 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in ( 238 | tqdm(enumerate(data_loader), total=len(data_loader)) 239 | if use_tqdm 240 | else enumerate(data_loader) 241 | ): 242 | batch_x = batch_x.float().to(self.device) 243 | batch_y = batch_y.float().to(self.device) 244 | 245 | batch_x_mark = batch_x_mark.float().to(self.device) 246 | batch_y_mark = batch_y_mark.float().to(self.device) 247 | 248 | # decoder input 249 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len :, :]).float() 250 | dec_inp = ( 251 | torch.cat([batch_y[:, : self.args.label_len, :], dec_inp], dim=1) 252 | .float() 253 | .to(self.device) 254 | ) 255 | # encoder - decoder 256 | with torch.cuda.amp.autocast(enabled=self.args.use_amp): # type: ignore 257 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 258 | 259 | f_dim = -1 if self.args.features == "MS" else 0 260 | outputs = outputs[:, -self.args.pred_len :, f_dim:] 261 | batch_y = batch_y[:, -self.args.pred_len :, f_dim:] 262 | pred = outputs.detach() 263 | true = batch_y.detach() 264 | 265 | batch_mse = torch.mean((pred - true) ** 
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding (sin on even dims, cos on odd).

    Precomputes `max_len` positional vectors once and returns the first T of
    them in forward, shape (1, T, d_model), ready for broadcasting.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        # Bug fix: the original wrote `pe.require_grad = False` — a typo that
        # just created an unused attribute. The table is registered as a
        # buffer (never optimized), but make the intent explicit and correct.
        pe.requires_grad_(False)

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (
            torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
        ).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        self.register_buffer("pe", pe)

    def forward(self, x):
        # Return the first x.size(1) encodings: shape (1, T, d_model).
        return self.pe[:, : x.size(1)]
class TokenEmbedding(nn.Module):
    """Project per-timestep features to d_model with a circular Conv1d.

    Input is (B, T, C); output is (B, T, D). The convolution is bias-free and
    initialized with Kaiming-normal (fan-in, leaky-relu gain).
    """

    def __init__(self, c_in, d_model, kernel_size=3):
        super().__init__()
        pad = (kernel_size - 1) // 2  # `same` padding
        self.tokenConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=d_model,
            kernel_size=kernel_size,
            padding=pad,
            padding_mode="circular",
            bias=False,
        )
        for module in self.modules():
            if isinstance(module, nn.Conv1d):
                nn.init.kaiming_normal_(
                    module.weight, mode="fan_in", nonlinearity="leaky_relu"
                )

    def forward(self, x):
        # (B, T, C) -> (B, C, T) -> conv -> (B, D, T) -> (B, T, D)
        return self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)


class FixedEmbedding(nn.Module):
    """Embedding table filled with frozen sinusoidal weights (never trained)."""

    def __init__(self, c_in, d_model):
        super().__init__()
        table = torch.zeros(c_in, d_model).float()

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (
            torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
        ).exp()
        table[:, 0::2] = torch.sin(position * div_term)
        table[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(table, requires_grad=False)

    def forward(self, x):
        # Detach so the frozen table never receives gradients.
        return self.emb(x).detach()


class TemporalEmbedding(nn.Module):
    """Sum of calendar-component embeddings (month/day/weekday/hour[/minute]).

    Expects x[..., 0..3] = month, day, weekday, hour; x[..., 4] = quarter-hour
    bucket, used only when freq == "t".
    """

    def __init__(self, d_model, embed_type="fixed", freq="h"):
        super().__init__()
        minute_size = 4  # 15-minute buckets
        hour_size = 24
        weekday_size = 7
        day_size = 32
        month_size = 13

        Embed = FixedEmbedding if embed_type == "fixed" else nn.Embedding
        if freq == "t":
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        x = x.long()
        # minute term is a scalar 0.0 when no minute table was created
        minute_x = (
            self.minute_embed(x[:, :, 4]) if hasattr(self, "minute_embed") else 0.0
        )
        hour_x = self.hour_embed(x[:, :, 3])
        weekday_x = self.weekday_embed(x[:, :, 2])
        day_x = self.day_embed(x[:, :, 1])
        month_x = self.month_embed(x[:, :, 0])

        return hour_x + weekday_x + day_x + month_x + minute_x
class TimeFeatureEmbedding(nn.Module):
    """Linear projection of continuous time features ("timeF" encoding) to d_model."""

    def __init__(self, d_model, embed_type="timeF", freq="h"):
        super().__init__()
        # Number of continuous time features produced for each sampling freq.
        freq_map = {"h": 4, "t": 5, "s": 6, "m": 1, "a": 1, "w": 2, "d": 3, "b": 3}
        self.embed = nn.Linear(freq_map[freq], d_model, bias=False)

    def forward(self, x):
        return self.embed(x)


class DataEmbedding(nn.Module):
    """Token + temporal + positional embedding with residual dropout."""

    def __init__(self, c_in, d_model, embed_type="fixed", freq="h", dropout=0.1):
        super().__init__()
        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type != "timeF":
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq
            )
        else:
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq
            )
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        # The temporal term is skipped entirely when no time marks are given.
        if x_mark is None:
            embedded = self.value_embedding(x) + self.position_embedding(x)
        else:
            embedded = (
                self.value_embedding(x)
                + self.temporal_embedding(x_mark)
                + self.position_embedding(x)
            )
        return self.dropout(embedded)
class DataEmbedding_wo_pos(nn.Module):
    """DataEmbedding variant without the positional term (token + temporal only)."""

    def __init__(self, c_in, d_model, embed_type="fixed", freq="h", dropout=0.1):
        super().__init__()
        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        if embed_type != "timeF":
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq
            )
        else:
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq
            )
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        if x_mark is None:
            embedded = self.value_embedding(x)
        else:
            embedded = self.value_embedding(x) + self.temporal_embedding(x_mark)
        return self.dropout(embedded)


class PatchEmbedding(nn.Module):
    """Patchify each channel along time and embed every patch to d_model.

    Input x is (B, C, T); output is ((B*C, T_p, D), n_vars), where T_p is the
    number of patches after right-replication padding and unfolding.
    """

    def __init__(
        self, d_model, patch_len, stride, padding, dropout, learnable_position=False
    ):
        super().__init__()
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))

        # Project each length-P patch onto a d-dim vector space.
        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)

        # Positional embedding: trainable parameter table or fixed sin/cos.
        if learnable_position:
            self.position_embedding = PositionalEmbedding_trainable(d_model)
        else:
            self.position_embedding = PositionalEmbedding(d_model)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        n_vars = x.shape[1]
        padded = self.padding_patch_layer(x)  # (B, C, T) -> (B, C, T+S)
        patches = padded.unfold(
            dimension=-1, size=self.patch_len, step=self.stride
        )  # (B, C, T_p, P)
        patches = torch.reshape(
            patches,
            (patches.shape[0] * patches.shape[1], patches.shape[2], patches.shape[3]),
        )  # (B * C, T_p, P)
        embedded = self.value_embedding(patches) + self.position_embedding(patches)
        return self.dropout(embedded), n_vars
class PatchEmbedding_temp(nn.Module):
    """Patch embedding with configurable positional/token/temporal components.

    forward(x, x_mark) maps x of shape (B, T, C) to (B*C, T_p, D): the series
    is right-padded by `stride` (replication), unfolded into length-`patch_len`
    patches, and embedded per channel. When time marks and a temporal embedding
    are both available, each patch is additionally tagged with the embedding of
    its first timestamp's calendar features.
    """

    def __init__(
        self,
        C_t,
        d_model,
        patch_len,
        stride,
        dropout,
        pos_embed_type="none",
        token_embed_type="linear",
        kernel_size=3,
        temporal_embed_type="learned",
        freq="h",
    ):
        super().__init__()
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = nn.ReplicationPad1d((0, stride))

        # Positional embedding: none / learnable (nn.Parameter) / fixed sin-cos.
        if pos_embed_type == "none":
            self.position_embedding = None
        elif pos_embed_type == "learnable":
            self.position_embedding = PositionalEmbedding_trainable(d_model)
        else:
            self.position_embedding = PositionalEmbedding(d_model)

        # Token embedding: linear projection or circular convolution.
        if token_embed_type == "linear":
            self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
        elif token_embed_type == "conv":
            self.value_embedding = TokenEmbedding(
                c_in=patch_len, d_model=d_model, kernel_size=kernel_size
            )

        # Temporal embedding: none / fixed / learned / timeF.
        if temporal_embed_type == "none":
            self.temporal_embedding = None
        elif temporal_embed_type == "timeF":
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=temporal_embed_type, freq=freq
            )
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=temporal_embed_type, freq=freq
            )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, x_mark=None):
        n_batch, _, n_channels = x.shape  # (B, T, C)

        # Patchify values: (B, T, C) -> (B*C, T_p, P) -> (B*C, T_p, D).
        x = x.permute(0, 2, 1)  # (B, C, T)
        x = self.padding_patch_layer(x)  # (B, C, T+S)
        x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)  # (B, C, T_p, P)
        x = x.reshape(n_batch * n_channels, x.shape[2], self.patch_len)  # (B*C, T_p, P)
        x = self.value_embedding(x)  # (B*C, T_p, D)

        # Patchify the time marks identically, shared across all channels.
        if x_mark is not None and self.temporal_embedding is not None:
            x_mark = x_mark.permute(0, 2, 1)  # (B, C_t, T)
            x_mark = self.padding_patch_layer(x_mark)  # (B, C_t, T+S)
            x_mark = x_mark.unfold(
                dimension=-1, size=self.patch_len, step=self.stride
            )  # (B, C_t, T_p, P)
            x_mark = x_mark.unsqueeze(1).repeat(1, n_channels, 1, 1, 1)  # (B, C, C_t, T_p, P)
            n_patches = x_mark.shape[3]
            x_mark = x_mark.permute(0, 1, 3, 4, 2).reshape(
                n_batch * n_channels, n_patches, self.patch_len, -1
            )  # (B*C, T_p, P, C_t)
            # Each patch is represented by the time features of its first step.
            x_mark = x_mark[:, :, 0, :]  # (B*C, T_p, C_t)
            x_mark = self.temporal_embedding(x_mark)  # (B*C, T_p, D)
        else:
            # Even if x_mark was given, it is unusable without a temporal embedding.
            x_mark = None

        if self.position_embedding is not None:
            x = x + self.position_embedding(x)

        return self.dropout(x + x_mark) if x_mark is not None else self.dropout(x)
class PatchEmbedding_temp_old(nn.Module):
    """Older patch embedding variant.

    Differs from PatchEmbedding_temp in the temporal component: instead of
    selecting a patch's first timestamp, it flattens all (patch_len x C_t)
    time-mark values of a patch and projects them with a single linear layer.
    """

    def __init__(
        self,
        C_t,
        d_model,
        patch_len,
        stride,
        dropout,
        pos_embed_type="none",
        token_embed_type="linear",
        temporal_embed_type="learnable",
    ):
        super().__init__()
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = nn.ReplicationPad1d((0, stride))

        # Positional embedding: none / learnable (nn.Parameter) / fixed sin-cos.
        if pos_embed_type == "none":
            self.position_embedding = None
        elif pos_embed_type == "learnable":
            self.position_embedding = PositionalEmbedding_trainable(d_model)
        else:
            self.position_embedding = PositionalEmbedding(d_model)

        # Token embedding: linear projection or circular convolution.
        if token_embed_type == "linear":
            self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
        elif token_embed_type == "conv":
            self.value_embedding = TokenEmbedding(c_in=patch_len, d_model=d_model)

        # Temporal embedding: none / learnable (flatten marks, then project).
        if temporal_embed_type == "none":
            self.temporal_embedding = None
        elif temporal_embed_type == "learnable":
            self.temporal_embedding = nn.Sequential(
                OrderedDict(
                    [
                        ("flatten", nn.Flatten(start_dim=-2)),
                        ("linear", nn.Linear(patch_len * C_t, d_model, bias=False)),
                    ]
                )
            )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, x_mark=None):
        n_batch, _, n_channels = x.shape  # (B, T, C)

        # Patchify values: (B, T, C) -> (B*C, T_p, P) -> (B*C, T_p, D).
        x = x.permute(0, 2, 1)  # (B, C, T)
        x = self.padding_patch_layer(x)  # (B, C, T+S)
        x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)  # (B, C, T_p, P)
        x = x.reshape(n_batch * n_channels, x.shape[2], self.patch_len)  # (B*C, T_p, P)
        x = self.value_embedding(x)  # (B*C, T_p, D)

        # Patchify the time marks identically, shared across all channels.
        if x_mark is not None and self.temporal_embedding is not None:
            x_mark = x_mark.permute(0, 2, 1)  # (B, C_t, T)
            x_mark = self.padding_patch_layer(x_mark)  # (B, C_t, T+S)
            x_mark = x_mark.unfold(
                dimension=-1, size=self.patch_len, step=self.stride
            )  # (B, C_t, T_p, P)
            x_mark = x_mark.unsqueeze(1).repeat(1, n_channels, 1, 1, 1)  # (B, C, C_t, T_p, P)
            n_patches = x_mark.shape[3]
            x_mark = x_mark.permute(0, 1, 3, 4, 2).reshape(
                n_batch * n_channels, n_patches, self.patch_len, -1
            )  # (B*C, T_p, P, C_t)
            # Flatten + project the whole patch of marks (no first-step selection).
            x_mark = self.temporal_embedding(x_mark)  # (B*C, T_p, D)
        else:
            # Even if x_mark was given, it is unusable without a temporal embedding.
            x_mark = None

        if self.position_embedding is not None:
            x = x + self.position_embedding(x)

        return self.dropout(x + x_mark) if x_mark is not None else self.dropout(x)
def _str2bool(value):
    """Parse a command-line boolean.

    `type=bool` is a classic argparse pitfall: bool("False") is True, so a
    flag declared that way can never be disabled from the CLI. This helper
    accepts the usual spellings and rejects anything else.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def get_args_from_parser() -> argparse.Namespace:
    """Build the LLM4TS argument namespace from the command line.

    Uses `parse_known_args` so unrelated argv entries are ignored. After
    parsing: `use_gpu` is AND-ed with CUDA availability, `root_path` is
    forced to the current working directory, and `return_single_feature`
    is enabled so batch_size is invariant to the number of features.

    Bug fix vs. the original: `--enable_supervised_finetuning` and
    `--use_gpu` used `type=bool`, under which any non-empty string
    (including "False") parsed as True; they now use a real boolean parser.
    Defaults are unchanged, so existing invocations behave identically.
    """
    parser = argparse.ArgumentParser(description="LLM4TS")

    # * basic config
    parser.add_argument(
        "--task_name",
        type=str,
        default="long_term_forecast",
        help="time-series task",
        choices=["long_term_forecast"],
    )
    parser.add_argument("--model_id", type=str, default="test", help="model id")
    parser.add_argument(
        "--model",
        type=str,
        default="LLM4TS",
        help="model name",
    )
    parser.add_argument(
        "--overwrite_args",
        action="store_true",
        help="overwrite args with fixed_params and tunable_params",
        default=False,
    )
    parser.add_argument(
        "--delete_checkpoints",
        action="store_true",
        help="delete checkpoints after training",
        default=True,
    )

    # * data loader
    parser.add_argument(
        "--data_name",
        type=str,
        default="ETTh1",
        help="dataset name",
        choices=[
            "Weather",
            "ETTh1",
            "ETTh2",
            "ETTm1",
            "ETTm2",
            "ECL",
            "Traffic",
        ],
    )
    parser.add_argument("--data", type=str, default="ETTh1", help="dataset type")
    parser.add_argument(
        "--root_path",
        type=str,
        default="./data/ETT/",
        help="root path of the data file",
    )
    parser.add_argument("--data_path", type=str, default="ETTh1.csv", help="data file")
    parser.add_argument(
        "--features",
        type=str,
        default="M",
        help="forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate (only for downstream tasks)",
    )
    parser.add_argument(
        "--target", type=str, default="OT", help="target feature in S or MS task"
    )
    parser.add_argument(
        "--freq",
        type=str,
        default="h",
        help="freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h",
    )
    parser.add_argument(
        "--checkpoints",
        type=str,
        default="./checkpoints/",
        help="location of model checkpoints",
    )
    parser.add_argument(
        "--pred_len_list",
        type=int,
        nargs="+",
        default=[96, 192, 336, 720],
        help="the prediction length list",
    )
    parser.add_argument(
        "--percent",
        type=int,
        default=100,
        help="the percentage of the training set",
    )

    # * forecasting task
    parser.add_argument("--seq_len", type=int, default=96, help="input sequence length")
    parser.add_argument("--label_len", type=int, default=0, help="start token length")
    parser.add_argument(
        "--pred_len", type=int, default=96, help="prediction sequence length"
    )

    # * model architecture
    parser.add_argument(
        "--LLM",
        type=str,
        default="gpt2",
        help="the pretrained LLM model",
        choices=["gpt2"],
    )
    parser.add_argument(
        "--no_freeze",
        action="store_true",
        help="if False, we will freeze the parameters of the pretrained LLM model",
        default=False,
    )
    parser.add_argument(
        "--no_pretrain",
        action="store_true",
        help="if False, we will use the pretrained weights of the LLM model",
        default=False,
    )
    parser.add_argument(
        "--first_k_layers",
        type=int,
        default=6,
        help="the number of initial layers to be used in LLM",
    )
    parser.add_argument("--enc_in", type=int, default=7, help="encoder input size (C)")
    parser.add_argument("--d_model", type=int, default=512, help="dimension of model")
    parser.add_argument("--n_heads", type=int, default=8, help="num of heads")
    parser.add_argument("--dropout", type=float, default=0.1, help="dropout")
    parser.add_argument(
        "--embed",
        type=str,
        default="timeF",
        help="time features encoding, options:[timeF, fixed, learned]",
    )
    parser.add_argument(
        "--token_embed_type",
        type=str,
        default="conv",
        choices=["linear", "conv"],
        help="token embedding type",
    )
    parser.add_argument(
        "--token_embed_kernel_size",
        type=int,
        default=3,
        help="token embedding kernel size (for conv)",
    )
    parser.add_argument(
        "--temporal_embed_type",
        type=str,
        default="learned",
        choices=["none", "fixed", "learned", "timeF"],
        help="temporal embedding type",
    )
    parser.add_argument("--activation", type=str, default="gelu", help="activation")
    parser.add_argument(
        "--patch_len", type=int, default=16, help="the length of the patch"
    )
    parser.add_argument(
        "--stride", type=int, default=16, help="the stride of the patch"
    )

    # * peft (LoRA-related)
    parser.add_argument(
        "--peft_method",
        type=str,
        default="lora",
        choices=["none", "lora", "adalora"],
        help="PEFT method",
    )
    parser.add_argument(
        "--peft_params_r",
        type=int,
        default=8,
        help="the dimension of the low-rank matrices",
    )
    parser.add_argument(
        "--peft_params_lora_alpha",
        type=int,
        default=32,
        help="the scaling factor for the low-rank matrices",
    )
    parser.add_argument(
        "--peft_params_lora_dropout",
        type=float,
        default=0.1,
        help="the dropout probability of the LoRA layers",
    )

    # * training_stage_params (sft)
    parser.add_argument(
        "--enable_supervised_finetuning",
        type=_str2bool,  # was type=bool, which parsed "False" as True
        default=True,
        help="enable supervised finetuning (sft)",
    )
    parser.add_argument(
        "--sft_optim",
        type=str,
        default="Adam",
        help="optimizer",
        choices=["Adam", "AdamW", "RMSprop"],
    )
    parser.add_argument(
        "--sft_learning_rate", type=float, default=0.001, help="optimizer learning rate"
    )
    parser.add_argument(
        "--sft_lradj", type=str, default="type1", help="adjust learning rate"
    )
    parser.add_argument("--sft_weight_decay", type=float, default=0.001)
    parser.add_argument("--sft_train_epochs", type=int, default=10, help="train epochs")

    # * training_stage_params (dft)
    parser.add_argument(
        "--dft_optim",
        type=str,
        default="Adam",
        help="optimizer",
        choices=["Adam", "AdamW", "RMSprop"],
    )
    parser.add_argument(
        "--dft_learning_rate", type=float, default=0.001, help="optimizer learning rate"
    )
    parser.add_argument(
        "--dft_lradj", type=str, default="type1", help="adjust learning rate"
    )
    parser.add_argument("--dft_weight_decay", type=float, default=0.001)
    parser.add_argument("--dft_train_epochs", type=int, default=10, help="train epochs")

    # * training_stage_params (shared)
    parser.add_argument(
        "--num_workers", type=int, default=8, help="data loader num workers"
    )
    parser.add_argument(
        "--batch_size", type=int, default=128, help="batch size of train input data"
    )
    parser.add_argument(
        "--patience", type=int, default=3, help="early stopping patience"
    )
    parser.add_argument(
        "--delta", type=float, default=0.0001, help="early stopping delta"
    )
    parser.add_argument(
        "--ft_mode",
        type=str,
        default="lp_ft",
        choices=["lp_ft", "lp", "ft"],
        help="fine-tuning mode (it should be `ft` for sft and `lp_ft` for dft)",
    )

    # * Hardware
    parser.add_argument(
        "--use_gpu",
        type=_str2bool,  # was type=bool, which parsed "False" as True
        default=True,
        help="use gpu",
    )
    parser.add_argument(
        "--use_amp",
        action="store_true",
        help="use automatic mixed precision training",
        default=True,
    )

    args, _ = parser.parse_known_args()
    args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
    args.root_path = Path.cwd()  # Set this outside of the trainable function

    args.return_single_feature = (
        True  # make batch_size invariant to the number of features
    )

    return args
def update_args(args, fixed_params, tunable_params):
    """Merge parameter dicts into args and derive the sft/dft namespaces."""
    # Check if there are duplicated keys
    duplicated_keys = set(fixed_params.keys()) & set(tunable_params.keys())
    assert not duplicated_keys, f"Duplicated keys found: {duplicated_keys}"

    # Update args from fixed_params, tunable_params, and dataset
    if args.overwrite_args:
        args = update_args_from_fixed_params(args, fixed_params)
        args = update_args_from_tunable_params(args, tunable_params)
    args = update_args_from_dataset(args)

    args.setting = "{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_eb{}".format(
        args.task_name,
        args.model_id,
        args.model,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.embed,
    )
    print(f"Args in experiment: {args}")

    # sft uses full fine-tuning, multivariate features, and predicts one stride.
    sft_args = deepcopy(args)
    sft_args.task_name = "supervised_finetuning"
    sft_args.ft_mode = "ft"  # no need to probe
    sft_args.features = "M"  # there's no univariate task in sft
    sft_args.pred_len = sft_args.stride
    sft_args.label_len = sft_args.seq_len - sft_args.pred_len
    sft_args.setting = "sft_" + sft_args.setting

    # dft_args is simply a copy of the (updated) args.
    dft_args = deepcopy(args)

    return sft_args, dft_args


def get_exp(args):
    """Instantiate the experiment class matching args.task_name."""
    if args.task_name == "long_term_forecast":
        return Exp_Long_Term_Forecast(args)
    if args.task_name == "supervised_finetuning":
        return Exp_Supervised_Finetuning(args)
    raise NotImplementedError


def trainable(
    tunable_params: dict,
    fixed_params: dict,
    args: argparse.Namespace,
) -> dict:
    """Run sft (optional) + one dft per pred_len; return aggregated metrics."""
    sft_args, dft_args = update_args(args, fixed_params, tunable_params)

    # Supervised fine-tuning stage (optional).
    if dft_args.enable_supervised_finetuning:
        get_exp(sft_args).train(use_tqdm=True)

    # Downstream fine-tuning: one run per prediction length.
    dft_metrics_dict = {}
    for pred_len in dft_args.pred_len_list:
        dft_args.pred_len = pred_len
        dft_metrics_dict[pred_len] = get_exp(dft_args).train(use_tqdm=True)

    # Aggregate: averages across horizons plus per-horizon numbers.
    return_metrics = {
        "avg_mse": np.mean(
            [v["best_test_loss"] for v in dft_metrics_dict.values()]
        ),
        "avg_mae": np.mean(
            [v["best_test_mae"] for v in dft_metrics_dict.values()]
        ),
    }
    for pred_len, horizon_metrics in dft_metrics_dict.items():
        return_metrics[f"{pred_len}_mse"] = horizon_metrics["best_test_loss"]
        return_metrics[f"{pred_len}_mae"] = horizon_metrics["best_test_mae"]

    if args.delete_checkpoints:
        # Delete both sft and dft checkpoints
        shutil.rmtree(args.checkpoints)

    return return_metrics  # we only care about downstream task's best_test_loss
if __name__ == "__main__":
    """------------------------------------"""
    # Dataset (channel counts: Weather 21, ETTh1/ETTh2/ETTm1/ETTm2 7,
    # ECL 321, Traffic 862).
    data_name = "ETTh1"

    # Horizons: full benchmark [96, 192, 336, 720]; 5% few-shot on
    # ETTh1/ETTh2/Traffic [96, 192, 336]; linear probe [24, 48, 168, 336, 720].
    pred_len_list = [96]

    # Training-set fraction: 100 = full, 10 / 5 = few-shot.
    percent = 100

    num_workers = 8

    # 128 fits ~8G of GPU memory; 512 needs ~24G.
    batch_size = 128
    """------------------------------------"""
    set_seed(seed=2023)

    # Setup args
    args = get_args_from_parser()

    # Setup fixed params
    fixed_params = {
        "data_name": data_name,
        "pred_len_list": pred_len_list,
        "percent": percent,
        "num_workers": num_workers,
        "batch_size": batch_size,
    }

    # Setup tunable params
    # TODO: copy `config` from `exp_settings_and_results` (be careful with the boolean values)
    tunable_params = {
        "enable_supervised_finetuning": True,
        "first_k_layers": 6,
        "patch_len": 16,
        "stride": 8,
        "seq_len": 336,
        "ft_mode": "lp_ft",
        "dropout": 0.05,
        "token_embed_type": "conv",
        "token_embed_kernel_size": 3,
        "temporal_embed_type": "learned",
        "freq": "h",
        "peft_method": "adalora",
        "sft_optim": "AdamW",
        "sft_learning_rate": 7.912045141879411e-05,
        "sft_lradj": "constant",
        "sft_weight_decay": 0.0005542494992024964,
        "sft_train_epochs": 5,
        "dft_optim": "AdamW",
        "dft_learning_rate": 1.8257759510439175e-05,
        "dft_lradj": "constant",
        "dft_weight_decay": 0.0014555863788252605,
        "dft_train_epochs": 15,
        "peft_params_r": 8,
        "peft_params_lora_alpha": 64,
        "peft_params_lora_dropout": 0,
    }

    # Run
    return_metrics = trainable(tunable_params, fixed_params, args)
    print_formatted_dict(return_metrics)
class Dataset_ETT_hour(Dataset):
    """Hourly ETT dataset with the standard 12/4/4-month train/val/test split.

    Yields (seq_x, seq_y, seq_x_mark, seq_y_mark) windows. With
    `return_single_feature` enabled, every (window, channel) pair becomes a
    separate sample of shape (T, 1), so batch size is channel-invariant.
    """

    def __init__(
        self,
        root_path,
        flag="train",
        size=None,
        features="S",
        data_path="ETTh1.csv",
        target="OT",
        scale=True,
        timeenc=0,
        freq="h",
        seasonal_patterns=None,
        percent=100,
        return_single_feature=False,
    ):
        # size = [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.label_len, self.pred_len = size

        assert flag in ["train", "test", "val"]
        self.set_type = {"train": 0, "val": 1, "test": 2}[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.percent = percent
        self.return_single_feature = return_single_feature
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()
        self.C = self.data_x.shape[1]  # data_x.shape = (total_len, C)

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(str(self.root_path / self.data_path))

        # Split borders in hours: 12 months train, 4 val, 4 test; val/test
        # start seq_len early so their first windows have full history.
        border1s = [
            0,
            12 * 30 * 24 - self.seq_len,
            12 * 30 * 24 + 4 * 30 * 24 - self.seq_len,
        ]
        border2s = [
            12 * 30 * 24,
            12 * 30 * 24 + 4 * 30 * 24,
            12 * 30 * 24 + 8 * 30 * 24,
        ]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            # Few-shot: keep only the first `percent`% of the training span.
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features in ("M", "MS"):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == "S":
            df_data = df_raw[[self.target]]

        if self.scale:
            # Always normalize with statistics of the full training split.
            train_slice = df_data[border1s[0] : border2s[0]]
            self.scaler.fit(train_slice.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[["date"]][border1:border2]
        df_stamp["date"] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            df_stamp["month"] = df_stamp.date.apply(lambda row: row.month, 1)
            df_stamp["day"] = df_stamp.date.apply(lambda row: row.day, 1)
            df_stamp["weekday"] = df_stamp.date.apply(lambda row: row.weekday(), 1)
            df_stamp["hour"] = df_stamp.date.apply(lambda row: row.hour, 1)
            data_stamp = df_stamp.drop(["date"], 1).values
        elif self.timeenc == 1:
            data_stamp = time_features(
                pd.to_datetime(df_stamp["date"].values), freq=self.freq
            )
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        if self.return_single_feature:
            # Flat index encodes (window, channel); slice keeps a 2-D shape.
            window_index, channel = divmod(index, self.C)
            cols = slice(channel, channel + 1)
        else:
            window_index = index
            cols = slice(None)

        s_begin = window_index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end, cols]
        seq_y = self.data_y[r_begin:r_end, cols]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        n_windows = len(self.data_x) - self.seq_len - self.pred_len + 1
        return n_windows * self.C if self.return_single_feature else n_windows

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)
seasonal_patterns=None, 162 | percent=100, 163 | return_single_feature=False, 164 | ): 165 | # size [seq_len, label_len, pred_len] 166 | # info 167 | if size == None: 168 | self.seq_len = 24 * 4 * 4 169 | self.label_len = 24 * 4 170 | self.pred_len = 24 * 4 171 | else: 172 | self.seq_len = size[0] 173 | self.label_len = size[1] 174 | self.pred_len = size[2] 175 | # init 176 | assert flag in ["train", "test", "val"] 177 | type_map = {"train": 0, "val": 1, "test": 2} 178 | self.set_type = type_map[flag] 179 | 180 | self.features = features 181 | self.target = target 182 | self.scale = scale 183 | self.timeenc = timeenc 184 | self.freq = freq 185 | 186 | self.percent = percent 187 | self.return_single_feature = return_single_feature 188 | self.root_path = root_path 189 | self.data_path = data_path 190 | self.__read_data__() 191 | self.C = self.data_x.shape[1] # data_x.shape = (total_len, C) 192 | 193 | def __read_data__(self): 194 | self.scaler = StandardScaler() 195 | df_raw = pd.read_csv(str(self.root_path / self.data_path)) 196 | 197 | border1s = [ 198 | 0, 199 | 12 * 30 * 24 * 4 - self.seq_len, 200 | 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len, 201 | ] 202 | border2s = [ 203 | 12 * 30 * 24 * 4, 204 | 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 205 | 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4, 206 | ] 207 | border1 = border1s[self.set_type] 208 | border2 = border2s[self.set_type] 209 | 210 | if self.set_type == 0: 211 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len 212 | 213 | if self.features == "M" or self.features == "MS": 214 | cols_data = df_raw.columns[1:] 215 | df_data = df_raw[cols_data] 216 | elif self.features == "S": 217 | df_data = df_raw[[self.target]] 218 | 219 | if self.scale: 220 | train_data = df_data[border1s[0] : border2s[0]] 221 | self.scaler.fit(train_data.values) 222 | data = self.scaler.transform(df_data.values) 223 | else: 224 | data = df_data.values 225 | 226 | df_stamp = df_raw[["date"]][border1:border2] 227 | 
        df_stamp["date"] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            # Calendar-component features. NOTE(review): the positional "1" is
            # Series.apply's convert_dtype flag, not an axis — prefer the .dt accessor.
            df_stamp["month"] = df_stamp.date.apply(lambda row: row.month, 1)
            df_stamp["day"] = df_stamp.date.apply(lambda row: row.day, 1)
            df_stamp["weekday"] = df_stamp.date.apply(lambda row: row.weekday(), 1)
            df_stamp["hour"] = df_stamp.date.apply(lambda row: row.hour, 1)
            df_stamp["minute"] = df_stamp.date.apply(lambda row: row.minute, 1)
            # Bucket minutes into quarter-hour indices 0..3 (15-minute data).
            df_stamp["minute"] = df_stamp.minute.map(lambda x: x // 15)
            data_stamp = df_stamp.drop(["date"], 1).values
        elif self.timeenc == 1:
            data_stamp = time_features(
                pd.to_datetime(df_stamp["date"].values), freq=self.freq
            )
            # (n_features, n_steps) -> (n_steps, n_features)
            data_stamp = data_stamp.transpose(1, 0)

        # x and y share the same slice; windows are cut out in __getitem__.
        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index: int):
        """Return one (seq_x, seq_y, seq_x_mark, seq_y_mark) window.

        seq_x covers [index, index + seq_len); seq_y covers the last label_len
        steps of seq_x plus the following pred_len steps. When
        return_single_feature is True, the index additionally selects one of
        the C channels.
        """
        if self.return_single_feature == False:
            s_begin = index
            s_end = s_begin + self.seq_len
            r_begin = s_end - self.label_len
            r_end = r_begin + self.label_len + self.pred_len

            seq_x = self.data_x[s_begin:s_end]
            seq_y = self.data_y[r_begin:r_end]
            seq_x_mark = self.data_stamp[s_begin:s_end]
            seq_y_mark = self.data_stamp[r_begin:r_end]
        else:
            # Flattened indexing: every window start is repeated once per channel.
            original_index = index // self.C
            channel_index = index % self.C

            s_begin = original_index
            s_end = s_begin + self.seq_len
            r_begin = s_end - self.label_len
            r_end = r_begin + self.label_len + self.pred_len

            # Slice (not scalar-index) the channel so the 2-D shape is kept.
            seq_x = self.data_x[s_begin:s_end, channel_index : channel_index + 1]
            seq_y = self.data_y[r_begin:r_end, channel_index : channel_index + 1]
            seq_x_mark = self.data_stamp[s_begin:s_end]
            seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self) -> int:
        # Number of valid window start positions; multiplied by the channel
        # count when each channel is served as its own sample.
        if self.return_single_feature == False:
            return len(self.data_x) - self.seq_len - self.pred_len + 1
        else:
            return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.C

    def inverse_transform(self, data):
        """Map standardized values back to the original scale."""
        return self.scaler.inverse_transform(data)


class Dataset_Custom(Dataset):
    """Generic CSV dataset with a 70/10/20 train/val/test split.

    Expects a 'date' column; the target column is moved to the last position.
    """

    def __init__(
        self,
        root_path,
        flag="train",
        size=None,  # [seq_len, label_len, pred_len]
        features="S",
        data_path="ETTh1.csv",
        target="OT",
        scale=True,
        timeenc=0,
        freq="h",
        seasonal_patterns=None,  # unused here; kept for a uniform dataset interface
        percent=100,  # percentage of the train split to keep (few-shot setting)
        return_single_feature=False,  # serve each channel as its own sample
    ):
        # size [seq_len, label_len, pred_len]
        # info
        if size == None:
            # NOTE(review): prefer "size is None".
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ["train", "test", "val"]
        type_map = {"train": 0, "val": 1, "test": 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc  # timeenc = 0 if args.embed != 'timeF' else 1
        self.freq = freq

        self.percent = percent
        self.return_single_feature = return_single_feature
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()
        self.C = self.data_x.shape[1]  # data_x.shape = (total_len, C)

    def __read_data__(self):
        """Load the CSV, reorder columns, split 70/10/20, scale, build stamps."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(str(self.root_path / self.data_path))

        """
        df_raw.columns: ['date', ...(other features), target feature]
        """
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove("date")
        df_raw = df_raw[["date"] + cols + [self.target]]
        # 70% train / 20% test / remainder val; val and test starts are shifted
        # back by seq_len so their first sample has a full encoder input.
        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            # Few-shot: keep only the first `percent`% of the usable train steps.
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features == "M" or self.features == "MS":
            cols_data = df_raw.columns[1:]  # everything except the date column
            df_data = df_raw[cols_data]
        elif self.features == "S":
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit the scaler on the *train* split only, to avoid leakage.
            train_data = df_data[border1s[0] : border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[["date"]][border1:border2]
        df_stamp["date"] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:  # args.embed != 'timeF' (== 'fixed')
            df_stamp["month"] = df_stamp.date.apply(lambda row: row.month, 1)
            df_stamp["day"] = df_stamp.date.apply(lambda row: row.day, 1)
            df_stamp["weekday"] = df_stamp.date.apply(lambda row: row.weekday(), 1)
            df_stamp["hour"] = df_stamp.date.apply(lambda row: row.hour, 1)
            data_stamp = df_stamp.drop(["date"], 1).values
        elif self.timeenc == 1:  # args.embed == 'timeF'
            data_stamp = time_features(
                pd.to_datetime(df_stamp["date"].values), freq=self.freq
            )
            # (n_features, n_steps) -> (n_steps, n_features)
            data_stamp = data_stamp.transpose(1, 0)

        # x and y share the same slice; windows are cut out in __getitem__.
        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index: int):
        """Return one (seq_x, seq_y, seq_x_mark, seq_y_mark) window."""
        if self.return_single_feature == False:
            s_begin = index
            s_end = s_begin + self.seq_len
            r_begin = s_end - self.label_len
            r_end = r_begin + self.label_len + self.pred_len

            seq_x = self.data_x[s_begin:s_end]
            seq_y = self.data_y[r_begin:r_end]
            seq_x_mark = self.data_stamp[s_begin:s_end]
            seq_y_mark = self.data_stamp[r_begin:r_end]
        else:
            # Flattened indexing: every window start is repeated once per channel.
            original_index = index // self.C
            channel_index = index % self.C

            s_begin = original_index
            s_end = s_begin + self.seq_len
            r_begin = s_end - self.label_len
            r_end = r_begin + self.label_len + self.pred_len

            # Slice (not scalar-index) the channel so the 2-D shape is kept.
            seq_x = self.data_x[s_begin:s_end, channel_index : channel_index + 1]
            seq_y = self.data_y[r_begin:r_end, channel_index : channel_index + 1]
            seq_x_mark = self.data_stamp[s_begin:s_end]
            seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self) -> int:
        # Number of valid window start positions; multiplied by the channel
        # count when each channel is served as its own sample.
        if self.return_single_feature == False:
            return len(self.data_x) - self.seq_len - self.pred_len + 1
        else:
            return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.C

    def inverse_transform(self, data):
        """Map standardized values back to the original scale."""
        return self.scaler.inverse_transform(data)


def update_args_from_dataset(args: argparse.Namespace) -> argparse.Namespace:
    """Fill dataset- and LLM-dependent fields of `args` in place and return it.

    Sets data_path, data (loader key), enc_in (channel count), model_id,
    LLM_path, and d_model (hidden size of the chosen backbone).
    """
    # Set data_path, data, enc_in, seq_len
    if args.data_name == "Weather":
        args.data_path = Path(args.root_path, "dataset", "weather", "weather.csv")
        args.data = "custom"
        args.enc_in = 21
    elif args.data_name == "ETTh1":
        args.data_path = Path(args.root_path, "dataset", "ETT-small", "ETTh1.csv")
        args.data = "ETTh1"
        args.enc_in = 7
    elif args.data_name == "ETTh2":
        args.data_path = Path(args.root_path, "dataset", "ETT-small", "ETTh2.csv")
        args.data = "ETTh2"
        args.enc_in = 7
    elif args.data_name == "ETTm1":
        args.data_path = Path(args.root_path, "dataset", "ETT-small", "ETTm1.csv")
        args.data = "ETTm1"
        args.enc_in = 7
    elif args.data_name == "ETTm2":
        args.data_path = Path(args.root_path, "dataset", "ETT-small", "ETTm2.csv")
        args.data = "ETTm2"
        args.enc_in = 7
    elif args.data_name == "ECL":
        args.data_path = Path(args.root_path, "dataset", "electricity", "electricity.csv")
        args.data = "custom"
        args.enc_in = 321
| elif args.data_name == "Traffic": 444 | args.data_path = Path(args.root_path, "dataset", "traffic", "traffic.csv") 445 | args.data = "custom" 446 | args.enc_in = 862 447 | 448 | # Set model_id 449 | args.model_id = args.data_path.name.split(".")[0] + "_" + str(args.pred_len) 450 | 451 | # Set d_model 452 | if args.LLM == "gpt2": 453 | args.LLM_path = args.root_path / Path("LLM", "gpt2") 454 | args.d_model = 768 455 | elif args.LLM == "llama": 456 | args.LLM_path = args.root_path / Path("LLM", "llama") 457 | args.d_model = 4096 458 | elif args.LLM == "falcon": 459 | args.LLM_path = args.root_path / Path("LLM", "falcon") 460 | args.d_model = 4096 461 | else: 462 | raise ValueError(f"LLM {args.LLM} not supported") 463 | 464 | return args 465 | --------------------------------------------------------------------------------