Any comments in other languages are for reference only.
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |

(ICLR'24) Time-LLM: Time Series Forecasting by Reprogramming Large Language Models

5 |
6 | 7 |
8 | 9 | ![](https://img.shields.io/github/last-commit/KimMeen/Time-LLM?color=green) 10 | ![](https://img.shields.io/github/stars/KimMeen/Time-LLM?color=yellow) 11 | ![](https://img.shields.io/github/forks/KimMeen/Time-LLM?color=lightblue) 12 | ![](https://img.shields.io/badge/PRs-Welcome-green) 13 | 14 |
15 | 16 |
17 | 18 | **[Paper Page]** 19 | **[YouTube Talk 1]** 20 | **[YouTube Talk 2]** 21 | **[Medium Blog]** 22 | 23 | **[机器之心中文解读]** 24 | **[量子位中文解读]** 25 | **[时序人中文解读]** 26 | **[AI算法厨房中文解读]** 27 | **[知乎中文解读]** 28 | 29 | 30 |
31 | 32 |

33 | 34 | 35 | 36 |

🙋 Please let us know if you find a mistake or have any suggestions! 42 | > 43 | > 🌟 If you find this resource helpful, please consider starring this repository and citing our research:

66 | 67 |

68 | 69 | - Time-LLM comprises two key components: (1) reprogramming the input time series into text prototype representations that are more natural for the LLM, and (2) augmenting the input context with declarative prompts (e.g., domain expert knowledge and task instructions) to guide LLM reasoning. 70 | 71 |

72 | 73 |

74 | 75 | ## Requirements 76 | Use python 3.11 from MiniConda 77 | 78 | - torch==2.2.2 79 | - accelerate==0.28.0 80 | - einops==0.7.0 81 | - matplotlib==3.7.0 82 | - numpy==1.23.5 83 | - pandas==1.5.3 84 | - scikit_learn==1.2.2 85 | - scipy==1.12.0 86 | - tqdm==4.65.0 87 | - peft==0.4.0 88 | - transformers==4.31.0 89 | - deepspeed==0.14.0 90 | - sentencepiece==0.2.0 91 | 92 | To install all dependencies: 93 | ``` 94 | pip install -r requirements.txt 95 | ``` 96 | 97 | ## Datasets 98 | You can access the well pre-processed datasets from [[Google Drive]](https://drive.google.com/file/d/1NF7VEefXCmXuWNbnNe858WvQAkJ_7wuP/view?usp=sharing), then place the downloaded contents under `./dataset` 99 | 100 | ## Quick Demos 101 | 1. Download datasets and place them under `./dataset` 102 | 2. Tune the model. We provide five experiment scripts for demonstration purpose under the folder `./scripts`. For example, you can evaluate on ETT datasets by: 103 | 104 | ```bash 105 | bash ./scripts/TimeLLM_ETTh1.sh 106 | ``` 107 | ```bash 108 | bash ./scripts/TimeLLM_ETTh2.sh 109 | ``` 110 | ```bash 111 | bash ./scripts/TimeLLM_ETTm1.sh 112 | ``` 113 | ```bash 114 | bash ./scripts/TimeLLM_ETTm2.sh 115 | ``` 116 | 117 | ## Detailed usage 118 | 119 | Please refer to ```run_main.py```, ```run_m4.py``` and ```run_pretrain.py``` for the detailed description of each hyperparameter. 120 | 121 | 122 | ## Further Reading 123 | 1, [**TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis**](https://arxiv.org/abs/2410.16032), in *arXiv* 2024. 
124 | [\[GitHub Repo\]](https://github.com/kwuking/TimeMixer/blob/main/README.md) 125 | 126 | **Authors**: Shiyu Wang, Jiawei Li, Xiaoming Shi, Zhou Ye, Baichuan Mo, Wenze Lin, Shengtong Ju, Zhixuan Chu, Ming Jin 127 | 128 | ```bibtex 129 | @article{wang2024timemixer++, 130 | title={TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis}, 131 | author={Wang, Shiyu and Li, Jiawei and Shi, Xiaoming and Ye, Zhou and Mo, Baichuan and Lin, Wenze and Ju, Shengtong and Chu, Zhixuan and Jin, Ming}, 132 | journal={arXiv preprint arXiv:2410.16032}, 133 | year={2024} 134 | } 135 | ``` 136 | 137 | 2, [**Foundation Models for Time Series Analysis: A Tutorial and Survey**](https://arxiv.org/pdf/2403.14735), in *KDD* 2024. 138 | 139 | **Authors**: Yuxuan Liang, Haomin Wen, Yuqi Nie, Yushan Jiang, Ming Jin, Dongjin Song, Shirui Pan, Qingsong Wen* 140 | 141 | ```bibtex 142 | @inproceedings{liang2024foundation, 143 | title={Foundation models for time series analysis: A tutorial and survey}, 144 | author={Liang, Yuxuan and Wen, Haomin and Nie, Yuqi and Jiang, Yushan and Jin, Ming and Song, Dongjin and Pan, Shirui and Wen, Qingsong}, 145 | booktitle={ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2024)}, 146 | year={2024} 147 | } 148 | ``` 149 | 150 | 3, [**Position Paper: What Can Large Language Models Tell Us about Time Series Analysis**](https://arxiv.org/abs/2402.02713), in *ICML* 2024. 
151 | 152 | **Authors**: Ming Jin, Yifan Zhang, Wei Chen, Kexin Zhang, Yuxuan Liang*, Bin Yang, Jindong Wang, Shirui Pan, Qingsong Wen* 153 | 154 | ```bibtex 155 | @inproceedings{jin2024position, 156 | title={Position Paper: What Can Large Language Models Tell Us about Time Series Analysis}, 157 | author={Ming Jin and Yifan Zhang and Wei Chen and Kexin Zhang and Yuxuan Liang and Bin Yang and Jindong Wang and Shirui Pan and Qingsong Wen}, 158 | booktitle={International Conference on Machine Learning (ICML 2024)}, 159 | year={2024} 160 | } 161 | ``` 162 | 163 | 4, [**Large Models for Time Series and Spatio-Temporal Data: A Survey and Outlook**](https://arxiv.org/abs/2310.10196), in *arXiv* 2023. 164 | [\[GitHub Repo\]](https://github.com/qingsongedu/Awesome-TimeSeries-SpatioTemporal-LM-LLM) 165 | 166 | **Authors**: Ming Jin, Qingsong Wen*, Yuxuan Liang, Chaoli Zhang, Siqiao Xue, Xue Wang, James Zhang, Yi Wang, Haifeng Chen, Xiaoli Li (IEEE Fellow), Shirui Pan*, Vincent S. Tseng (IEEE Fellow), Yu Zheng (IEEE Fellow), Lei Chen (IEEE Fellow), Hui Xiong (IEEE Fellow) 167 | 168 | ```bibtex 169 | @article{jin2023lm4ts, 170 | title={Large Models for Time Series and Spatio-Temporal Data: A Survey and Outlook}, 171 | author={Ming Jin and Qingsong Wen and Yuxuan Liang and Chaoli Zhang and Siqiao Xue and Xue Wang and James Zhang and Yi Wang and Haifeng Chen and Xiaoli Li and Shirui Pan and Vincent S. Tseng and Yu Zheng and Lei Chen and Hui Xiong}, 172 | journal={arXiv preprint arXiv:2310.10196}, 173 | year={2023} 174 | } 175 | ``` 176 | 177 | 178 | 5, [**Transformers in Time Series: A Survey**](https://arxiv.org/abs/2202.07125), in IJCAI 2023. 
179 | [\[GitHub Repo\]](https://github.com/qingsongedu/time-series-transformers-review) 180 | 181 | **Authors**: Qingsong Wen, Tian Zhou, Chaoli Zhang, Weiqi Chen, Ziqing Ma, Junchi Yan, Liang Sun 182 | 183 | ```bibtex 184 | @inproceedings{wen2023transformers, 185 | title={Transformers in time series: A survey}, 186 | author={Wen, Qingsong and Zhou, Tian and Zhang, Chaoli and Chen, Weiqi and Ma, Ziqing and Yan, Junchi and Sun, Liang}, 187 | booktitle={International Joint Conference on Artificial Intelligence(IJCAI)}, 188 | year={2023} 189 | } 190 | ``` 191 | 192 | 6, [**TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting**](https://openreview.net/pdf?id=7oLshfEIC2), in ICLR 2024. 193 | [\[GitHub Repo\]](https://github.com/kwuking/TimeMixer) 194 | 195 | **Authors**: Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou 196 | 197 | ```bibtex 198 | @inproceedings{wang2023timemixer, 199 | title={TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting}, 200 | author={Wang, Shiyu and Wu, Haixu and Shi, Xiaoming and Hu, Tengge and Luo, Huakun and Ma, Lintao and Zhang, James Y and ZHOU, JUN}, 201 | booktitle={International Conference on Learning Representations (ICLR)}, 202 | year={2024} 203 | } 204 | ``` 205 | 206 | ## Acknowledgement 207 | Our implementation adapts [Time-Series-Library](https://github.com/thuml/Time-Series-Library) and [OFA (GPT4TS)](https://github.com/DAMO-DI-ML/NeurIPS2023-One-Fits-All) as the code base and have extensively modified it to our purposes. We thank the authors for sharing their implementations and related resources. 
208 | -------------------------------------------------------------------------------- /data_provider/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4 2 | from torch.utils.data import DataLoader 3 | 4 | data_dict = { 5 | 'ETTh1': Dataset_ETT_hour, 6 | 'ETTh2': Dataset_ETT_hour, 7 | 'ETTm1': Dataset_ETT_minute, 8 | 'ETTm2': Dataset_ETT_minute, 9 | 'ECL': Dataset_Custom, 10 | 'Traffic': Dataset_Custom, 11 | 'Weather': Dataset_Custom, 12 | 'm4': Dataset_M4, 13 | } 14 | 15 | 16 | def data_provider(args, flag): 17 | Data = data_dict[args.data] 18 | timeenc = 0 if args.embed != 'timeF' else 1 19 | percent = args.percent 20 | 21 | if flag == 'test': 22 | shuffle_flag = False 23 | drop_last = True 24 | batch_size = args.batch_size 25 | freq = args.freq 26 | else: 27 | shuffle_flag = True 28 | drop_last = True 29 | batch_size = args.batch_size 30 | freq = args.freq 31 | 32 | if args.data == 'm4': 33 | drop_last = False 34 | data_set = Data( 35 | root_path=args.root_path, 36 | data_path=args.data_path, 37 | flag=flag, 38 | size=[args.seq_len, args.label_len, args.pred_len], 39 | features=args.features, 40 | target=args.target, 41 | timeenc=timeenc, 42 | freq=freq, 43 | seasonal_patterns=args.seasonal_patterns 44 | ) 45 | else: 46 | data_set = Data( 47 | root_path=args.root_path, 48 | data_path=args.data_path, 49 | flag=flag, 50 | size=[args.seq_len, args.label_len, args.pred_len], 51 | features=args.features, 52 | target=args.target, 53 | timeenc=timeenc, 54 | freq=freq, 55 | percent=percent, 56 | seasonal_patterns=args.seasonal_patterns 57 | ) 58 | data_loader = DataLoader( 59 | data_set, 60 | batch_size=batch_size, 61 | 
shuffle=shuffle_flag, 62 | num_workers=args.num_workers, 63 | drop_last=drop_last) 64 | return data_set, data_loader 65 | -------------------------------------------------------------------------------- /data_provider/m4.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | M4 Dataset 17 | """ 18 | from dataclasses import dataclass 19 | 20 | import numpy as np 21 | import pandas as pd 22 | import logging 23 | import os 24 | import pathlib 25 | import sys 26 | from urllib import request 27 | 28 | 29 | def url_file_name(url: str) -> str: 30 | """ 31 | Extract file name from url. 32 | 33 | :param url: URL to extract file name from. 34 | :return: File name. 35 | """ 36 | return url.split('/')[-1] if len(url) > 0 else '' 37 | 38 | 39 | def download(url: str, file_path: str) -> None: 40 | """ 41 | Download a file to the given path. 42 | 43 | :param url: URL to download 44 | :param file_path: Where to download the content. 
45 | """ 46 | 47 | def progress(count, block_size, total_size): 48 | progress_pct = float(count * block_size) / float(total_size) * 100.0 49 | sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct)) 50 | sys.stdout.flush() 51 | 52 | if not os.path.isfile(file_path): 53 | opener = request.build_opener() 54 | opener.addheaders = [('User-agent', 'Mozilla/5.0')] 55 | request.install_opener(opener) 56 | pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True) 57 | f, _ = request.urlretrieve(url, file_path, progress) 58 | sys.stdout.write('\n') 59 | sys.stdout.flush() 60 | file_info = os.stat(f) 61 | logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') 62 | else: 63 | file_info = os.stat(file_path) 64 | logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') 65 | 66 | 67 | @dataclass() 68 | class M4Dataset: 69 | ids: np.ndarray 70 | groups: np.ndarray 71 | frequencies: np.ndarray 72 | horizons: np.ndarray 73 | values: np.ndarray 74 | 75 | @staticmethod 76 | def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset': 77 | """ 78 | Load cached dataset. 79 | 80 | :param training: Load training part if training is True, test part otherwise. 
81 | """ 82 | info_file = os.path.join(dataset_file, 'M4-info.csv') 83 | train_cache_file = os.path.join(dataset_file, 'training.npz') 84 | test_cache_file = os.path.join(dataset_file, 'test.npz') 85 | m4_info = pd.read_csv(info_file) 86 | return M4Dataset(ids=m4_info.M4id.values, 87 | groups=m4_info.SP.values, 88 | frequencies=m4_info.Frequency.values, 89 | horizons=m4_info.Horizon.values, 90 | values=np.load( 91 | train_cache_file if training else test_cache_file, 92 | allow_pickle=True)) 93 | 94 | 95 | @dataclass() 96 | class M4Meta: 97 | seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly'] 98 | horizons = [6, 8, 18, 13, 14, 48] 99 | frequencies = [1, 4, 12, 1, 1, 24] 100 | horizons_map = { 101 | 'Yearly': 6, 102 | 'Quarterly': 8, 103 | 'Monthly': 18, 104 | 'Weekly': 13, 105 | 'Daily': 14, 106 | 'Hourly': 48 107 | } # different predict length 108 | frequency_map = { 109 | 'Yearly': 1, 110 | 'Quarterly': 4, 111 | 'Monthly': 12, 112 | 'Weekly': 1, 113 | 'Daily': 1, 114 | 'Hourly': 24 115 | } 116 | history_size = { 117 | 'Yearly': 1.5, 118 | 'Quarterly': 1.5, 119 | 'Monthly': 1.5, 120 | 'Weekly': 10, 121 | 'Daily': 10, 122 | 'Hourly': 10 123 | } # from interpretable.gin 124 | 125 | 126 | def load_m4_info() -> pd.DataFrame: 127 | """ 128 | Load M4Info file. 129 | 130 | :return: Pandas DataFrame of M4Info. 
131 | """ 132 | return pd.read_csv(INFO_FILE_PATH) 133 | -------------------------------------------------------------------------------- /data_provider_pretrain/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/data_provider_pretrain/__init__.py -------------------------------------------------------------------------------- /data_provider_pretrain/data_factory.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | from data_provider_pretrain.data_loader import Dataset_ETT_hour, Dataset_ETT_minute 4 | 5 | data_dict = { 6 | 'ETTh1': Dataset_ETT_hour, 7 | 'ETTh2': Dataset_ETT_hour, 8 | 'ETTm1': Dataset_ETT_minute, 9 | 'ETTm2': Dataset_ETT_minute, 10 | } 11 | 12 | 13 | def data_provider(args, data, data_path, pretrain=True, flag='train'): 14 | Data = data_dict[data] 15 | timeenc = 0 if args.embed != 'timeF' else 1 16 | percent = args.percent 17 | 18 | if flag == 'test': 19 | shuffle_flag = False 20 | drop_last = True 21 | batch_size = args.batch_size 22 | freq = args.freq 23 | else: 24 | shuffle_flag = True 25 | drop_last = True 26 | batch_size = args.batch_size 27 | freq = args.freq 28 | 29 | data_set = Data( 30 | root_path=args.root_path, 31 | data_path=data_path, 32 | flag=flag, 33 | size=[args.seq_len, args.label_len, args.pred_len], 34 | features=args.features, 35 | target=args.target, 36 | timeenc=timeenc, 37 | freq=freq, 38 | percent=percent, 39 | seasonal_patterns=args.seasonal_patterns, 40 | pretrain=pretrain 41 | ) 42 | data_loader = DataLoader( 43 | data_set, 44 | batch_size=batch_size, 45 | shuffle=shuffle_flag, 46 | num_workers=args.num_workers, 47 | drop_last=drop_last) 48 | return data_set, data_loader 49 | -------------------------------------------------------------------------------- /data_provider_pretrain/data_loader.py: 
import os
import warnings

import pandas as pd
from torch.utils.data import Dataset

warnings.filterwarnings('ignore')


class Dataset_ETT_hour(Dataset):
    """Sliding-window dataset over the hourly ETT CSVs (ETTh1/ETTh2).

    Each item is one (seq_x, seq_y, seq_x_mark, seq_y_mark) window for a
    single channel; the flat index enumerates every (channel, start) pair,
    so ``__len__`` is windows * channels.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', percent=100,
                 seasonal_patterns=None, pretrain=True):
        # size == [seq_len, label_len, pred_len]; default to the
        # conventional 96/48/96-hour configuration when not given.
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.percent = percent    # percentage of the training windows to keep
        self.pretrain = pretrain  # True: fold the val months into training
        self.features = features  # 'M'/'MS': all channels, 'S': target only
        self.target = target
        self.scale = scale
        self.timeenc = timeenc    # 0: integer calendar features, 1: continuous
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

        self.enc_in = self.data_x.shape[-1]
        self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1

    def __read_data__(self):
        # Imported lazily so merely importing this module does not require
        # sklearn or the project-local time-feature helpers.
        from sklearn.preprocessing import StandardScaler

        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        month = 30 * 24  # one "month" of hourly points
        if self.pretrain:
            # Pretraining uses train+val (first 16 months) for training and
            # the last 4 months for both validation and test.
            border1s = [0, 16 * month - self.seq_len, 16 * month - self.seq_len]
            border2s = [16 * month, 20 * month, 20 * month]
        else:
            # Standard ETT 12/4/4-month train/val/test split.
            border1s = [0, 12 * month - self.seq_len, 16 * month - self.seq_len]
            border2s = [12 * month, 16 * month, 20 * month]

        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            # Optionally shrink the training span to `percent`% of its windows.
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]  # every column except 'date'
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit on the training span only to avoid look-ahead leakage.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            # Calendar features as small integers.  The previous
            # Series.apply(..., 1) / drop(['date'], 1) calls relied on
            # positional arguments removed in pandas 2.0; the .dt accessor
            # is both compatible and vectorized.
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            from utils.timefeatures import time_features
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        # Flat index -> (channel, window start).
        feat_id = index // self.tot_len
        s_begin = index % self.tot_len

        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len  # decoder input overlaps the tail
        r_end = r_begin + self.label_len + self.pred_len
        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in

    def inverse_transform(self, data):
        """Map scaled values back to the original units."""
        return self.scaler.inverse_transform(data)


class Dataset_ETT_minute(Dataset):
    """Sliding-window dataset over the 15-minute ETT CSVs (ETTm1/ETTm2).

    Identical windowing scheme to :class:`Dataset_ETT_hour`, but split
    borders are expressed in 15-minute steps (4 per hour).
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, timeenc=0, freq='t', percent=100,
                 seasonal_patterns=None, pretrain=True):
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.percent = percent
        self.pretrain = pretrain
        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

        self.enc_in = self.data_x.shape[-1]
        self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1

    def __read_data__(self):
        from sklearn.preprocessing import StandardScaler

        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        month = 30 * 24 * 4  # one "month" of 15-minute points
        if self.pretrain:
            # Train on train+val (16 months); val == test == last 4 months.
            border1s = [0, 16 * month - self.seq_len, 16 * month - self.seq_len]
            border2s = [16 * month, 20 * month, 20 * month]
        else:
            border1s = [0, 12 * month - self.seq_len, 16 * month - self.seq_len]
            border2s = [12 * month, 16 * month, 20 * month]

        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features == 'M' or self.features == 'MS':
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            # pandas-2.0-safe calendar features; minute is bucketed into
            # quarter-hours to match the 15-minute sampling rate.
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            df_stamp['minute'] = df_stamp['date'].dt.minute // 15
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            from utils.timefeatures import time_features
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        feat_id = index // self.tot_len
        s_begin = index % self.tot_len

        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len
        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in

    def inverse_transform(self, data):
        """Map scaled values back to the original units."""
        return self.scaler.inverse_transform(data)
2 | 3 | -------------------------------------------------------------------------------- /dataset/prompt_bank/Traffic.txt: -------------------------------------------------------------------------------- 1 | Traffic is a collection of hourly data from California Department of Transportation, which describes the road occupancy rates measured by different sensors on San Francisco Bay area freeways. -------------------------------------------------------------------------------- /dataset/prompt_bank/Weather.txt: -------------------------------------------------------------------------------- 1 | Weather is recorded every 10 minutes for the 2020 whole year, which contains 21 meteorological indicators, such as air temperature, humidity, etc. 2 | -------------------------------------------------------------------------------- /dataset/prompt_bank/m4.txt: -------------------------------------------------------------------------------- 1 | The M4 dataset is a collection of 100,000 time series used for the fourth edition of the Makridakis forecasting Competition. The M4 dataset consists of time series of yearly, quarterly, monthly and other (weekly, daily and hourly) data, which are divided into training and test sets. The minimum numbers of observations in the training test are 13 for yearly, 16 for quarterly, 42 for monthly, 80 for weekly, 93 for daily and 700 for hourly series. The participants were asked to produce the following numbers of forecasts beyond the available data that they had been given: six for yearly, eight for quarterly, 18 for monthly series, 13 for weekly series and 14 and 48 forecasts respectively for the daily and hourly ones. 
2 | 3 | -------------------------------------------------------------------------------- /ds_config_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": true, 4 | "auto_cast": true 5 | }, 6 | "zero_optimization": { 7 | "stage": 2, 8 | "allgather_partitions": true, 9 | "allgather_bucket_size": 2e8, 10 | "overlap_comm": true, 11 | "reduce_scatter": true, 12 | "reduce_bucket_size": 2e8, 13 | "contiguous_gradients": true, 14 | "sub_group_size": 1e9 15 | }, 16 | "gradient_accumulation_steps": "auto", 17 | "train_batch_size": "auto", 18 | "train_micro_batch_size_per_gpu": "auto", 19 | "steps_per_print": 10, 20 | "wall_clock_breakdown": false 21 | } -------------------------------------------------------------------------------- /figures/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/framework.png -------------------------------------------------------------------------------- /figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/logo.png -------------------------------------------------------------------------------- /figures/method-detailed-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/method-detailed-illustration.png -------------------------------------------------------------------------------- /layers/AutoCorrelation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import math 7 
from math import sqrt
import os


class AutoCorrelation(nn.Module):
    """
    AutoCorrelation Mechanism with the following two phases:
    (1) period-based dependencies discovery
    (2) time delay aggregation
    This block can replace the self-attention family mechanism seamlessly.
    """

    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        super(AutoCorrelation, self).__init__()
        self.factor = factor              # scales top_k = factor * log(length)
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def time_delay_agg_training(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the training phase.
        """
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # find top k delays shared across the batch
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
        weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
        # normalize correlations into aggregation weights
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregate rolled copies of the series, weighted by correlation
        tmp_values = values
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(tmp_values, -int(index[i]), -1)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_inference(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the inference phase.
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init; allocate on the input's device (the previous hard-coded
        # .cuda() broke CPU execution and multi-GPU placement)
        init_index = torch.arange(length, device=values.device) \
            .unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
        # find top k delays per sample
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        weights, delay = torch.topk(mean_value, top_k, dim=-1)
        # normalize correlations into aggregation weights
        tmp_corr = torch.softmax(weights, dim=-1)
        # duplicate the series so a gather past the end wraps around
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_full(self, values, corr):
        """
        Standard version of Autocorrelation
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init on the input's device (was hard-coded .cuda())
        init_index = torch.arange(length, device=values.device) \
            .unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
        # find top k delays per (batch, head, channel)
        top_k = int(self.factor * math.log(length))
        weights, delay = torch.topk(corr, top_k, dim=-1)
        # normalize correlations into aggregation weights
        tmp_corr = torch.softmax(weights, dim=-1)
        # wrap-around gather via a doubled copy of the series
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[..., i].unsqueeze(-1)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
        return delays_agg

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        # Align key/value length with the query length.
        if L > S:
            zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, zeros], dim=1)
            keys = torch.cat([keys, zeros], dim=1)
        else:
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        # period-based dependencies: correlation via FFT (Wiener-Khinchin)
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        res = q_fft * torch.conj(k_fft)
        corr = torch.fft.irfft(res, dim=-1)

        # time delay aggregation (cheaper batch-shared path while training)
        if self.training:
            V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
        else:
            V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)

        if self.output_attention:
            return (V.contiguous(), corr.permute(0, 3, 1, 2))
        else:
            return (V.contiguous(), None)


class AutoCorrelationLayer(nn.Module):
    """Multi-head wrapper: projects q/k/v, applies an inner correlation
    module, and projects the concatenated heads back to d_model."""

    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AutoCorrelationLayer, self).__init__()

        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        # split the model dimension into H heads
        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_correlation(
            queries,
            keys,
            values,
            attn_mask
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), attn
class series_decomp_multi(nn.Module):
    """
    Multiple Series decomposition block from FEDformer: averages several
    series_decomp branches with different moving-average kernel sizes.
    """

    def __init__(self, kernel_size):
        super(series_decomp_multi, self).__init__()
        self.kernel_size = kernel_size
        # nn.ModuleList (not a plain Python list) so the per-kernel branches
        # are registered as submodules and follow .to(device)/.eval()/state_dict.
        self.series_decomp = nn.ModuleList([series_decomp(kernel) for kernel in kernel_size])

    def forward(self, x):
        moving_mean = []
        res = []
        for func in self.series_decomp:
            sea, moving_avg = func(x)
            moving_mean.append(moving_avg)
            res.append(sea)

        # uniform average over all kernel branches
        sea = sum(res) / len(res)
        moving_mean = sum(moving_mean) / len(moving_mean)
        return sea, moving_mean


class EncoderLayer(nn.Module):
    """
    Autoformer encoder layer with the progressive decomposition architecture
    """

    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = attention
        # position-wise feed-forward implemented as two 1x1 convolutions
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        new_x, attn = self.attention(
            x, x, x,
            attn_mask=attn_mask
        )
        x = x + self.dropout(new_x)
        x, _ = self.decomp1(x)  # keep the seasonal part, drop the trend
        y = x
        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))
        res, _ = self.decomp2(x + y)
        return res, attn


class Encoder(nn.Module):
    """
    Autoformer encoder
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        attns = []
        if self.conv_layers is not None:
            # interleave attention and (distilling) conv layers; one extra
            # attention layer closes the stack
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns


class DecoderLayer(nn.Module):
    """
    Autoformer decoder layer with the progressive decomposition architecture
    """

    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        # maps the accumulated trend back to the output channel count
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        x = x + self.dropout(self.self_attention(
            x, x, x,
            attn_mask=x_mask
        )[0])
        x, trend1 = self.decomp1(x)
        x = x + self.dropout(self.cross_attention(
            x, cross, cross,
            attn_mask=cross_mask
        )[0])
        x, trend2 = self.decomp2(x)
        y = x
        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))
        x, trend3 = self.decomp3(x + y)

        # trends extracted at each stage are summed and projected
        residual_trend = trend1 + trend2 + trend3
        residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend


class Decoder(nn.Module):
    """
    Autoformer decoder: stacks decoder layers and accumulates the trend.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
        for layer in self.layers:
            x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
            trend = trend + residual_trend

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x, trend


import torch
import torch.nn as nn


class Inception_Block_V1(nn.Module):
    """Parallel square convolutions (1x1, 3x3, 5x5, ...) whose outputs are
    averaged; padding keeps the spatial size unchanged."""

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        super(Inception_Block_V1, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        kernels = []
        for i in range(self.num_kernels):
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i))
        self.kernels = nn.ModuleList(kernels)
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # one pass per parallel kernel, averaged across branches
        res_list = [kernel(x) for kernel in self.kernels]
        return torch.stack(res_list, dim=-1).mean(-1)


class Inception_Block_V2(nn.Module):
    """Parallel separable (1xk and kx1) convolutions plus a 1x1 branch,
    averaged; padding keeps the spatial size unchanged."""

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        super(Inception_Block_V2, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        kernels = []
        for i in range(self.num_kernels // 2):
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[1, 2 * i + 3], padding=[0, i + 1]))
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[2 * i + 3, 1], padding=[i + 1, 0]))
        kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
        self.kernels = nn.ModuleList(kernels)
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Iterate over the kernels that actually exist.  The previous
        # range(self.num_kernels + 1) raised IndexError for odd num_kernels,
        # where only 2 * (num_kernels // 2) + 1 kernels are built; for even
        # num_kernels this is identical to the old behavior.
        res_list = [kernel(x) for kernel in self.kernels]
        return torch.stack(res_list, dim=-1).mean(-1)


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn.utils import weight_norm
import math
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encodings (Vaswani et al.), precomputed
    once and stored as a non-trainable buffer."""

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space; registered as
        # a buffer, so no gradient is ever tracked for it.
        pe = torch.zeros(max_len, d_model).float()

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()
                    * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]


class TokenEmbedding(nn.Module):
    """Embeds each time step via a circular 1D convolution over channels."""

    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        # torch.__version__ is not lexicographically ordered ('1.10' < '1.5'
        # as strings), so compare a parsed (major, minor) tuple; kernel_size=3
        # needs padding=1 on torch >= 1.5 to preserve the sequence length.
        version = tuple(int(v) for v in torch.__version__.split('+')[0].split('.')[:2])
        padding = 1 if version >= (1, 5) else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        # (B, L, C) -> conv over channels -> (B, L, d_model)
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
        return x


class FixedEmbedding(nn.Module):
    """Embedding table with frozen sinusoidal weights (no training)."""

    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()
                    * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        # wrap as a frozen Parameter so the weights never update
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()


class TemporalEmbedding(nn.Module):
    """Sums embeddings of calendar components (month/day/weekday/hour and,
    for freq='t', quarter-hour minute buckets).

    Expects integer calendar features in the column order produced by the
    data loaders: [month, day, weekday, hour(, minute)].
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        minute_size = 4     # minutes are pre-bucketed into quarter hours
        hour_size = 24
        weekday_size = 7
        day_size = 32
        month_size = 13

        Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
        if freq == 't':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        x = x.long()
        minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
            self, 'minute_embed') else 0.
        hour_x = self.hour_embed(x[:, :, 3])
        weekday_x = self.weekday_embed(x[:, :, 2])
        day_x = self.day_embed(x[:, :, 1])
        month_x = self.month_embed(x[:, :, 0])

        return hour_x + weekday_x + day_x + month_x + minute_x


class TimeFeatureEmbedding(nn.Module):
    """Linear projection of continuous time features (timeenc == 1)."""

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        # number of continuous time features produced per frequency
        freq_map = {'h': 4, 't': 5, 's': 6,
                    'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        d_inp = freq_map[freq]
        self.embed = nn.Linear(d_inp, d_model, bias=False)

    def forward(self, x):
        return self.embed(x)


class DataEmbedding(nn.Module):
    """value + temporal + positional embedding, followed by dropout."""

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
                                                    freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
            d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        if x_mark is None:
            x = self.value_embedding(x) + self.position_embedding(x).to(x.device)
        else:
            x = self.value_embedding(
                x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
        return self.dropout(x)


class DataEmbedding_wo_pos(nn.Module):
    """value + temporal embedding (no positional term), with dropout."""

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_wo_pos, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
                                                    freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
            d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        if x_mark is None:
            x = self.value_embedding(x)
        else:
            x = self.value_embedding(x) + self.temporal_embedding(x_mark)
        return self.dropout(x)


class ReplicationPad1d(nn.Module):
    """Pads the last dimension on the right by repeating the final value."""

    def __init__(self, padding) -> None:
        super(ReplicationPad1d, self).__init__()
        self.padding = padding

    def forward(self, input: Tensor) -> Tensor:
        replicate_padding = input[:, :, -1].unsqueeze(-1).repeat(1, 1, self.padding[-1])
        output = torch.cat([input, replicate_padding], dim=-1)
        return output


class PatchEmbedding(nn.Module):
    """Splits each channel into overlapping patches and embeds each patch.

    Returns the embedded patches with shape (B * n_vars, num_patches,
    d_model) together with n_vars so callers can un-fold the batch.
    """

    def __init__(self, d_model, patch_len, stride, dropout):
        super(PatchEmbedding, self).__init__()
        # Patching
        self.patch_len = patch_len
        self.stride = stride
        # right-pad with the last value so the final partial window fits
        self.padding_patch_layer = ReplicationPad1d((0, stride))

        # Input encoding: projection of feature vectors onto a d-dim space
        self.value_embedding = TokenEmbedding(patch_len, d_model)

        # Residual dropout
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # x: (B, n_vars, L)
        n_vars = x.shape[1]
        x = self.padding_patch_layer(x)
        x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
        # fold channels into the batch dimension: (B * n_vars, patches, patch_len)
        x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
        # Input encoding
        x = self.value_embedding(x)
        return self.dropout(x), n_vars
self.padding_patch_layer(x) 181 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) 182 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) 183 | # Input encoding 184 | x = self.value_embedding(x) 185 | return self.dropout(x), n_vars 186 | 187 | 188 | class DataEmbedding_wo_time(nn.Module): 189 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 190 | super(DataEmbedding_wo_time, self).__init__() 191 | 192 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 193 | self.position_embedding = PositionalEmbedding(d_model=d_model) 194 | self.dropout = nn.Dropout(p=dropout) 195 | 196 | def forward(self, x): 197 | x = self.value_embedding(x) + self.position_embedding(x) 198 | return self.dropout(x) 199 | -------------------------------------------------------------------------------- /layers/SelfAttention_Family.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from math import sqrt 5 | from utils.masking import TriangularCausalMask, ProbMask 6 | from reformer_pytorch import LSHSelfAttention 7 | 8 | 9 | class DSAttention(nn.Module): 10 | '''De-stationary Attention''' 11 | 12 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 13 | super(DSAttention, self).__init__() 14 | self.scale = scale 15 | self.mask_flag = mask_flag 16 | self.output_attention = output_attention 17 | self.dropout = nn.Dropout(attention_dropout) 18 | 19 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 20 | B, L, H, E = queries.shape 21 | _, S, _, D = values.shape 22 | scale = self.scale or 1. 
/ sqrt(E) 23 | 24 | tau = 1.0 if tau is None else tau.unsqueeze( 25 | 1).unsqueeze(1) # B x 1 x 1 x 1 26 | delta = 0.0 if delta is None else delta.unsqueeze( 27 | 1).unsqueeze(1) # B x 1 x 1 x S 28 | 29 | # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors 30 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta 31 | 32 | if self.mask_flag: 33 | if attn_mask is None: 34 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 35 | 36 | scores.masked_fill_(attn_mask.mask, -np.inf) 37 | 38 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 39 | V = torch.einsum("bhls,bshd->blhd", A, values) 40 | 41 | if self.output_attention: 42 | return (V.contiguous(), A) 43 | else: 44 | return (V.contiguous(), None) 45 | 46 | 47 | class FullAttention(nn.Module): 48 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 49 | super(FullAttention, self).__init__() 50 | self.scale = scale 51 | self.mask_flag = mask_flag 52 | self.output_attention = output_attention 53 | self.dropout = nn.Dropout(attention_dropout) 54 | 55 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 56 | B, L, H, E = queries.shape 57 | _, S, _, D = values.shape 58 | scale = self.scale or 1. 
/ sqrt(E) 59 | 60 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 61 | 62 | if self.mask_flag: 63 | if attn_mask is None: 64 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 65 | 66 | scores.masked_fill_(attn_mask.mask, -np.inf) 67 | 68 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 69 | V = torch.einsum("bhls,bshd->blhd", A, values) 70 | 71 | if self.output_attention: 72 | return (V.contiguous(), A) 73 | else: 74 | return (V.contiguous(), None) 75 | 76 | 77 | class ProbAttention(nn.Module): 78 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 79 | super(ProbAttention, self).__init__() 80 | self.factor = factor 81 | self.scale = scale 82 | self.mask_flag = mask_flag 83 | self.output_attention = output_attention 84 | self.dropout = nn.Dropout(attention_dropout) 85 | 86 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 87 | # Q [B, H, L, D] 88 | B, H, L_K, E = K.shape 89 | _, _, L_Q, _ = Q.shape 90 | 91 | # calculate the sampled Q_K 92 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 93 | # real U = U_part(factor*ln(L_k))*L_q 94 | index_sample = torch.randint(L_K, (L_Q, sample_k)) 95 | K_sample = K_expand[:, :, torch.arange( 96 | L_Q).unsqueeze(1), index_sample, :] 97 | Q_K_sample = torch.matmul( 98 | Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 99 | 100 | # find the Top_k query with sparisty measurement 101 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 102 | M_top = M.topk(n_top, sorted=False)[1] 103 | 104 | # use the reduced Q to calculate Q_K 105 | Q_reduce = Q[torch.arange(B)[:, None, None], 106 | torch.arange(H)[None, :, None], 107 | M_top, :] # factor*ln(L_q) 108 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 109 | 110 | return Q_K, M_top 111 | 112 | def _get_initial_context(self, V, L_Q): 113 | B, H, L_V, D = V.shape 114 | if not self.mask_flag: 115 | # V_sum = V.sum(dim=-2) 116 | V_sum = 
V.mean(dim=-2) 117 | contex = V_sum.unsqueeze(-2).expand(B, H, 118 | L_Q, V_sum.shape[-1]).clone() 119 | else: # use mask 120 | # requires that L_Q == L_V, i.e. for self-attention only 121 | assert (L_Q == L_V) 122 | contex = V.cumsum(dim=-2) 123 | return contex 124 | 125 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 126 | B, H, L_V, D = V.shape 127 | 128 | if self.mask_flag: 129 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 130 | scores.masked_fill_(attn_mask.mask, -np.inf) 131 | 132 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 133 | 134 | context_in[torch.arange(B)[:, None, None], 135 | torch.arange(H)[None, :, None], 136 | index, :] = torch.matmul(attn, V).type_as(context_in) 137 | if self.output_attention: 138 | attns = (torch.ones([B, H, L_V, L_V]) / 139 | L_V).type_as(attn).to(attn.device) 140 | attns[torch.arange(B)[:, None, None], torch.arange(H)[ 141 | None, :, None], index, :] = attn 142 | return (context_in, attns) 143 | else: 144 | return (context_in, None) 145 | 146 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 147 | B, L_Q, H, D = queries.shape 148 | _, L_K, _, _ = keys.shape 149 | 150 | queries = queries.transpose(2, 1) 151 | keys = keys.transpose(2, 1) 152 | values = values.transpose(2, 1) 153 | 154 | U_part = self.factor * \ 155 | np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 156 | u = self.factor * \ 157 | np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 158 | 159 | U_part = U_part if U_part < L_K else L_K 160 | u = u if u < L_Q else L_Q 161 | 162 | scores_top, index = self._prob_QK( 163 | queries, keys, sample_k=U_part, n_top=u) 164 | 165 | # add scale factor 166 | scale = self.scale or 1. 
/ sqrt(D) 167 | if scale is not None: 168 | scores_top = scores_top * scale 169 | # get the context 170 | context = self._get_initial_context(values, L_Q) 171 | # update the context with selected top_k queries 172 | context, attn = self._update_context( 173 | context, values, scores_top, index, L_Q, attn_mask) 174 | 175 | return context.contiguous(), attn 176 | 177 | 178 | class AttentionLayer(nn.Module): 179 | def __init__(self, attention, d_model, n_heads, d_keys=None, 180 | d_values=None): 181 | super(AttentionLayer, self).__init__() 182 | 183 | d_keys = d_keys or (d_model // n_heads) 184 | d_values = d_values or (d_model // n_heads) 185 | 186 | self.inner_attention = attention 187 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 188 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 189 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 190 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 191 | self.n_heads = n_heads 192 | 193 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 194 | B, L, _ = queries.shape 195 | _, S, _ = keys.shape 196 | H = self.n_heads 197 | 198 | queries = self.query_projection(queries).view(B, L, H, -1) 199 | keys = self.key_projection(keys).view(B, S, H, -1) 200 | values = self.value_projection(values).view(B, S, H, -1) 201 | 202 | out, attn = self.inner_attention( 203 | queries, 204 | keys, 205 | values, 206 | attn_mask, 207 | tau=tau, 208 | delta=delta 209 | ) 210 | out = out.view(B, L, -1) 211 | 212 | return self.out_projection(out), attn 213 | 214 | 215 | class ReformerLayer(nn.Module): 216 | def __init__(self, attention, d_model, n_heads, d_keys=None, 217 | d_values=None, causal=False, bucket_size=4, n_hashes=4): 218 | super().__init__() 219 | self.bucket_size = bucket_size 220 | self.attn = LSHSelfAttention( 221 | dim=d_model, 222 | heads=n_heads, 223 | bucket_size=bucket_size, 224 | n_hashes=n_hashes, 225 | causal=causal 226 | ) 227 | 228 | def 
fit_length(self, queries): 229 | # inside reformer: assert N % (bucket_size * 2) == 0 230 | B, N, C = queries.shape 231 | if N % (self.bucket_size * 2) == 0: 232 | return queries 233 | else: 234 | # fill the time series 235 | fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2)) 236 | return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1) 237 | 238 | def forward(self, queries, keys, values, attn_mask, tau, delta): 239 | # in Reformer: defalut queries=keys 240 | B, N, C = queries.shape 241 | queries = self.attn(self.fit_length(queries))[:, :N, :] 242 | return queries, None 243 | -------------------------------------------------------------------------------- /layers/StandardNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Normalize(nn.Module): 6 | def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False): 7 | """ 8 | :param num_features: the number of features or channels 9 | :param eps: a value added for numerical stability 10 | :param affine: if True, RevIN has learnable affine parameters 11 | """ 12 | super(Normalize, self).__init__() 13 | self.num_features = num_features 14 | self.eps = eps 15 | self.affine = affine 16 | self.subtract_last = subtract_last 17 | self.non_norm = non_norm 18 | if self.affine: 19 | self._init_params() 20 | 21 | def forward(self, x, mode: str): 22 | if mode == 'norm': 23 | self._get_statistics(x) 24 | x = self._normalize(x) 25 | elif mode == 'denorm': 26 | x = self._denormalize(x) 27 | else: 28 | raise NotImplementedError 29 | return x 30 | 31 | def _init_params(self): 32 | # initialize RevIN params: (C,) 33 | self.affine_weight = nn.Parameter(torch.ones(self.num_features)) 34 | self.affine_bias = nn.Parameter(torch.zeros(self.num_features)) 35 | 36 | def _get_statistics(self, x): 37 | dim2reduce = tuple(range(1, x.ndim - 1)) 38 | if self.subtract_last: 
39 | self.last = x[:, -1, :].unsqueeze(1) 40 | else: 41 | self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach() 42 | self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach() 43 | 44 | def _normalize(self, x): 45 | if self.non_norm: 46 | return x 47 | if self.subtract_last: 48 | x = x - self.last 49 | else: 50 | x = x - self.mean 51 | x = x / self.stdev 52 | if self.affine: 53 | x = x * self.affine_weight 54 | x = x + self.affine_bias 55 | return x 56 | 57 | def _denormalize(self, x): 58 | if self.non_norm: 59 | return x 60 | if self.affine: 61 | x = x - self.affine_bias 62 | x = x / (self.affine_weight + self.eps * self.eps) 63 | x = x * self.stdev 64 | if self.subtract_last: 65 | x = x + self.last 66 | else: 67 | x = x + self.mean 68 | return x 69 | -------------------------------------------------------------------------------- /layers/Transformer_EncDec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ConvLayer(nn.Module): 7 | def __init__(self, c_in): 8 | super(ConvLayer, self).__init__() 9 | self.downConv = nn.Conv1d(in_channels=c_in, 10 | out_channels=c_in, 11 | kernel_size=3, 12 | padding=2, 13 | padding_mode='circular') 14 | self.norm = nn.BatchNorm1d(c_in) 15 | self.activation = nn.ELU() 16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 17 | 18 | def forward(self, x): 19 | x = self.downConv(x.permute(0, 2, 1)) 20 | x = self.norm(x) 21 | x = self.activation(x) 22 | x = self.maxPool(x) 23 | x = x.transpose(1, 2) 24 | return x 25 | 26 | 27 | class EncoderLayer(nn.Module): 28 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 29 | super(EncoderLayer, self).__init__() 30 | d_ff = d_ff or 4 * d_model 31 | self.attention = attention 32 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 33 | 
self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 34 | self.norm1 = nn.LayerNorm(d_model) 35 | self.norm2 = nn.LayerNorm(d_model) 36 | self.dropout = nn.Dropout(dropout) 37 | self.activation = F.relu if activation == "relu" else F.gelu 38 | 39 | def forward(self, x, attn_mask=None, tau=None, delta=None): 40 | new_x, attn = self.attention( 41 | x, x, x, 42 | attn_mask=attn_mask, 43 | tau=tau, delta=delta 44 | ) 45 | x = x + self.dropout(new_x) 46 | 47 | y = x = self.norm1(x) 48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 49 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 50 | 51 | return self.norm2(x + y), attn 52 | 53 | 54 | class Encoder(nn.Module): 55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 56 | super(Encoder, self).__init__() 57 | self.attn_layers = nn.ModuleList(attn_layers) 58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 59 | self.norm = norm_layer 60 | 61 | def forward(self, x, attn_mask=None, tau=None, delta=None): 62 | # x [B, L, D] 63 | attns = [] 64 | if self.conv_layers is not None: 65 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): 66 | delta = delta if i == 0 else None 67 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 68 | x = conv_layer(x) 69 | attns.append(attn) 70 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None) 71 | attns.append(attn) 72 | else: 73 | for attn_layer in self.attn_layers: 74 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 75 | attns.append(attn) 76 | 77 | if self.norm is not None: 78 | x = self.norm(x) 79 | 80 | return x, attns 81 | 82 | 83 | class DecoderLayer(nn.Module): 84 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 85 | dropout=0.1, activation="relu"): 86 | super(DecoderLayer, self).__init__() 87 | d_ff = d_ff or 4 * d_model 88 | self.self_attention = self_attention 89 | 
self.cross_attention = cross_attention 90 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 91 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 92 | self.norm1 = nn.LayerNorm(d_model) 93 | self.norm2 = nn.LayerNorm(d_model) 94 | self.norm3 = nn.LayerNorm(d_model) 95 | self.dropout = nn.Dropout(dropout) 96 | self.activation = F.relu if activation == "relu" else F.gelu 97 | 98 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 99 | x = x + self.dropout(self.self_attention( 100 | x, x, x, 101 | attn_mask=x_mask, 102 | tau=tau, delta=None 103 | )[0]) 104 | x = self.norm1(x) 105 | 106 | x = x + self.dropout(self.cross_attention( 107 | x, cross, cross, 108 | attn_mask=cross_mask, 109 | tau=tau, delta=delta 110 | )[0]) 111 | 112 | y = x = self.norm2(x) 113 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 114 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 115 | 116 | return self.norm3(x + y) 117 | 118 | 119 | class Decoder(nn.Module): 120 | def __init__(self, layers, norm_layer=None, projection=None): 121 | super(Decoder, self).__init__() 122 | self.layers = nn.ModuleList(layers) 123 | self.norm = norm_layer 124 | self.projection = projection 125 | 126 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 127 | for layer in self.layers: 128 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) 129 | 130 | if self.norm is not None: 131 | x = self.norm(x) 132 | 133 | if self.projection is not None: 134 | x = self.projection(x) 135 | return x 136 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/layers/__init__.py 
-------------------------------------------------------------------------------- /models/Autoformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from layers.Embed import DataEmbedding, DataEmbedding_wo_pos 5 | from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer 6 | from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp 7 | import math 8 | import numpy as np 9 | 10 | 11 | class Model(nn.Module): 12 | """ 13 | Autoformer is the first method to achieve the series-wise connection, 14 | with inherent O(LlogL) complexity 15 | Paper link: https://openreview.net/pdf?id=I55UqU-M11y 16 | """ 17 | 18 | def __init__(self, configs): 19 | super(Model, self).__init__() 20 | self.task_name = configs.task_name 21 | self.seq_len = configs.seq_len 22 | self.label_len = configs.label_len 23 | self.pred_len = configs.pred_len 24 | self.output_attention = configs.output_attention 25 | 26 | # Decomp 27 | kernel_size = configs.moving_avg 28 | self.decomp = series_decomp(kernel_size) 29 | 30 | # Embedding 31 | self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, 32 | configs.dropout) 33 | # Encoder 34 | self.encoder = Encoder( 35 | [ 36 | EncoderLayer( 37 | AutoCorrelationLayer( 38 | AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, 39 | output_attention=configs.output_attention), 40 | configs.d_model, configs.n_heads), 41 | configs.d_model, 42 | configs.d_ff, 43 | moving_avg=configs.moving_avg, 44 | dropout=configs.dropout, 45 | activation=configs.activation 46 | ) for l in range(configs.e_layers) 47 | ], 48 | norm_layer=my_Layernorm(configs.d_model) 49 | ) 50 | # Decoder 51 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': 52 | self.dec_embedding = 
DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, 53 | configs.dropout) 54 | self.decoder = Decoder( 55 | [ 56 | DecoderLayer( 57 | AutoCorrelationLayer( 58 | AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout, 59 | output_attention=False), 60 | configs.d_model, configs.n_heads), 61 | AutoCorrelationLayer( 62 | AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, 63 | output_attention=False), 64 | configs.d_model, configs.n_heads), 65 | configs.d_model, 66 | configs.c_out, 67 | configs.d_ff, 68 | moving_avg=configs.moving_avg, 69 | dropout=configs.dropout, 70 | activation=configs.activation, 71 | ) 72 | for l in range(configs.d_layers) 73 | ], 74 | norm_layer=my_Layernorm(configs.d_model), 75 | projection=nn.Linear(configs.d_model, configs.c_out, bias=True) 76 | ) 77 | if self.task_name == 'imputation': 78 | self.projection = nn.Linear( 79 | configs.d_model, configs.c_out, bias=True) 80 | if self.task_name == 'anomaly_detection': 81 | self.projection = nn.Linear( 82 | configs.d_model, configs.c_out, bias=True) 83 | if self.task_name == 'classification': 84 | self.act = F.gelu 85 | self.dropout = nn.Dropout(configs.dropout) 86 | self.projection = nn.Linear( 87 | configs.d_model * configs.seq_len, configs.num_class) 88 | 89 | def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): 90 | # decomp init 91 | mean = torch.mean(x_enc, dim=1).unsqueeze( 92 | 1).repeat(1, self.pred_len, 1) 93 | zeros = torch.zeros([x_dec.shape[0], self.pred_len, 94 | x_dec.shape[2]], device=x_enc.device) 95 | seasonal_init, trend_init = self.decomp(x_enc) 96 | # decoder input 97 | trend_init = torch.cat( 98 | [trend_init[:, -self.label_len:, :], mean], dim=1) 99 | seasonal_init = torch.cat( 100 | [seasonal_init[:, -self.label_len:, :], zeros], dim=1) 101 | # enc 102 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 103 | enc_out, attns = self.encoder(enc_out, attn_mask=None) 104 | # dec 105 | dec_out = 
self.dec_embedding(seasonal_init, x_mark_dec) 106 | seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, 107 | trend=trend_init) 108 | # final 109 | dec_out = trend_part + seasonal_part 110 | return dec_out 111 | 112 | def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): 113 | # enc 114 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 115 | enc_out, attns = self.encoder(enc_out, attn_mask=None) 116 | # final 117 | dec_out = self.projection(enc_out) 118 | return dec_out 119 | 120 | def anomaly_detection(self, x_enc): 121 | # enc 122 | enc_out = self.enc_embedding(x_enc, None) 123 | enc_out, attns = self.encoder(enc_out, attn_mask=None) 124 | # final 125 | dec_out = self.projection(enc_out) 126 | return dec_out 127 | 128 | def classification(self, x_enc, x_mark_enc): 129 | # enc 130 | enc_out = self.enc_embedding(x_enc, None) 131 | enc_out, attns = self.encoder(enc_out, attn_mask=None) 132 | 133 | # Output 134 | # the output transformer encoder/decoder embeddings don't include non-linearity 135 | output = self.act(enc_out) 136 | output = self.dropout(output) 137 | # zero-out padding embeddings 138 | output = output * x_mark_enc.unsqueeze(-1) 139 | # (batch_size, seq_length * d_model) 140 | output = output.reshape(output.shape[0], -1) 141 | output = self.projection(output) # (batch_size, num_classes) 142 | return output 143 | 144 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): 145 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': 146 | dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) 147 | return dec_out[:, -self.pred_len:, :] # [B, L, D] 148 | if self.task_name == 'imputation': 149 | dec_out = self.imputation( 150 | x_enc, x_mark_enc, x_dec, x_mark_dec, mask) 151 | return dec_out # [B, L, D] 152 | if self.task_name == 'anomaly_detection': 153 | dec_out = self.anomaly_detection(x_enc) 154 | return dec_out # [B, L, D] 155 | if self.task_name == 
'classification': 156 | dec_out = self.classification(x_enc, x_mark_enc) 157 | return dec_out # [B, N] 158 | return None 159 | -------------------------------------------------------------------------------- /models/DLinear.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from layers.Autoformer_EncDec import series_decomp 5 | 6 | 7 | class Model(nn.Module): 8 | """ 9 | Paper link: https://arxiv.org/pdf/2205.13504.pdf 10 | """ 11 | 12 | def __init__(self, configs, individual=False): 13 | """ 14 | individual: Bool, whether shared model among different variates. 15 | """ 16 | super(Model, self).__init__() 17 | self.task_name = configs.task_name 18 | self.seq_len = configs.seq_len 19 | if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': 20 | self.pred_len = configs.seq_len 21 | else: 22 | self.pred_len = configs.pred_len 23 | 24 | self.decompsition = series_decomp(configs.moving_avg) 25 | self.individual = individual 26 | self.channels = configs.enc_in 27 | 28 | if self.individual: 29 | self.Linear_Seasonal = nn.ModuleList() 30 | self.Linear_Trend = nn.ModuleList() 31 | 32 | for i in range(self.channels): 33 | self.Linear_Seasonal.append( 34 | nn.Linear(self.seq_len, self.pred_len)) 35 | self.Linear_Trend.append( 36 | nn.Linear(self.seq_len, self.pred_len)) 37 | 38 | self.Linear_Seasonal[i].weight = nn.Parameter( 39 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) 40 | self.Linear_Trend[i].weight = nn.Parameter( 41 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) 42 | else: 43 | self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len) 44 | self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len) 45 | 46 | self.Linear_Seasonal.weight = nn.Parameter( 47 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) 48 | self.Linear_Trend.weight = 
nn.Parameter( 49 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) 50 | 51 | if self.task_name == 'classification': 52 | self.act = F.gelu 53 | self.dropout = nn.Dropout(configs.dropout) 54 | self.projection = nn.Linear( 55 | configs.enc_in * configs.seq_len, configs.num_class) 56 | 57 | def encoder(self, x): 58 | seasonal_init, trend_init = self.decompsition(x) 59 | seasonal_init, trend_init = seasonal_init.permute( 60 | 0, 2, 1), trend_init.permute(0, 2, 1) 61 | if self.individual: 62 | seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len], 63 | dtype=seasonal_init.dtype).to(seasonal_init.device) 64 | trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len], 65 | dtype=trend_init.dtype).to(trend_init.device) 66 | for i in range(self.channels): 67 | seasonal_output[:, i, :] = self.Linear_Seasonal[i]( 68 | seasonal_init[:, i, :]) 69 | trend_output[:, i, :] = self.Linear_Trend[i]( 70 | trend_init[:, i, :]) 71 | else: 72 | seasonal_output = self.Linear_Seasonal(seasonal_init) 73 | trend_output = self.Linear_Trend(trend_init) 74 | x = seasonal_output + trend_output 75 | return x.permute(0, 2, 1) 76 | 77 | def forecast(self, x_enc): 78 | return self.encoder(x_enc) 79 | 80 | def imputation(self, x_enc): 81 | return self.encoder(x_enc) 82 | 83 | def anomaly_detection(self, x_enc): 84 | return self.encoder(x_enc) 85 | 86 | def classification(self, x_enc): 87 | enc_out = self.encoder(x_enc) 88 | # Output 89 | # (batch_size, seq_length * d_model) 90 | output = enc_out.reshape(enc_out.shape[0], -1) 91 | output = self.projection(output) # (batch_size, num_classes) 92 | return output 93 | 94 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): 95 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': 96 | dec_out = self.forecast(x_enc) 97 | return dec_out[:, -self.pred_len:, :] # [B, L, D] 98 | if self.task_name == 'imputation': 99 | dec_out = 
self.imputation(x_enc) 100 | return dec_out # [B, L, D] 101 | if self.task_name == 'anomaly_detection': 102 | dec_out = self.anomaly_detection(x_enc) 103 | return dec_out # [B, L, D] 104 | if self.task_name == 'classification': 105 | dec_out = self.classification(x_enc) 106 | return dec_out # [B, N] 107 | return None 108 | -------------------------------------------------------------------------------- /models/TimeLLM.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from transformers import LlamaConfig, LlamaModel, LlamaTokenizer, GPT2Config, GPT2Model, GPT2Tokenizer, BertConfig, \ 7 | BertModel, BertTokenizer 8 | from layers.Embed import PatchEmbedding 9 | import transformers 10 | from layers.StandardNorm import Normalize 11 | 12 | transformers.logging.set_verbosity_error() 13 | 14 | 15 | class FlattenHead(nn.Module): 16 | def __init__(self, n_vars, nf, target_window, head_dropout=0): 17 | super().__init__() 18 | self.n_vars = n_vars 19 | self.flatten = nn.Flatten(start_dim=-2) 20 | self.linear = nn.Linear(nf, target_window) 21 | self.dropout = nn.Dropout(head_dropout) 22 | 23 | def forward(self, x): 24 | x = self.flatten(x) 25 | x = self.linear(x) 26 | x = self.dropout(x) 27 | return x 28 | 29 | 30 | class Model(nn.Module): 31 | 32 | def __init__(self, configs, patch_len=16, stride=8): 33 | super(Model, self).__init__() 34 | self.task_name = configs.task_name 35 | self.pred_len = configs.pred_len 36 | self.seq_len = configs.seq_len 37 | self.d_ff = configs.d_ff 38 | self.top_k = 5 39 | self.d_llm = configs.llm_dim 40 | self.patch_len = configs.patch_len 41 | self.stride = configs.stride 42 | 43 | if configs.llm_model == 'LLAMA': 44 | # self.llama_config = LlamaConfig.from_pretrained('/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/') 45 | self.llama_config = LlamaConfig.from_pretrained('huggyllama/llama-7b') 46 | self.llama_config.num_hidden_layers 
= configs.llm_layers 47 | self.llama_config.output_attentions = True 48 | self.llama_config.output_hidden_states = True 49 | try: 50 | self.llm_model = LlamaModel.from_pretrained( 51 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/", 52 | 'huggyllama/llama-7b', 53 | trust_remote_code=True, 54 | local_files_only=True, 55 | config=self.llama_config, 56 | # load_in_4bit=True 57 | ) 58 | except EnvironmentError: # downloads model from HF is not already done 59 | print("Local model files not found. Attempting to download...") 60 | self.llm_model = LlamaModel.from_pretrained( 61 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/", 62 | 'huggyllama/llama-7b', 63 | trust_remote_code=True, 64 | local_files_only=False, 65 | config=self.llama_config, 66 | # load_in_4bit=True 67 | ) 68 | try: 69 | self.tokenizer = LlamaTokenizer.from_pretrained( 70 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/tokenizer.model", 71 | 'huggyllama/llama-7b', 72 | trust_remote_code=True, 73 | local_files_only=True 74 | ) 75 | except EnvironmentError: # downloads the tokenizer from HF if not already done 76 | print("Local tokenizer files not found. Atempting to download them..") 77 | self.tokenizer = LlamaTokenizer.from_pretrained( 78 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/tokenizer.model", 79 | 'huggyllama/llama-7b', 80 | trust_remote_code=True, 81 | local_files_only=False 82 | ) 83 | elif configs.llm_model == 'GPT2': 84 | self.gpt2_config = GPT2Config.from_pretrained('openai-community/gpt2') 85 | 86 | self.gpt2_config.num_hidden_layers = configs.llm_layers 87 | self.gpt2_config.output_attentions = True 88 | self.gpt2_config.output_hidden_states = True 89 | try: 90 | self.llm_model = GPT2Model.from_pretrained( 91 | 'openai-community/gpt2', 92 | trust_remote_code=True, 93 | local_files_only=True, 94 | config=self.gpt2_config, 95 | ) 96 | except EnvironmentError: # downloads model from HF is not already done 97 | print("Local model files not found. 
Attempting to download...") 98 | self.llm_model = GPT2Model.from_pretrained( 99 | 'openai-community/gpt2', 100 | trust_remote_code=True, 101 | local_files_only=False, 102 | config=self.gpt2_config, 103 | ) 104 | 105 | try: 106 | self.tokenizer = GPT2Tokenizer.from_pretrained( 107 | 'openai-community/gpt2', 108 | trust_remote_code=True, 109 | local_files_only=True 110 | ) 111 | except EnvironmentError: # downloads the tokenizer from HF if not already done 112 | print("Local tokenizer files not found. Atempting to download them..") 113 | self.tokenizer = GPT2Tokenizer.from_pretrained( 114 | 'openai-community/gpt2', 115 | trust_remote_code=True, 116 | local_files_only=False 117 | ) 118 | elif configs.llm_model == 'BERT': 119 | self.bert_config = BertConfig.from_pretrained('google-bert/bert-base-uncased') 120 | 121 | self.bert_config.num_hidden_layers = configs.llm_layers 122 | self.bert_config.output_attentions = True 123 | self.bert_config.output_hidden_states = True 124 | try: 125 | self.llm_model = BertModel.from_pretrained( 126 | 'google-bert/bert-base-uncased', 127 | trust_remote_code=True, 128 | local_files_only=True, 129 | config=self.bert_config, 130 | ) 131 | except EnvironmentError: # downloads model from HF is not already done 132 | print("Local model files not found. Attempting to download...") 133 | self.llm_model = BertModel.from_pretrained( 134 | 'google-bert/bert-base-uncased', 135 | trust_remote_code=True, 136 | local_files_only=False, 137 | config=self.bert_config, 138 | ) 139 | 140 | try: 141 | self.tokenizer = BertTokenizer.from_pretrained( 142 | 'google-bert/bert-base-uncased', 143 | trust_remote_code=True, 144 | local_files_only=True 145 | ) 146 | except EnvironmentError: # downloads the tokenizer from HF if not already done 147 | print("Local tokenizer files not found. 
Atempting to download them..") 148 | self.tokenizer = BertTokenizer.from_pretrained( 149 | 'google-bert/bert-base-uncased', 150 | trust_remote_code=True, 151 | local_files_only=False 152 | ) 153 | else: 154 | raise Exception('LLM model is not defined') 155 | 156 | if self.tokenizer.eos_token: 157 | self.tokenizer.pad_token = self.tokenizer.eos_token 158 | else: 159 | pad_token = '[PAD]' 160 | self.tokenizer.add_special_tokens({'pad_token': pad_token}) 161 | self.tokenizer.pad_token = pad_token 162 | 163 | for param in self.llm_model.parameters(): 164 | param.requires_grad = False 165 | 166 | if configs.prompt_domain: 167 | self.description = configs.content 168 | else: 169 | self.description = 'The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment.' 170 | 171 | self.dropout = nn.Dropout(configs.dropout) 172 | 173 | self.patch_embedding = PatchEmbedding( 174 | configs.d_model, self.patch_len, self.stride, configs.dropout) 175 | 176 | self.word_embeddings = self.llm_model.get_input_embeddings().weight 177 | self.vocab_size = self.word_embeddings.shape[0] 178 | self.num_tokens = 1000 179 | self.mapping_layer = nn.Linear(self.vocab_size, self.num_tokens) 180 | 181 | self.reprogramming_layer = ReprogrammingLayer(configs.d_model, configs.n_heads, self.d_ff, self.d_llm) 182 | 183 | self.patch_nums = int((configs.seq_len - self.patch_len) / self.stride + 2) 184 | self.head_nf = self.d_ff * self.patch_nums 185 | 186 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': 187 | self.output_projection = FlattenHead(configs.enc_in, self.head_nf, self.pred_len, 188 | head_dropout=configs.dropout) 189 | else: 190 | raise NotImplementedError 191 | 192 | self.normalize_layers = Normalize(configs.enc_in, affine=False) 193 | 194 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): 195 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': 196 | 
dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) 197 | return dec_out[:, -self.pred_len:, :] 198 | return None 199 | 200 | def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): 201 | 202 | x_enc = self.normalize_layers(x_enc, 'norm') 203 | 204 | B, T, N = x_enc.size() 205 | x_enc = x_enc.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) 206 | 207 | min_values = torch.min(x_enc, dim=1)[0] 208 | max_values = torch.max(x_enc, dim=1)[0] 209 | medians = torch.median(x_enc, dim=1).values 210 | lags = self.calcute_lags(x_enc) 211 | trends = x_enc.diff(dim=1).sum(dim=1) 212 | 213 | prompt = [] 214 | for b in range(x_enc.shape[0]): 215 | min_values_str = str(min_values[b].tolist()[0]) 216 | max_values_str = str(max_values[b].tolist()[0]) 217 | median_values_str = str(medians[b].tolist()[0]) 218 | lags_values_str = str(lags[b].tolist()) 219 | prompt_ = ( 220 | f"<|start_prompt|>Dataset description: {self.description}" 221 | f"Task description: forecast the next {str(self.pred_len)} steps given the previous {str(self.seq_len)} steps information; " 222 | "Input statistics: " 223 | f"min value {min_values_str}, " 224 | f"max value {max_values_str}, " 225 | f"median value {median_values_str}, " 226 | f"the trend of input is {'upward' if trends[b] > 0 else 'downward'}, " 227 | f"top 5 lags are : {lags_values_str}<||>" 228 | ) 229 | 230 | prompt.append(prompt_) 231 | 232 | x_enc = x_enc.reshape(B, N, T).permute(0, 2, 1).contiguous() 233 | 234 | prompt = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048).input_ids 235 | prompt_embeddings = self.llm_model.get_input_embeddings()(prompt.to(x_enc.device)) # (batch, prompt_token, dim) 236 | 237 | source_embeddings = self.mapping_layer(self.word_embeddings.permute(1, 0)).permute(1, 0) 238 | 239 | x_enc = x_enc.permute(0, 2, 1).contiguous() 240 | enc_out, n_vars = self.patch_embedding(x_enc.to(torch.bfloat16)) 241 | enc_out = self.reprogramming_layer(enc_out, source_embeddings, 
source_embeddings) 242 | llama_enc_out = torch.cat([prompt_embeddings, enc_out], dim=1) 243 | dec_out = self.llm_model(inputs_embeds=llama_enc_out).last_hidden_state 244 | dec_out = dec_out[:, :, :self.d_ff] 245 | 246 | dec_out = torch.reshape( 247 | dec_out, (-1, n_vars, dec_out.shape[-2], dec_out.shape[-1])) 248 | dec_out = dec_out.permute(0, 1, 3, 2).contiguous() 249 | 250 | dec_out = self.output_projection(dec_out[:, :, :, -self.patch_nums:]) 251 | dec_out = dec_out.permute(0, 2, 1).contiguous() 252 | 253 | dec_out = self.normalize_layers(dec_out, 'denorm') 254 | 255 | return dec_out 256 | 257 | def calcute_lags(self, x_enc): 258 | q_fft = torch.fft.rfft(x_enc.permute(0, 2, 1).contiguous(), dim=-1) 259 | k_fft = torch.fft.rfft(x_enc.permute(0, 2, 1).contiguous(), dim=-1) 260 | res = q_fft * torch.conj(k_fft) 261 | corr = torch.fft.irfft(res, dim=-1) 262 | mean_value = torch.mean(corr, dim=1) 263 | _, lags = torch.topk(mean_value, self.top_k, dim=-1) 264 | return lags 265 | 266 | 267 | class ReprogrammingLayer(nn.Module): 268 | def __init__(self, d_model, n_heads, d_keys=None, d_llm=None, attention_dropout=0.1): 269 | super(ReprogrammingLayer, self).__init__() 270 | 271 | d_keys = d_keys or (d_model // n_heads) 272 | 273 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 274 | self.key_projection = nn.Linear(d_llm, d_keys * n_heads) 275 | self.value_projection = nn.Linear(d_llm, d_keys * n_heads) 276 | self.out_projection = nn.Linear(d_keys * n_heads, d_llm) 277 | self.n_heads = n_heads 278 | self.dropout = nn.Dropout(attention_dropout) 279 | 280 | def forward(self, target_embedding, source_embedding, value_embedding): 281 | B, L, _ = target_embedding.shape 282 | S, _ = source_embedding.shape 283 | H = self.n_heads 284 | 285 | target_embedding = self.query_projection(target_embedding).view(B, L, H, -1) 286 | source_embedding = self.key_projection(source_embedding).view(S, H, -1) 287 | value_embedding = self.value_projection(value_embedding).view(S, 
H, -1) 288 | 289 | out = self.reprogramming(target_embedding, source_embedding, value_embedding) 290 | 291 | out = out.reshape(B, L, -1) 292 | 293 | return self.out_projection(out) 294 | 295 | def reprogramming(self, target_embedding, source_embedding, value_embedding): 296 | B, L, H, E = target_embedding.shape 297 | 298 | scale = 1. / sqrt(E) 299 | 300 | scores = torch.einsum("blhe,she->bhls", target_embedding, source_embedding) 301 | 302 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 303 | reprogramming_embedding = torch.einsum("bhls,she->blhe", A, value_embedding) 304 | 305 | return reprogramming_embedding 306 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/models/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.2.2 2 | accelerate==0.28.0 3 | einops==0.7.0 4 | matplotlib==3.7.0 5 | numpy==1.23.5 6 | pandas==1.5.3 7 | scikit_learn==1.2.2 8 | scipy==1.12.0 9 | tqdm==4.65.0 10 | peft==0.4.0 11 | transformers==4.31.0 12 | deepspeed==0.14.0 13 | sentencepiece==0.2.0 14 | -------------------------------------------------------------------------------- /run_m4.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from accelerate import Accelerator, DeepSpeedPlugin 4 | from accelerate import DistributedDataParallelKwargs 5 | from torch import optim 6 | from torch.optim import lr_scheduler 7 | 8 | from data_provider.m4 import M4Meta 9 | from models import Autoformer, DLinear, TimeLLM 10 | 11 | from data_provider.data_factory import data_provider 12 | import time 13 | import random 14 | import numpy as np 15 | 
import pandas 16 | 17 | from utils.losses import smape_loss 18 | from utils.m4_summary import M4Summary 19 | import os 20 | 21 | os.environ['CURL_CA_BUNDLE'] = '' 22 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64" 23 | 24 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, load_content, test 25 | 26 | parser = argparse.ArgumentParser(description='Time-LLM') 27 | 28 | fix_seed = 2021 29 | random.seed(fix_seed) 30 | torch.manual_seed(fix_seed) 31 | np.random.seed(fix_seed) 32 | 33 | # basic config 34 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', 35 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') 36 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status') 37 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') 38 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results') 39 | parser.add_argument('--model', type=str, required=True, default='Autoformer', 40 | help='model name, options: [Autoformer, DLinear]') 41 | parser.add_argument('--seed', type=int, default=0, help='random seed') 42 | 43 | # data loader 44 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') 45 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file') 46 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') 47 | parser.add_argument('--features', type=str, default='M', 48 | help='forecasting task, options:[M, S, MS]; ' 49 | 'M:multivariate predict multivariate, S: univariate predict univariate, ' 50 | 'MS:multivariate predict univariate') 51 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') 52 | parser.add_argument('--loader', type=str, 
default='modal', help='dataset type') 53 | parser.add_argument('--freq', type=str, default='h', 54 | help='freq for time features encoding, ' 55 | 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], ' 56 | 'you can also use more detailed freq like 15min or 3h') 57 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') 58 | 59 | # forecasting task 60 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') 61 | parser.add_argument('--label_len', type=int, default=48, help='start token length') 62 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') 63 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') 64 | 65 | # model define 66 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') 67 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') 68 | parser.add_argument('--c_out', type=int, default=7, help='output size') 69 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model') 70 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads') 71 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') 72 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') 73 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn') 74 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') 75 | parser.add_argument('--factor', type=int, default=1, help='attn factor') 76 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout') 77 | parser.add_argument('--embed', type=str, default='timeF', 78 | help='time features encoding, options:[timeF, fixed, learned]') 79 | parser.add_argument('--activation', type=str, default='gelu', help='activation') 
80 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder') 81 | parser.add_argument('--patch_len', type=int, default=16, help='patch length') 82 | parser.add_argument('--stride', type=int, default=8, help='stride') 83 | parser.add_argument('--prompt_domain', type=int, default=0, help='') 84 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT 85 | parser.add_argument('--llm_dim', type=int, default='4096', help='LLM model dimension')# LLama7b:4096; GPT2-small:768; BERT-base:768 86 | 87 | # optimization 88 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') 89 | parser.add_argument('--itr', type=int, default=1, help='experiments times') 90 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') 91 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs') 92 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') 93 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation') 94 | parser.add_argument('--patience', type=int, default=20, help='early stopping patience') 95 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') 96 | parser.add_argument('--des', type=str, default='test', help='exp description') 97 | parser.add_argument('--loss', type=str, default='MSE', help='loss function') 98 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') 99 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start') 100 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) 101 | parser.add_argument('--llm_layers', type=int, default=6) 102 | parser.add_argument('--percent', type=int, default=100) 103 | 104 | args = parser.parse_args() 105 
| ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) 106 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json') 107 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin) 108 | 109 | for ii in range(args.itr): 110 | # setting record of experiments 111 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format( 112 | args.task_name, 113 | args.model_id, 114 | args.model, 115 | args.data, 116 | args.features, 117 | args.seq_len, 118 | args.label_len, 119 | args.pred_len, 120 | args.d_model, 121 | args.n_heads, 122 | args.e_layers, 123 | args.d_layers, 124 | args.d_ff, 125 | args.factor, 126 | args.embed, 127 | args.des, ii) 128 | 129 | if args.data == 'm4': 130 | args.pred_len = M4Meta.horizons_map[args.seasonal_patterns] # Up to M4 config 131 | args.seq_len = 2 * args.pred_len 132 | args.label_len = args.pred_len 133 | args.frequency_map = M4Meta.frequency_map[args.seasonal_patterns] 134 | 135 | train_data, train_loader = data_provider(args, 'train') 136 | vali_data, vali_loader = data_provider(args, 'val') 137 | test_data, test_loader = data_provider(args, 'test') 138 | 139 | if args.model == 'Autoformer': 140 | model = Autoformer.Model(args).float() 141 | elif args.model == 'DLinear': 142 | model = DLinear.Model(args).float() 143 | else: 144 | model = TimeLLM.Model(args).float() 145 | 146 | path = os.path.join(args.checkpoints, 147 | setting + '-' + args.model_comment) # unique checkpoint saving path 148 | args.content = load_content(args) 149 | if not os.path.exists(path) and accelerator.is_local_main_process: 150 | os.makedirs(path) 151 | 152 | time_now = time.time() 153 | 154 | train_steps = len(train_loader) 155 | early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience, verbose=True) 156 | 157 | model_optim = optim.Adam(model.parameters(), lr=args.learning_rate) 158 | 159 | if args.lradj == 'COS': 160 | scheduler = 
torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8) 161 | else: 162 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 163 | steps_per_epoch=train_steps, 164 | pct_start=args.pct_start, 165 | epochs=args.train_epochs, 166 | max_lr=args.learning_rate) 167 | 168 | criterion = smape_loss() 169 | 170 | train_loader, vali_loader, model, model_optim, scheduler = accelerator.prepare( 171 | train_loader, vali_loader, model, model_optim, scheduler) 172 | 173 | for epoch in range(args.train_epochs): 174 | iter_count = 0 175 | train_loss = [] 176 | 177 | model.train() 178 | epoch_time = time.time() 179 | 180 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): 181 | iter_count += 1 182 | model_optim.zero_grad() 183 | batch_x = batch_x.float().to(accelerator.device) 184 | 185 | batch_y = batch_y.float().to(accelerator.device) 186 | batch_y_mark = batch_y_mark.float().to(accelerator.device) 187 | 188 | # decoder input 189 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(accelerator.device) 190 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to( 191 | accelerator.device) 192 | 193 | outputs = model(batch_x, None, dec_inp, None) 194 | 195 | f_dim = -1 if args.features == 'MS' else 0 196 | outputs = outputs[:, -args.pred_len:, f_dim:] 197 | batch_y = batch_y[:, -args.pred_len:, f_dim:] 198 | 199 | batch_y_mark = batch_y_mark[:, -args.pred_len:, f_dim:] 200 | loss = criterion(batch_x, args.frequency_map, outputs, batch_y, batch_y_mark) 201 | 202 | train_loss.append(loss.item()) 203 | 204 | if (i + 1) % 100 == 0: 205 | accelerator.print( 206 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()) 207 | ) 208 | speed = (time.time() - time_now) / iter_count 209 | left_time = speed * ((args.train_epochs - epoch) * train_steps - i) 210 | accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 211 | 
iter_count = 0 212 | time_now = time.time() 213 | 214 | accelerator.backward(loss) 215 | model_optim.step() 216 | 217 | if args.lradj == 'TST': 218 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False) 219 | scheduler.step() 220 | 221 | accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 222 | train_loss = np.average(train_loss) 223 | vali_loss = test(args, accelerator, model, train_loader, vali_loader, criterion) 224 | test_loss = vali_loss 225 | accelerator.print( 226 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 227 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 228 | early_stopping(vali_loss, model, path) # model saving 229 | if early_stopping.early_stop: 230 | accelerator.print("Early stopping") 231 | break 232 | 233 | if args.lradj != 'TST': 234 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True) 235 | else: 236 | accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 237 | 238 | best_model_path = path + '/' + 'checkpoint' 239 | accelerator.wait_for_everyone() 240 | unwrapped_model = accelerator.unwrap_model(model) 241 | torch.cuda.synchronize() 242 | torch.cuda.empty_cache() 243 | unwrapped_model.load_state_dict(torch.load(best_model_path, map_location=lambda storage, loc: storage)) 244 | 245 | x, _ = train_loader.dataset.last_insample_window() 246 | y = test_loader.dataset.timeseries 247 | x = torch.tensor(x, dtype=torch.float32).to(accelerator.device) 248 | x = x.unsqueeze(-1) 249 | 250 | model.eval() 251 | 252 | with torch.no_grad(): 253 | B, _, C = x.shape 254 | dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device) 255 | dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1) 256 | outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device) 257 | id_list = np.arange(0, B, args.eval_batch_size) 258 | 
id_list = np.append(id_list, B) 259 | for i in range(len(id_list) - 1): 260 | outputs[id_list[i]:id_list[i + 1], :, :] = model( 261 | x[id_list[i]:id_list[i + 1]], 262 | None, 263 | dec_inp[id_list[i]:id_list[i + 1]], 264 | None 265 | ) 266 | accelerator.wait_for_everyone() 267 | f_dim = -1 if args.features == 'MS' else 0 268 | outputs = outputs[:, -args.pred_len:, f_dim:] 269 | outputs = outputs.detach().cpu().numpy() 270 | 271 | preds = outputs 272 | trues = y 273 | x = x.detach().cpu().numpy() 274 | 275 | accelerator.print('test shape:', preds.shape) 276 | 277 | folder_path = './m4_results/' + args.model + '-' + args.model_comment + '/' 278 | if not os.path.exists(folder_path) and accelerator.is_local_main_process: 279 | os.makedirs(folder_path) 280 | 281 | if accelerator.is_local_main_process: 282 | forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(args.pred_len)]) 283 | forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]] 284 | forecasts_df.index.name = 'id' 285 | forecasts_df.set_index(forecasts_df.columns[0], inplace=True) 286 | forecasts_df.to_csv(folder_path + args.seasonal_patterns + '_forecast.csv') 287 | 288 | # calculate metrics 289 | accelerator.print(args.model) 290 | file_path = folder_path 291 | if 'Weekly_forecast.csv' in os.listdir(file_path) \ 292 | and 'Monthly_forecast.csv' in os.listdir(file_path) \ 293 | and 'Yearly_forecast.csv' in os.listdir(file_path) \ 294 | and 'Daily_forecast.csv' in os.listdir(file_path) \ 295 | and 'Hourly_forecast.csv' in os.listdir(file_path) \ 296 | and 'Quarterly_forecast.csv' in os.listdir(file_path): 297 | m4_summary = M4Summary(file_path, args.root_path) 298 | # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True) 299 | smape_results, owa_results, mape, mase = m4_summary.evaluate() 300 | accelerator.print('smape:', smape_results) 301 | accelerator.print('mape:', mape) 302 | accelerator.print('mase:', mase) 303 | accelerator.print('owa:', owa_results) 304 
| else: 305 | accelerator.print('After all 6 tasks are finished, you can calculate the averaged performance') 306 | 307 | accelerator.wait_for_everyone() 308 | if accelerator.is_local_main_process: 309 | path = './checkpoints' # unique checkpoint saving path 310 | del_files(path) # delete checkpoint files 311 | accelerator.print('success delete checkpoints') 312 | -------------------------------------------------------------------------------- /run_main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from accelerate import Accelerator, DeepSpeedPlugin 4 | from accelerate import DistributedDataParallelKwargs 5 | from torch import nn, optim 6 | from torch.optim import lr_scheduler 7 | from tqdm import tqdm 8 | 9 | from models import Autoformer, DLinear, TimeLLM 10 | 11 | from data_provider.data_factory import data_provider 12 | import time 13 | import random 14 | import numpy as np 15 | import os 16 | 17 | os.environ['CURL_CA_BUNDLE'] = '' 18 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64" 19 | 20 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, vali, load_content 21 | 22 | parser = argparse.ArgumentParser(description='Time-LLM') 23 | 24 | fix_seed = 2021 25 | random.seed(fix_seed) 26 | torch.manual_seed(fix_seed) 27 | np.random.seed(fix_seed) 28 | 29 | # basic config 30 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', 31 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') 32 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status') 33 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') 34 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results') 35 | parser.add_argument('--model', type=str, required=True, 
default='Autoformer', 36 | help='model name, options: [Autoformer, DLinear]') 37 | parser.add_argument('--seed', type=int, default=2021, help='random seed') 38 | 39 | # data loader 40 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') 41 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file') 42 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') 43 | parser.add_argument('--features', type=str, default='M', 44 | help='forecasting task, options:[M, S, MS]; ' 45 | 'M:multivariate predict multivariate, S: univariate predict univariate, ' 46 | 'MS:multivariate predict univariate') 47 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') 48 | parser.add_argument('--loader', type=str, default='modal', help='dataset type') 49 | parser.add_argument('--freq', type=str, default='h', 50 | help='freq for time features encoding, ' 51 | 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], ' 52 | 'you can also use more detailed freq like 15min or 3h') 53 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') 54 | 55 | # forecasting task 56 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') 57 | parser.add_argument('--label_len', type=int, default=48, help='start token length') 58 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') 59 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') 60 | 61 | # model define 62 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') 63 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') 64 | parser.add_argument('--c_out', type=int, default=7, help='output size') 65 | parser.add_argument('--d_model', type=int, 
default=16, help='dimension of model') 66 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads') 67 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') 68 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') 69 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn') 70 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') 71 | parser.add_argument('--factor', type=int, default=1, help='attn factor') 72 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout') 73 | parser.add_argument('--embed', type=str, default='timeF', 74 | help='time features encoding, options:[timeF, fixed, learned]') 75 | parser.add_argument('--activation', type=str, default='gelu', help='activation') 76 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder') 77 | parser.add_argument('--patch_len', type=int, default=16, help='patch length') 78 | parser.add_argument('--stride', type=int, default=8, help='stride') 79 | parser.add_argument('--prompt_domain', type=int, default=0, help='') 80 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT 81 | parser.add_argument('--llm_dim', type=int, default='4096', help='LLM model dimension')# LLama7b:4096; GPT2-small:768; BERT-base:768 82 | 83 | 84 | # optimization 85 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') 86 | parser.add_argument('--itr', type=int, default=1, help='experiments times') 87 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') 88 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs') 89 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') 90 | parser.add_argument('--eval_batch_size', type=int, default=8, 
help='batch size of model evaluation') 91 | parser.add_argument('--patience', type=int, default=10, help='early stopping patience') 92 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') 93 | parser.add_argument('--des', type=str, default='test', help='exp description') 94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function') 95 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') 96 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start') 97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) 98 | parser.add_argument('--llm_layers', type=int, default=6) 99 | parser.add_argument('--percent', type=int, default=100) 100 | 101 | args = parser.parse_args() 102 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) 103 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json') 104 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin) 105 | 106 | for ii in range(args.itr): 107 | # setting record of experiments 108 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format( 109 | args.task_name, 110 | args.model_id, 111 | args.model, 112 | args.data, 113 | args.features, 114 | args.seq_len, 115 | args.label_len, 116 | args.pred_len, 117 | args.d_model, 118 | args.n_heads, 119 | args.e_layers, 120 | args.d_layers, 121 | args.d_ff, 122 | args.factor, 123 | args.embed, 124 | args.des, ii) 125 | 126 | train_data, train_loader = data_provider(args, 'train') 127 | vali_data, vali_loader = data_provider(args, 'val') 128 | test_data, test_loader = data_provider(args, 'test') 129 | 130 | if args.model == 'Autoformer': 131 | model = Autoformer.Model(args).float() 132 | elif args.model == 'DLinear': 133 | model = DLinear.Model(args).float() 134 | else: 135 | model = 
TimeLLM.Model(args).float() 136 | 137 | path = os.path.join(args.checkpoints, 138 | setting + '-' + args.model_comment) # unique checkpoint saving path 139 | args.content = load_content(args) 140 | if not os.path.exists(path) and accelerator.is_local_main_process: 141 | os.makedirs(path) 142 | 143 | time_now = time.time() 144 | 145 | train_steps = len(train_loader) 146 | early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience) 147 | 148 | trained_parameters = [] 149 | for p in model.parameters(): 150 | if p.requires_grad is True: 151 | trained_parameters.append(p) 152 | 153 | model_optim = optim.Adam(trained_parameters, lr=args.learning_rate) 154 | 155 | if args.lradj == 'COS': 156 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8) 157 | else: 158 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 159 | steps_per_epoch=train_steps, 160 | pct_start=args.pct_start, 161 | epochs=args.train_epochs, 162 | max_lr=args.learning_rate) 163 | 164 | criterion = nn.MSELoss() 165 | mae_metric = nn.L1Loss() 166 | 167 | train_loader, vali_loader, test_loader, model, model_optim, scheduler = accelerator.prepare( 168 | train_loader, vali_loader, test_loader, model, model_optim, scheduler) 169 | 170 | if args.use_amp: 171 | scaler = torch.cuda.amp.GradScaler() 172 | 173 | for epoch in range(args.train_epochs): 174 | iter_count = 0 175 | train_loss = [] 176 | 177 | model.train() 178 | epoch_time = time.time() 179 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(train_loader)): 180 | iter_count += 1 181 | model_optim.zero_grad() 182 | 183 | batch_x = batch_x.float().to(accelerator.device) 184 | batch_y = batch_y.float().to(accelerator.device) 185 | batch_x_mark = batch_x_mark.float().to(accelerator.device) 186 | batch_y_mark = batch_y_mark.float().to(accelerator.device) 187 | 188 | # decoder input 189 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to( 190 | 
accelerator.device) 191 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to( 192 | accelerator.device) 193 | 194 | # encoder - decoder 195 | if args.use_amp: 196 | with torch.cuda.amp.autocast(): 197 | if args.output_attention: 198 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 199 | else: 200 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 201 | 202 | f_dim = -1 if args.features == 'MS' else 0 203 | outputs = outputs[:, -args.pred_len:, f_dim:] 204 | batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device) 205 | loss = criterion(outputs, batch_y) 206 | train_loss.append(loss.item()) 207 | else: 208 | if args.output_attention: 209 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0] 210 | else: 211 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark) 212 | 213 | f_dim = -1 if args.features == 'MS' else 0 214 | outputs = outputs[:, -args.pred_len:, f_dim:] 215 | batch_y = batch_y[:, -args.pred_len:, f_dim:] 216 | loss = criterion(outputs, batch_y) 217 | train_loss.append(loss.item()) 218 | 219 | if (i + 1) % 100 == 0: 220 | accelerator.print( 221 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 222 | speed = (time.time() - time_now) / iter_count 223 | left_time = speed * ((args.train_epochs - epoch) * train_steps - i) 224 | accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 225 | iter_count = 0 226 | time_now = time.time() 227 | 228 | if args.use_amp: 229 | scaler.scale(loss).backward() 230 | scaler.step(model_optim) 231 | scaler.update() 232 | else: 233 | accelerator.backward(loss) 234 | model_optim.step() 235 | 236 | if args.lradj == 'TST': 237 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False) 238 | scheduler.step() 239 | 240 | accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 241 | train_loss = 
np.average(train_loss) 242 | vali_loss, vali_mae_loss = vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric) 243 | test_loss, test_mae_loss = vali(args, accelerator, model, test_data, test_loader, criterion, mae_metric) 244 | accelerator.print( 245 | "Epoch: {0} | Train Loss: {1:.7f} Vali Loss: {2:.7f} Test Loss: {3:.7f} MAE Loss: {4:.7f}".format( 246 | epoch + 1, train_loss, vali_loss, test_loss, test_mae_loss)) 247 | 248 | early_stopping(vali_loss, model, path) 249 | if early_stopping.early_stop: 250 | accelerator.print("Early stopping") 251 | break 252 | 253 | if args.lradj != 'TST': 254 | if args.lradj == 'COS': 255 | scheduler.step() 256 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr'])) 257 | else: 258 | if epoch == 0: 259 | args.learning_rate = model_optim.param_groups[0]['lr'] 260 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr'])) 261 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True) 262 | 263 | else: 264 | accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 265 | 266 | accelerator.wait_for_everyone() 267 | if accelerator.is_local_main_process: 268 | path = './checkpoints' # unique checkpoint saving path 269 | del_files(path) # delete checkpoint files 270 | accelerator.print('success delete checkpoints') -------------------------------------------------------------------------------- /run_pretrain.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from accelerate import Accelerator, DeepSpeedPlugin 4 | from accelerate import DistributedDataParallelKwargs 5 | from torch import nn, optim 6 | from torch.optim import lr_scheduler 7 | 8 | from data_provider_pretrain.data_factory import data_provider 9 | from models import Autoformer, DLinear, TimeLLM 10 | 11 | import time 12 | import random 13 | import numpy as np 14 | import os 15 | 16 
# Environment workarounds: empty CA bundle (HF hub downloads behind some
# proxies) and a capped CUDA allocator split size — presumably to reduce
# fragmentation OOMs with large LLM weights (TODO confirm).
os.environ['CURL_CA_BUNDLE'] = ''
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

from utils.tools import del_files, EarlyStopping, adjust_learning_rate, vali, load_content

parser = argparse.ArgumentParser(description='Time-LLM')

# Reproducibility: seeding happens here with a hard-coded value, BEFORE the
# CLI is parsed — the --seed flag defined below is not applied at this point.
fix_seed = 2021
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)

# basic config
parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
                    help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results')
parser.add_argument('--model', type=str, required=True, default='Autoformer',
                    help='model name, options: [Autoformer, DLinear]')
parser.add_argument('--seed', type=int, default=2021, help='random seed')

# data loader
# NOTE: this pretrain runner takes both a source dataset (--data_pretrain /
# --data_path_pretrain) and a target dataset (--data / --data_path).
parser.add_argument('--data_pretrain', type=str, required=True, default='ETTm1', help='dataset type')
parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
parser.add_argument('--data_path_pretrain', type=str, default='ETTh1.csv', help='data file')
parser.add_argument('--features', type=str, default='M',
                    help='forecasting task, options:[M, S, MS]; '
                         'M:multivariate predict multivariate, S: univariate predict univariate, '
                         'MS:multivariate predict univariate')
parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
parser.add_argument('--loader', type=str, default='modal', help='dataset type')
parser.add_argument('--freq', type=str, default='h',
                    help='freq for time features encoding, '
                         'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
                         'you can also use more detailed freq like 15min or 3h')
parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

# forecasting task
parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
parser.add_argument('--label_len', type=int, default=48, help='start token length')
parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')

# model define
parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
parser.add_argument('--c_out', type=int, default=7, help='output size')
parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
parser.add_argument('--factor', type=int, default=1, help='attn factor')
parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
parser.add_argument('--embed', type=str, default='timeF',
                    help='time features encoding, options:[timeF, fixed, learned]')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
# FIX: help text typo 'ecoder' -> 'encoder'
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
parser.add_argument('--patch_len', type=int, default=16, help='patch length')
parser.add_argument('--stride', type=int, default=8, help='stride')
parser.add_argument('--prompt_domain', type=int, default=0, help='')
parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model')  # LLAMA, GPT2, BERT
# CLEANUP: default was the string '4096'. argparse re-parses string defaults
# through `type`, so the runtime value was already int 4096 — make the
# default's type explicit instead of relying on that argparse quirk.
parser.add_argument('--llm_dim', type=int, default=4096,
                    help='LLM model dimension')  # LLama7b:4096; GPT2-small:768; BERT-base:768

# optimization
parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
parser.add_argument('--itr', type=int, default=1, help='experiments times')
parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
parser.add_argument('--patience', type=int, default=5, help='early stopping patience')
parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
parser.add_argument('--des', type=str, default='test', help='exp description')
parser.add_argument('--loss', type=str, default='MSE', help='loss function')
parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
parser.add_argument('--llm_layers', type=int, default=6)
parser.add_argument('--percent', type=int, default=100)

args = parser.parse_args()

# FIX: --seed was parsed but never applied (seeding earlier in the file always
# used the hard-coded fix_seed). Re-seed from the CLI flag so it takes effect;
# with the default (2021) the seeds are identical and behavior is unchanged.
random.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)

for ii in range(args.itr):
    # setting record of experiments
    setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.factor,
        args.embed,
        args.des, ii)

    # Train/val come from the pretrain (source) dataset; test from the target.
    train_data, train_loader = data_provider(args, args.data_pretrain, args.data_path_pretrain, True, 'train')
    vali_data, vali_loader = data_provider(args, args.data_pretrain, args.data_path_pretrain, True, 'val')
    test_data, test_loader = data_provider(args, args.data, args.data_path, False, 'test')

    if args.model == 'Autoformer':
        model = Autoformer.Model(args).float()
    elif args.model == 'DLinear':
        model = DLinear.Model(args).float()
    else:
        model = TimeLLM.Model(args).float()

    path = os.path.join(args.checkpoints,
                        setting + '-' + args.model_comment)  # unique checkpoint saving path
    args.content = load_content(args)
    if accelerator.is_local_main_process:
        # FIX: exist_ok replaces the racy `not os.path.exists(path)` pre-check
        os.makedirs(path, exist_ok=True)

    time_now = time.time()

    train_steps = len(train_loader)
    early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience)

    # Only parameters left trainable (requires_grad) are optimized.
    trained_parameters = [p for p in model.parameters() if p.requires_grad]

    model_optim = optim.Adam(trained_parameters, lr=args.learning_rate)

    if args.lradj == 'COS':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
    else:
        scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
                                            steps_per_epoch=train_steps,
                                            pct_start=args.pct_start,
                                            epochs=args.train_epochs,
                                            max_lr=args.learning_rate)

    criterion = nn.MSELoss()
    mae_metric = nn.L1Loss()

    train_loader, vali_loader, test_loader, model, model_optim, scheduler = accelerator.prepare(
        train_loader, vali_loader, test_loader, model, model_optim, scheduler)

    if args.use_amp:
        scaler = torch.cuda.amp.GradScaler()

    for epoch in range(args.train_epochs):
        iter_count = 0
        train_loss = []

        model.train()
        epoch_time = time.time()
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
            iter_count += 1
            model_optim.zero_grad()

            batch_x = batch_x.float().to(accelerator.device)
            batch_y = batch_y.float().to(accelerator.device)
            batch_x_mark = batch_x_mark.float().to(accelerator.device)
            batch_y_mark = batch_y_mark.float().to(accelerator.device)

            # decoder input: label section of y followed by zeros for the horizon
            dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(
                accelerator.device)
            dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
                accelerator.device)

            # encoder - decoder
            if args.use_amp:
                with torch.cuda.amp.autocast():
                    if args.output_attention:
                        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                    # 'MS': keep only the target (last) channel
                    f_dim = -1 if args.features == 'MS' else 0
                    outputs = outputs[:, -args.pred_len:, f_dim:]
                    batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
                    loss = criterion(outputs, batch_y)
                    train_loss.append(loss.item())
            else:
                # full-precision path
                if args.output_attention:
                    outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                else:
                    outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                # 'MS': keep only the target (last) channel
                f_dim = -1 if args.features == 'MS' else 0
                outputs = outputs[:, -args.pred_len:, f_dim:]
                batch_y = batch_y[:, -args.pred_len:, f_dim:]
                loss = criterion(outputs, batch_y)
                train_loss.append(loss.item())

            # periodic progress / ETA report every 100 iterations
            if (i + 1) % 100 == 0:
                accelerator.print(
                    "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                speed = (time.time() - time_now) / iter_count
                left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
                accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                iter_count = 0
                time_now = time.time()

            if args.use_amp:
                # NOTE(review): raw GradScaler is used here instead of
                # accelerator.backward(); confirm this interacts correctly with
                # the accelerate/DeepSpeed-prepared optimizer.
                scaler.scale(loss).backward()
                scaler.step(model_optim)
                scaler.update()
            else:
                accelerator.backward(loss)
                model_optim.step()

            if args.lradj == 'TST':
                # 'TST' steps the scheduler per batch (silently); other
                # policies are handled once per epoch below.
                adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
                scheduler.step()

        accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
        train_loss = np.average(train_loss)
        # Evaluate on the pretrain-val split and the target-test split.
        vali_loss, vali_mae_loss = vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric)
        test_loss, test_mae_loss = vali(args, accelerator, model, test_data, test_loader, criterion, mae_metric)
        accelerator.print(
            "Epoch: {0} | Train Loss: {1:.7f} Vali Loss: {2:.7f} Test Loss: {3:.7f} MAE Loss: {4:.7f}".format(
                epoch + 1, train_loss, vali_loss, test_loss, test_mae_loss))

        # Checkpoints the best model into `path`; sets .early_stop after
        # `patience` epochs without improvement.
        early_stopping(vali_loss, model, path)
        if early_stopping.early_stop:
            accelerator.print("Early stopping")
            break

        # per-epoch LR adjustment ('TST' already stepped per batch above)
        if args.lradj != 'TST':
            if args.lradj == 'COS':
255 | scheduler.step() 256 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr'])) 257 | else: 258 | if epoch == 0: 259 | args.learning_rate = model_optim.param_groups[0]['lr'] 260 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr'])) 261 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True) 262 | 263 | else: 264 | accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 265 | 266 | accelerator.wait_for_everyone() 267 | if accelerator.is_local_main_process: 268 | path = './checkpoints' # unique checkpoint saving path 269 | del_files(path) # delete checkpoint files 270 | accelerator.print('success delete checkpoints') -------------------------------------------------------------------------------- /scripts/TimeLLM_ECL.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=10 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00097 7 | num_process=8 8 | batch_size=24 9 | d_model=16 10 | d_ff=32 11 | 12 | comment='TimeLLM-ECL' 13 | 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 15 | --task_name long_term_forecast \ 16 | --is_training 1 \ 17 | --root_path ./dataset/electricity/ \ 18 | --data_path electricity.csv \ 19 | --model_id ECL_512_96 \ 20 | --model $model_name \ 21 | --data ECL \ 22 | --features M \ 23 | --seq_len 512 \ 24 | --label_len 48 \ 25 | --pred_len 96 \ 26 | --e_layers 2 \ 27 | --d_layers 1 \ 28 | --factor 3 \ 29 | --enc_in 321 \ 30 | --dec_in 321 \ 31 | --c_out 321 \ 32 | --batch_size $batch_size \ 33 | --learning_rate $learning_rate \ 34 | --llm_layers $llama_layers \ 35 | --train_epochs $train_epochs \ 36 | --model_comment $comment 37 | 38 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 39 | 
--task_name long_term_forecast \ 40 | --is_training 1 \ 41 | --root_path ./dataset/electricity/ \ 42 | --data_path electricity.csv \ 43 | --model_id ECL_512_192 \ 44 | --model $model_name \ 45 | --data ECL \ 46 | --features M \ 47 | --seq_len 512 \ 48 | --label_len 48 \ 49 | --pred_len 192 \ 50 | --e_layers 2 \ 51 | --d_layers 1 \ 52 | --factor 3 \ 53 | --enc_in 321 \ 54 | --dec_in 321 \ 55 | --c_out 321 \ 56 | --batch_size $batch_size \ 57 | --learning_rate $learning_rate \ 58 | --llm_layers $llama_layers \ 59 | --train_epochs $train_epochs \ 60 | --model_comment $comment 61 | 62 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 63 | --task_name long_term_forecast \ 64 | --is_training 1 \ 65 | --root_path ./dataset/electricity/ \ 66 | --data_path electricity.csv \ 67 | --model_id ECL_512_336 \ 68 | --model $model_name \ 69 | --data ECL \ 70 | --features M \ 71 | --seq_len 512 \ 72 | --label_len 48 \ 73 | --pred_len 336 \ 74 | --e_layers 2 \ 75 | --d_layers 1 \ 76 | --factor 3 \ 77 | --enc_in 321 \ 78 | --dec_in 321 \ 79 | --c_out 321 \ 80 | --batch_size $batch_size \ 81 | --learning_rate $learning_rate \ 82 | --llm_layers $llama_layers \ 83 | --train_epochs $train_epochs \ 84 | --model_comment $comment 85 | 86 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 87 | --task_name long_term_forecast \ 88 | --is_training 1 \ 89 | --root_path ./dataset/electricity/ \ 90 | --data_path electricity.csv \ 91 | --model_id ECL_512_720 \ 92 | --model $model_name \ 93 | --data ECL \ 94 | --features M \ 95 | --seq_len 512 \ 96 | --label_len 48 \ 97 | --pred_len 720 \ 98 | --e_layers 2 \ 99 | --d_layers 1 \ 100 | --factor 3 \ 101 | --enc_in 321 \ 102 | --dec_in 321 \ 103 | --c_out 321 \ 104 | --batch_size $batch_size \ 105 | --learning_rate $learning_rate \ 106 | --llm_layers $llama_layers \ 107 | --train_epochs 
$train_epochs \ 108 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_ETTh1.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=100 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00097 7 | num_process=8 8 | batch_size=24 9 | d_model=32 10 | d_ff=128 11 | 12 | comment='TimeLLM-ETTh1' 13 | 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 15 | --task_name long_term_forecast \ 16 | --is_training 1 \ 17 | --root_path ./dataset/ETT-small/ \ 18 | --data_path ETTh1.csv \ 19 | --model_id ETTh1_512_96 \ 20 | --model $model_name \ 21 | --data ETTh1 \ 22 | --features M \ 23 | --seq_len 512 \ 24 | --label_len 48 \ 25 | --pred_len 96 \ 26 | --factor 3 \ 27 | --enc_in 7 \ 28 | --dec_in 7 \ 29 | --c_out 7 \ 30 | --des 'Exp' \ 31 | --itr 1 \ 32 | --d_model $d_model \ 33 | --d_ff $d_ff \ 34 | --batch_size $batch_size \ 35 | --learning_rate $learning_rate \ 36 | --llm_layers $llama_layers \ 37 | --train_epochs $train_epochs \ 38 | --model_comment $comment 39 | 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 41 | --task_name long_term_forecast \ 42 | --is_training 1 \ 43 | --root_path ./dataset/ETT-small/ \ 44 | --data_path ETTh1.csv \ 45 | --model_id ETTh1_512_192 \ 46 | --model $model_name \ 47 | --data ETTh1 \ 48 | --features M \ 49 | --seq_len 512 \ 50 | --label_len 48 \ 51 | --pred_len 192 \ 52 | --factor 3 \ 53 | --enc_in 7 \ 54 | --dec_in 7 \ 55 | --c_out 7 \ 56 | --des 'Exp' \ 57 | --itr 1 \ 58 | --d_model 32 \ 59 | --d_ff 128 \ 60 | --batch_size $batch_size \ 61 | --learning_rate 0.02 \ 62 | --llm_layers $llama_layers \ 63 | --train_epochs $train_epochs \ 64 | --model_comment $comment 65 | 66 | accelerate launch --multi_gpu --mixed_precision bf16 
--num_processes $num_process --main_process_port $master_port run_main.py \ 67 | --task_name long_term_forecast \ 68 | --is_training 1 \ 69 | --root_path ./dataset/ETT-small/ \ 70 | --data_path ETTh1.csv \ 71 | --model_id ETTh1_512_336 \ 72 | --model $model_name \ 73 | --data ETTh1 \ 74 | --features M \ 75 | --seq_len 512 \ 76 | --label_len 48 \ 77 | --pred_len 336 \ 78 | --factor 3 \ 79 | --enc_in 7 \ 80 | --dec_in 7 \ 81 | --c_out 7 \ 82 | --des 'Exp' \ 83 | --itr 1 \ 84 | --d_model $d_model \ 85 | --d_ff $d_ff \ 86 | --batch_size $batch_size \ 87 | --lradj 'COS'\ 88 | --learning_rate 0.001 \ 89 | --llm_layers $llama_layers \ 90 | --train_epochs $train_epochs \ 91 | --model_comment $comment 92 | 93 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 94 | --task_name long_term_forecast \ 95 | --is_training 1 \ 96 | --root_path ./dataset/ETT-small/ \ 97 | --data_path ETTh1.csv \ 98 | --model_id ETTh1_512_720 \ 99 | --model $model_name \ 100 | --data ETTh1 \ 101 | --features M \ 102 | --seq_len 512 \ 103 | --label_len 48 \ 104 | --pred_len 720 \ 105 | --factor 3 \ 106 | --enc_in 7 \ 107 | --dec_in 7 \ 108 | --c_out 7 \ 109 | --des 'Exp' \ 110 | --itr 1 \ 111 | --d_model $d_model \ 112 | --d_ff $d_ff \ 113 | --batch_size $batch_size \ 114 | --learning_rate $learning_rate \ 115 | --llm_layers $llama_layers \ 116 | --train_epochs $train_epochs \ 117 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_ETTh1_ETTh2.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | learning_rate=0.01 3 | llama_layers=32 4 | 5 | master_port=00097 6 | num_process=8 7 | batch_size=24 8 | d_model=32 9 | d_ff=128 10 | 11 | comment='TimeLLM-ETTh1_ETTh2' 12 | 13 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port 
run_pretrain.py \ 14 | --task_name long_term_forecast \ 15 | --is_training 1 \ 16 | --root_path ./dataset/ETT-small/ \ 17 | --data_path_pretrain ETTh1.csv \ 18 | --data_path ETTh2.csv \ 19 | --model_id ETTh1_ETTh2_512_96 \ 20 | --model $model_name \ 21 | --data_pretrain ETTh1 \ 22 | --data ETTh2 \ 23 | --features M \ 24 | --seq_len 512 \ 25 | --label_len 48 \ 26 | --pred_len 96 \ 27 | --factor 3 \ 28 | --enc_in 7 \ 29 | --dec_in 7 \ 30 | --c_out 7 \ 31 | --des 'Exp' \ 32 | --itr 1 \ 33 | --d_model $d_model \ 34 | --d_ff $d_ff \ 35 | --batch_size $batch_size \ 36 | --learning_rate $learning_rate \ 37 | --llm_layers $llama_layers \ 38 | --train_epochs 5 \ 39 | --model_comment $comment 40 | 41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \ 42 | --task_name long_term_forecast \ 43 | --is_training 1 \ 44 | --root_path ./dataset/ETT-small/ \ 45 | --data_path_pretrain ETTh1.csv \ 46 | --data_path ETTh2.csv \ 47 | --model_id ETTh1_ETTh2_512_192 \ 48 | --model $model_name \ 49 | --data_pretrain ETTh1 \ 50 | --data ETTh2 \ 51 | --features M \ 52 | --seq_len 512 \ 53 | --label_len 48 \ 54 | --pred_len 192 \ 55 | --factor 3 \ 56 | --enc_in 7 \ 57 | --dec_in 7 \ 58 | --c_out 7 \ 59 | --des 'Exp' \ 60 | --itr 1 \ 61 | --d_model 32 \ 62 | --d_ff 128 \ 63 | --batch_size $batch_size \ 64 | --learning_rate 0.02 \ 65 | --llm_layers $llama_layers \ 66 | --train_epochs 5 \ 67 | --model_comment $comment 68 | 69 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \ 70 | --task_name long_term_forecast \ 71 | --is_training 1 \ 72 | --root_path ./dataset/ETT-small/ \ 73 | --data_path_pretrain ETTh1.csv \ 74 | --data_path ETTh2.csv \ 75 | --model_id ETTh1_ETTh2_512_336 \ 76 | --model $model_name \ 77 | --data_pretrain ETTh1 \ 78 | --data ETTh2 \ 79 | --features M \ 80 | --seq_len 512 \ 81 | --label_len 48 \ 82 | --pred_len 
336 \ 83 | --factor 3 \ 84 | --enc_in 7 \ 85 | --dec_in 7 \ 86 | --c_out 7 \ 87 | --des 'Exp' \ 88 | --itr 1 \ 89 | --d_model $d_model \ 90 | --d_ff $d_ff \ 91 | --batch_size $batch_size \ 92 | --lradj 'COS'\ 93 | --learning_rate 0.001 \ 94 | --llm_layers $llama_layers \ 95 | --train_epochs 5 \ 96 | --model_comment $comment 97 | 98 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \ 99 | --task_name long_term_forecast \ 100 | --is_training 1 \ 101 | --root_path ./dataset/ETT-small/ \ 102 | --data_path_pretrain ETTh1.csv \ 103 | --data_path ETTh2.csv \ 104 | --model_id ETTh1_ETTh2_512_720 \ 105 | --model $model_name \ 106 | --data_pretrain ETTh1 \ 107 | --data ETTh2 \ 108 | --features M \ 109 | --seq_len 512 \ 110 | --label_len 48 \ 111 | --pred_len 720 \ 112 | --factor 3 \ 113 | --enc_in 7 \ 114 | --dec_in 7 \ 115 | --c_out 7 \ 116 | --des 'Exp' \ 117 | --itr 1 \ 118 | --d_model $d_model \ 119 | --d_ff $d_ff \ 120 | --batch_size $batch_size \ 121 | --learning_rate $learning_rate \ 122 | --llm_layers $llama_layers \ 123 | --train_epochs 5 \ 124 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_ETTh2.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=10 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00098 7 | num_process=8 8 | batch_size=24 9 | d_model=32 10 | d_ff=128 11 | 12 | comment='TimeLLM-ETTh2' 13 | 14 | 15 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 16 | --task_name long_term_forecast \ 17 | --is_training 1 \ 18 | --root_path ./dataset/ETT-small/ \ 19 | --data_path ETTh2.csv \ 20 | --model_id ETTh2_512_96 \ 21 | --model $model_name \ 22 | --data ETTh2 \ 23 | --features M \ 24 | --seq_len 512 \ 25 | --label_len 48 \ 26 | 
--pred_len 96 \ 27 | --factor 3 \ 28 | --enc_in 7 \ 29 | --dec_in 7 \ 30 | --c_out 7 \ 31 | --des 'Exp' \ 32 | --itr 1 \ 33 | --d_model $d_model \ 34 | --d_ff $d_ff \ 35 | --batch_size $batch_size \ 36 | --learning_rate $learning_rate \ 37 | --llm_layers $llama_layers \ 38 | --train_epochs $train_epochs \ 39 | --model_comment $comment 40 | 41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 42 | --task_name long_term_forecast \ 43 | --is_training 1 \ 44 | --root_path ./dataset/ETT-small/ \ 45 | --data_path ETTh2.csv \ 46 | --model_id ETTh2_512_192 \ 47 | --model $model_name \ 48 | --data ETTh2 \ 49 | --features M \ 50 | --seq_len 512 \ 51 | --label_len 48 \ 52 | --pred_len 192 \ 53 | --factor 3 \ 54 | --enc_in 7 \ 55 | --dec_in 7 \ 56 | --c_out 7 \ 57 | --des 'Exp' \ 58 | --itr 1 \ 59 | --d_model $d_model \ 60 | --d_ff $d_ff \ 61 | --batch_size $batch_size \ 62 | --lradj 'TST'\ 63 | --learning_rate 0.002 \ 64 | --llm_layers $llama_layers \ 65 | --train_epochs $train_epochs \ 66 | --model_comment $comment 67 | 68 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 69 | --task_name long_term_forecast \ 70 | --is_training 1 \ 71 | --root_path ./dataset/ETT-small/ \ 72 | --data_path ETTh2.csv \ 73 | --model_id ETTh2_512_336 \ 74 | --model $model_name \ 75 | --data ETTh2 \ 76 | --features M \ 77 | --seq_len 512 \ 78 | --label_len 48 \ 79 | --pred_len 336 \ 80 | --factor 3 \ 81 | --enc_in 7 \ 82 | --dec_in 7 \ 83 | --c_out 7 \ 84 | --des 'Exp' \ 85 | --itr 1 \ 86 | --d_model $d_model \ 87 | --d_ff $d_ff \ 88 | --batch_size $batch_size \ 89 | --lradj 'TST'\ 90 | --learning_rate 0.005 \ 91 | --llm_layers $llama_layers \ 92 | --train_epochs $train_epochs \ 93 | --model_comment $comment 94 | 95 | 96 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port 
run_main.py \ 97 | --task_name long_term_forecast \ 98 | --is_training 1 \ 99 | --root_path ./dataset/ETT-small/ \ 100 | --data_path ETTh2.csv \ 101 | --model_id ETTh2_512_720 \ 102 | --model $model_name \ 103 | --data ETTh2 \ 104 | --features M \ 105 | --seq_len 512 \ 106 | --label_len 48 \ 107 | --pred_len 720 \ 108 | --factor 3 \ 109 | --enc_in 7 \ 110 | --dec_in 7 \ 111 | --c_out 7 \ 112 | --des 'Exp' \ 113 | --itr 1 \ 114 | --d_model 16 \ 115 | --d_ff 128 \ 116 | --batch_size $batch_size \ 117 | --learning_rate 0.005 \ 118 | --lradj 'TST'\ 119 | --llm_layers $llama_layers \ 120 | --train_epochs 20 \ 121 | --patience 10 \ 122 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_ETTm1.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=100 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00097 7 | num_process=8 8 | batch_size=24 9 | d_model=32 10 | d_ff=128 11 | 12 | comment='TimeLLM-ETTm1' 13 | 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 15 | --task_name long_term_forecast \ 16 | --is_training 1 \ 17 | --root_path ./dataset/ETT-small/ \ 18 | --data_path ETTm1.csv \ 19 | --model_id ETTm1_512_96 \ 20 | --model $model_name \ 21 | --data ETTm1 \ 22 | --features M \ 23 | --seq_len 512 \ 24 | --label_len 48 \ 25 | --pred_len 96 \ 26 | --factor 3 \ 27 | --enc_in 7 \ 28 | --dec_in 7 \ 29 | --c_out 7 \ 30 | --des 'Exp' \ 31 | --itr 1 \ 32 | --d_model $d_model \ 33 | --d_ff $d_ff \ 34 | --batch_size $batch_size \ 35 | --lradj 'TST'\ 36 | --learning_rate 0.001 \ 37 | --llm_layers $llama_layers \ 38 | --train_epochs $train_epochs \ 39 | --model_comment $comment 40 | 41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 42 | --task_name 
long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm1.csv \
  --model_id ETTm1_512_192 \
  --model $model_name \
  --data ETTm1 \
  --features M \
  --seq_len 512 \
  --label_len 48 \
  --pred_len 192 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des 'Exp' \
  --itr 1 \
  --d_model $d_model \
  --d_ff $d_ff \
  --batch_size $batch_size \
  --lradj 'TST'\
  --learning_rate 0.001 \
  --llm_layers $llama_layers \
  --train_epochs $train_epochs \
  --patience 20 \
  --model_comment $comment

# FIX: each run below previously passed --learning_rate twice
# ($learning_rate, then 0.001); argparse keeps only the last occurrence,
# so the redundant first flag has been removed (effective value unchanged).
accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm1.csv \
  --model_id ETTm1_512_336 \
  --model $model_name \
  --data ETTm1 \
  --features M \
  --seq_len 512 \
  --label_len 48 \
  --pred_len 336 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des 'Exp' \
  --itr 1 \
  --d_model $d_model \
  --d_ff $d_ff \
  --batch_size $batch_size \
  --lradj 'TST'\
  --learning_rate 0.001 \
  --llm_layers $llama_layers \
  --train_epochs $train_epochs \
  --patience 20 \
  --model_comment $comment

accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm1.csv \
  --model_id ETTm1_512_720 \
  --model $model_name \
  --data ETTm1 \
  --features M \
  --seq_len 512 \
  --label_len 48 \
  --pred_len 720 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
--c_out 7 \ 115 | --des 'Exp' \ 116 | --itr 1 \ 117 | --d_model $d_model \ 118 | --d_ff $d_ff \ 119 | --batch_size $batch_size \ 120 | --learning_rate $learning_rate \ 121 | --lradj 'TST'\ 122 | --learning_rate 0.001 \ 123 | --llm_layers $llama_layers \ 124 | --train_epochs $train_epochs \ 125 | --patience 20 \ 126 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_ETTm2.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=10 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00097 7 | num_process=8 8 | batch_size=24 9 | d_model=32 10 | d_ff=128 11 | 12 | comment='TimeLLM-ETTm2' 13 | 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 15 | --task_name long_term_forecast \ 16 | --is_training 1 \ 17 | --root_path ./dataset/ETT-small/ \ 18 | --data_path ETTm2.csv \ 19 | --model_id ETTm2_512_96 \ 20 | --model $model_name \ 21 | --data ETTm2 \ 22 | --features M \ 23 | --seq_len 512 \ 24 | --label_len 48 \ 25 | --pred_len 96 \ 26 | --factor 3 \ 27 | --enc_in 7 \ 28 | --dec_in 7 \ 29 | --c_out 7 \ 30 | --des 'Exp' \ 31 | --itr 1 \ 32 | --d_model $d_model \ 33 | --d_ff $d_ff \ 34 | --batch_size 16 \ 35 | --learning_rate $learning_rate \ 36 | --llm_layers $llama_layers \ 37 | --train_epochs $train_epochs \ 38 | --model_comment $comment 39 | 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 41 | --task_name long_term_forecast \ 42 | --is_training 1 \ 43 | --root_path ./dataset/ETT-small/ \ 44 | --data_path ETTm2.csv \ 45 | --model_id ETTm2_512_192 \ 46 | --model $model_name \ 47 | --data ETTm2 \ 48 | --features M \ 49 | --seq_len 512 \ 50 | --label_len 48 \ 51 | --pred_len 192 \ 52 | --factor 3 \ 53 | --enc_in 7 \ 54 | --dec_in 7 \ 55 | --c_out 7 \ 56 | 
--des 'Exp' \ 57 | --itr 1 \ 58 | --d_model $d_model \ 59 | --d_ff $d_ff \ 60 | --batch_size $batch_size \ 61 | --learning_rate $learning_rate \ 62 | --lradj 'TST'\ 63 | --learning_rate 0.002 \ 64 | --llm_layers $llama_layers \ 65 | --train_epochs $train_epochs \ 66 | --model_comment $comment 67 | 68 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 69 | --task_name long_term_forecast \ 70 | --is_training 1 \ 71 | --root_path ./dataset/ETT-small/ \ 72 | --data_path ETTm2.csv \ 73 | --model_id ETTm2_512_336 \ 74 | --model $model_name \ 75 | --data ETTm2 \ 76 | --features M \ 77 | --seq_len 512 \ 78 | --label_len 48 \ 79 | --pred_len 336 \ 80 | --factor 3 \ 81 | --enc_in 7 \ 82 | --dec_in 7 \ 83 | --c_out 7 \ 84 | --des 'Exp' \ 85 | --itr 1 \ 86 | --d_model $d_model \ 87 | --d_ff $d_ff \ 88 | --batch_size $batch_size \ 89 | --learning_rate $learning_rate \ 90 | --lradj 'TST'\ 91 | --learning_rate 0.002 \ 92 | --llm_layers $llama_layers \ 93 | --train_epochs $train_epochs \ 94 | --model_comment $comment 95 | 96 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 97 | --task_name long_term_forecast \ 98 | --is_training 1 \ 99 | --root_path ./dataset/ETT-small/ \ 100 | --data_path ETTm2.csv \ 101 | --model_id ETTm2_512_720 \ 102 | --model $model_name \ 103 | --data ETTm2 \ 104 | --features M \ 105 | --seq_len 512 \ 106 | --label_len 48 \ 107 | --pred_len 720 \ 108 | --factor 3 \ 109 | --enc_in 7 \ 110 | --dec_in 7 \ 111 | --c_out 7 \ 112 | --des 'Exp' \ 113 | --itr 1 \ 114 | --d_model $d_model \ 115 | --d_ff $d_ff \ 116 | --batch_size $batch_size \ 117 | --learning_rate $learning_rate \ 118 | --lradj 'TST'\ 119 | --learning_rate 0.002 \ 120 | --llm_layers $llama_layers \ 121 | --train_epochs $train_epochs \ 122 | --model_comment $comment 123 | 124 | 125 | 
-------------------------------------------------------------------------------- /scripts/TimeLLM_M4.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | 3 | train_epochs=50 4 | llama_layers=32 5 | batch_size=24 6 | learning_rate=0.001 7 | d_model=8 8 | d_ff=32 9 | 10 | master_port=00097 11 | num_process=8 12 | 13 | comment='TimeLLM-M4' 14 | 15 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \ 16 | --task_name short_term_forecast \ 17 | --is_training 1 \ 18 | --root_path ./dataset/m4 \ 19 | --seasonal_patterns 'Monthly' \ 20 | --model_id m4_Monthly \ 21 | --model $model_name \ 22 | --data m4 \ 23 | --features M \ 24 | --enc_in 1 \ 25 | --dec_in 1 \ 26 | --c_out 1 \ 27 | --llm_layers $llama_layers \ 28 | --d_model $d_model \ 29 | --d_ff $d_ff \ 30 | --patch_len 1 \ 31 | --stride 1 \ 32 | --batch_size $batch_size \ 33 | --des 'Exp' \ 34 | --itr 1 \ 35 | --learning_rate $learning_rate \ 36 | --loss 'SMAPE' \ 37 | --train_epochs $train_epochs \ 38 | --model_comment $comment 39 | 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \ 41 | --task_name short_term_forecast \ 42 | --is_training 1 \ 43 | --root_path ./dataset/m4 \ 44 | --seasonal_patterns 'Yearly' \ 45 | --model_id m4_Yearly \ 46 | --model $model_name \ 47 | --data m4 \ 48 | --features M \ 49 | --enc_in 1 \ 50 | --dec_in 1 \ 51 | --c_out 1 \ 52 | --llm_layers $llama_layers \ 53 | --d_model $d_model \ 54 | --d_ff $d_ff \ 55 | --patch_len 1 \ 56 | --stride 1 \ 57 | --batch_size $batch_size \ 58 | --des 'Exp' \ 59 | --itr 1 \ 60 | --learning_rate $learning_rate \ 61 | --loss 'SMAPE' \ 62 | --train_epochs $train_epochs \ 63 | --model_comment $comment 64 | 65 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \ 66 | --task_name 
short_term_forecast \ 67 | --is_training 1 \ 68 | --root_path ./dataset/m4 \ 69 | --seasonal_patterns 'Weekly' \ 70 | --model_id m4_Weekly \ 71 | --model $model_name \ 72 | --data m4 \ 73 | --features M \ 74 | --enc_in 1 \ 75 | --dec_in 1 \ 76 | --c_out 1 \ 77 | --llm_layers $llama_layers \ 78 | --d_model $d_model \ 79 | --d_ff $d_ff \ 80 | --patch_len 1 \ 81 | --stride 1 \ 82 | --batch_size $batch_size \ 83 | --des 'Exp' \ 84 | --itr 1 \ 85 | --learning_rate $learning_rate \ 86 | --loss 'SMAPE' \ 87 | --train_epochs $train_epochs \ 88 | --model_comment $comment 89 | 90 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \ 91 | --task_name short_term_forecast \ 92 | --is_training 1 \ 93 | --root_path ./dataset/m4 \ 94 | --seasonal_patterns 'Daily' \ 95 | --model_id m4_Daily \ 96 | --model $model_name \ 97 | --data m4 \ 98 | --features M \ 99 | --enc_in 1 \ 100 | --dec_in 1 \ 101 | --c_out 1 \ 102 | --llm_layers $llama_layers \ 103 | --d_model $d_model \ 104 | --d_ff $d_ff \ 105 | --patch_len 1 \ 106 | --stride 1 \ 107 | --batch_size $batch_size \ 108 | --des 'Exp' \ 109 | --itr 1 \ 110 | --learning_rate $learning_rate \ 111 | --loss 'SMAPE' \ 112 | --train_epochs $train_epochs \ 113 | --model_comment $comment 114 | 115 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \ 116 | --task_name short_term_forecast \ 117 | --is_training 1 \ 118 | --root_path ./dataset/m4 \ 119 | --seasonal_patterns 'Quarterly' \ 120 | --model_id m4_Quarterly \ 121 | --model $model_name \ 122 | --data m4 \ 123 | --features M \ 124 | --enc_in 1 \ 125 | --dec_in 1 \ 126 | --c_out 1 \ 127 | --llm_layers $llama_layers \ 128 | --d_model $d_model \ 129 | --d_ff $d_ff \ 130 | --patch_len 1 \ 131 | --stride 1 \ 132 | --batch_size $batch_size \ 133 | --des 'Exp' \ 134 | --itr 1 \ 135 | --learning_rate $learning_rate \ 136 | --loss 'SMAPE' \ 137 | 
--train_epochs $train_epochs \ 138 | --model_comment $comment 139 | 140 | 141 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \ 142 | --task_name short_term_forecast \ 143 | --is_training 1 \ 144 | --root_path ./dataset/m4 \ 145 | --seasonal_patterns 'Hourly' \ 146 | --model_id m4_Hourly \ 147 | --model $model_name \ 148 | --data m4 \ 149 | --features M \ 150 | --enc_in 1 \ 151 | --dec_in 1 \ 152 | --c_out 1 \ 153 | --llm_layers $llama_layers \ 154 | --d_model $d_model \ 155 | --d_ff $d_ff \ 156 | --patch_len 1 \ 157 | --stride 1 \ 158 | --batch_size $batch_size \ 159 | --des 'Exp' \ 160 | --itr 1 \ 161 | --learning_rate $learning_rate \ 162 | --loss 'SMAPE' \ 163 | --train_epochs $train_epochs \ 164 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_Traffic.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=10 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00097 7 | num_process=8 8 | batch_size=24 9 | d_model=16 10 | d_ff=32 11 | 12 | comment='TimeLLM-Traffic' 13 | 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 15 | --task_name long_term_forecast \ 16 | --is_training 1 \ 17 | --root_path ./dataset/traffic/ \ 18 | --data_path traffic.csv \ 19 | --model_id traffic_512_96 \ 20 | --model $model_name \ 21 | --data Traffic \ 22 | --features M \ 23 | --seq_len 512 \ 24 | --label_len 48 \ 25 | --pred_len 96 \ 26 | --e_layers 2 \ 27 | --d_layers 1 \ 28 | --factor 3 \ 29 | --enc_in 862 \ 30 | --dec_in 862 \ 31 | --c_out 862 \ 32 | --batch_size $batch_size \ 33 | --learning_rate $learning_rate \ 34 | --llm_layers $llama_layers \ 35 | --train_epochs $train_epochs \ 36 | --model_comment $comment 37 | 38 | accelerate launch --multi_gpu --mixed_precision 
bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_512_192 \
  --model $model_name \
  --data Traffic \
  --features M \
  --seq_len 512 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --batch_size $batch_size \
  --learning_rate $learning_rate \
  --llm_layers $llama_layers \
  --train_epochs $train_epochs \
  --model_comment $comment

# FIX(review): the three runs here all reused --model_id traffic_512_96, so
# their checkpoints/results would overwrite each other; the ids now reflect the
# actual horizon (192/336/720). The last run also had --label_len/--pred_len
# swapped (label_len 720 / pred_len 96); restored to label_len 48 / pred_len 720
# to match every other script.

# --- Traffic, horizon 336 (batch_size 1 kept: memory-driven override) ---------
accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_512_336 \
  --model $model_name \
  --data Traffic \
  --features M \
  --seq_len 512 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --batch_size 1 \
  --learning_rate $learning_rate \
  --llm_layers $llama_layers \
  --train_epochs $train_epochs \
  --model_comment $comment

# --- Traffic, horizon 720 -----------------------------------------------------
accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_512_720 \
  --model $model_name \
  --data Traffic \
  --features M \
  --seq_len 512 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --batch_size $batch_size \
  --learning_rate
$learning_rate \ 106 | --llm_layers $llama_layers \ 107 | --train_epochs $train_epochs \ 108 | --model_comment $comment -------------------------------------------------------------------------------- /scripts/TimeLLM_Weather.sh: -------------------------------------------------------------------------------- 1 | model_name=TimeLLM 2 | train_epochs=10 3 | learning_rate=0.01 4 | llama_layers=32 5 | 6 | master_port=00097 7 | num_process=8 8 | batch_size=24 9 | d_model=16 10 | d_ff=32 11 | 12 | comment='TimeLLM-Weather' 13 | 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 15 | --task_name long_term_forecast \ 16 | --is_training 1 \ 17 | --root_path ./dataset/weather/ \ 18 | --data_path weather.csv \ 19 | --model_id weather_512_96 \ 20 | --model $model_name \ 21 | --data Weather \ 22 | --features M \ 23 | --seq_len 512 \ 24 | --label_len 48 \ 25 | --pred_len 96 \ 26 | --e_layers 2 \ 27 | --d_layers 1 \ 28 | --factor 3 \ 29 | --enc_in 21 \ 30 | --dec_in 21 \ 31 | --c_out 21 \ 32 | --d_model 32 \ 33 | --d_ff 32 \ 34 | --batch_size $batch_size \ 35 | --learning_rate $learning_rate \ 36 | --llm_layers $llama_layers \ 37 | --train_epochs $train_epochs \ 38 | --model_comment $comment 39 | 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 41 | --task_name long_term_forecast \ 42 | --is_training 1 \ 43 | --root_path ./dataset/weather/ \ 44 | --data_path weather.csv \ 45 | --model_id weather_512_192 \ 46 | --model $model_name \ 47 | --data Weather \ 48 | --features M \ 49 | --seq_len 512 \ 50 | --label_len 48 \ 51 | --pred_len 192 \ 52 | --e_layers 2 \ 53 | --d_layers 1 \ 54 | --factor 3 \ 55 | --enc_in 21 \ 56 | --dec_in 21 \ 57 | --c_out 21 \ 58 | --d_model 32 \ 59 | --d_ff 32 \ 60 | --batch_size $batch_size \ 61 | --learning_rate $learning_rate \ 62 | --llm_layers $llama_layers \ 63 | --train_epochs 
$train_epochs \ 64 | --model_comment $comment 65 | 66 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 67 | --task_name long_term_forecast \ 68 | --is_training 1 \ 69 | --root_path ./dataset/weather/ \ 70 | --data_path weather.csv \ 71 | --model_id weather_512_336 \ 72 | --model $model_name \ 73 | --data Weather \ 74 | --features M \ 75 | --seq_len 512 \ 76 | --label_len 48 \ 77 | --pred_len 336 \ 78 | --e_layers 2 \ 79 | --d_layers 1 \ 80 | --factor 3 \ 81 | --enc_in 21 \ 82 | --dec_in 21 \ 83 | --c_out 21 \ 84 | --d_model 32 \ 85 | --d_ff 128 \ 86 | --batch_size $batch_size \ 87 | --learning_rate $learning_rate \ 88 | --llm_layers $llama_layers \ 89 | --train_epochs 10 \ 90 | --model_comment $comment 91 | 92 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \ 93 | --task_name long_term_forecast \ 94 | --is_training 1 \ 95 | --root_path ./dataset/weather/ \ 96 | --data_path weather.csv \ 97 | --model_id weather_512_720 \ 98 | --model $model_name \ 99 | --data Weather \ 100 | --features M \ 101 | --seq_len 512 \ 102 | --label_len 48 \ 103 | --pred_len 720 \ 104 | --e_layers 2 \ 105 | --d_layers 1 \ 106 | --factor 3 \ 107 | --enc_in 21 \ 108 | --dec_in 21 \ 109 | --c_out 21 \ 110 | --d_model 32 \ 111 | --d_ff 128 \ 112 | --batch_size $batch_size \ 113 | --learning_rate $learning_rate \ 114 | --llm_layers $llama_layers \ 115 | --train_epochs 15 \ 116 | --model_comment $comment -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/utils/__init__.py -------------------------------------------------------------------------------- /utils/losses.py: 
# --------------------------------------------------------------------------------
# This source code is provided for the purposes of scientific reproducibility
# under the following limited license from Element AI Inc. The code is an
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
# expansion analysis for interpretable time series forecasting,
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
# International license (CC BY-NC 4.0):
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
# for the benefit of third parties or internally in production) requires an
# explicit license. The subject-matter of the N-BEATS model and associated
# materials are the property of Element AI Inc. and may be subject to patent
# protection. No license to patents is granted hereunder (whether express or
# implied). Copyright © 2020 Element AI Inc. All rights reserved.

"""
Loss functions for PyTorch.
"""

import torch as t
import torch.nn as nn
import numpy as np
import pdb


def divide_no_nan(a, b):
    """
    a/b where the resulting NaN or +/-Inf are replaced by 0.
    """
    result = a / b
    result[result != result] = .0    # NaN from 0/0
    result[result == np.inf] = .0    # positive / 0
    # FIX(review): negative / 0 yields -inf, which the original code let
    # through and which then poisoned the mean in the losses below.
    result[result == -np.inf] = .0
    return result


class mape_loss(nn.Module):
    def __init__(self):
        super(mape_loss, self).__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error

        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        weights = divide_no_nan(mask, target)
        return t.mean(t.abs((forecast - target) * weights))


class smape_loss(nn.Module):
    def __init__(self):
        super(smape_loss, self).__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)

        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        # .data detaches the denominator so gradients flow only through the
        # numerator, matching the original N-BEATS implementation.
        return 200 * t.mean(divide_no_nan(t.abs(forecast - target),
                                          t.abs(forecast.data) + t.abs(target.data)) * mask)


class mase_loss(nn.Module):
    def __init__(self):
        super(mase_loss, self).__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf

        :param insample: Insample values. Shape: batch, time_i
        :param freq: Frequency value
        :param forecast: Forecast values. Shape: batch, time_o
        :param target: Target values. Shape: batch, time_o
        :param mask: 0/1 mask. Shape: batch, time_o
        :return: Loss value
        """
        # Scale: mean absolute error of the seasonal-naive forecast in-sample.
        masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
        masked_masep_inv = divide_no_nan(mask, masep[:, None])
        return t.mean(t.abs(target - forecast) * masked_masep_inv)
# -------------------------------------------------------------------------------- /utils/m4_summary.py: --------------------------------------------------------------------------------
# This source code is provided for the purposes of scientific reproducibility
# under the following limited license from Element AI Inc.
The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | M4 Summary 17 | """ 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from data_provider.m4 import M4Dataset 24 | from data_provider.m4 import M4Meta 25 | import os 26 | 27 | 28 | def group_values(values, groups, group_name): 29 | return np.array([v[~np.isnan(v)] for v in values[groups == group_name]]) 30 | 31 | 32 | def mase(forecast, insample, outsample, frequency): 33 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:])) 34 | 35 | 36 | def smape_2(forecast, target): 37 | denom = np.abs(target) + np.abs(forecast) 38 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway. 39 | denom[denom == 0.0] = 1.0 40 | return 200 * np.abs(forecast - target) / denom 41 | 42 | 43 | def mape(forecast, target): 44 | denom = np.abs(target) 45 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway. 
46 | denom[denom == 0.0] = 1.0 47 | return 100 * np.abs(forecast - target) / denom 48 | 49 | 50 | class M4Summary: 51 | def __init__(self, file_path, root_path): 52 | self.file_path = file_path 53 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path) 54 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path) 55 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv') 56 | 57 | def evaluate(self): 58 | """ 59 | Evaluate forecasts using M4 test dataset. 60 | 61 | :param forecast: Forecasts. Shape: timeseries, time. 62 | :return: sMAPE and OWA grouped by seasonal patterns. 63 | """ 64 | grouped_owa = OrderedDict() 65 | 66 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32) 67 | naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts]) 68 | 69 | model_mases = {} 70 | naive2_smapes = {} 71 | naive2_mases = {} 72 | grouped_smapes = {} 73 | grouped_mapes = {} 74 | for group_name in M4Meta.seasonal_patterns: 75 | file_name = self.file_path + group_name + "_forecast.csv" 76 | if os.path.exists(file_name): 77 | model_forecast = pd.read_csv(file_name).values 78 | 79 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name) 80 | target = group_values(self.test_set.values, self.test_set.groups, group_name) 81 | # all timeseries within group have same frequency 82 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0] 83 | insample = group_values(self.training_set.values, self.test_set.groups, group_name) 84 | 85 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i], 86 | insample=insample[i], 87 | outsample=target[i], 88 | frequency=frequency) for i in range(len(model_forecast))]) 89 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i], 90 | insample=insample[i], 91 | outsample=target[i], 92 | frequency=frequency) for i in range(len(model_forecast))]) 93 | 94 | naive2_smapes[group_name] = 
np.mean(smape_2(naive2_forecast, target)) 95 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target)) 96 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target)) 97 | 98 | grouped_smapes = self.summarize_groups(grouped_smapes) 99 | grouped_mapes = self.summarize_groups(grouped_mapes) 100 | grouped_model_mases = self.summarize_groups(model_mases) 101 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes) 102 | grouped_naive2_mases = self.summarize_groups(naive2_mases) 103 | for k in grouped_model_mases.keys(): 104 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] + 105 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2 106 | 107 | def round_all(d): 108 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items())) 109 | 110 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all( 111 | grouped_model_mases) 112 | 113 | def summarize_groups(self, scores): 114 | """ 115 | Re-group scores respecting M4 rules. 116 | :param scores: Scores per group. 117 | :return: Grouped scores. 
118 | """ 119 | scores_summary = OrderedDict() 120 | 121 | def group_count(group_name): 122 | return len(np.where(self.test_set.groups == group_name)[0]) 123 | 124 | weighted_score = {} 125 | for g in ['Yearly', 'Quarterly', 'Monthly']: 126 | weighted_score[g] = scores[g] * group_count(g) 127 | scores_summary[g] = scores[g] 128 | 129 | others_score = 0 130 | others_count = 0 131 | for g in ['Weekly', 'Daily', 'Hourly']: 132 | others_score += scores[g] * group_count(g) 133 | others_count += group_count(g) 134 | weighted_score['Others'] = others_score 135 | scores_summary['Others'] = others_score / others_count 136 | 137 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) 138 | scores_summary['Average'] = average 139 | 140 | return scores_summary 141 | -------------------------------------------------------------------------------- /utils/masking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TriangularCausalMask(): 5 | def __init__(self, B, L, device="cpu"): 6 | mask_shape = [B, 1, L, L] 7 | with torch.no_grad(): 8 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 9 | 10 | @property 11 | def mask(self): 12 | return self._mask 13 | 14 | 15 | class ProbMask(): 16 | def __init__(self, B, H, L, index, scores, device="cpu"): 17 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 18 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) 19 | indicator = _mask_ex[torch.arange(B)[:, None, None], 20 | torch.arange(H)[None, :, None], 21 | index, :].to(device) 22 | self._mask = indicator.view(scores.shape).to(device) 23 | 24 | @property 25 | def mask(self): 26 | return self._mask -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def 
RSE(pred, true): 5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) 6 | 7 | 8 | def CORR(pred, true): 9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) 10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) 11 | return (u / d).mean(-1) 12 | 13 | 14 | def MAE(pred, true): 15 | return np.mean(np.abs(pred - true)) 16 | 17 | 18 | def MSE(pred, true): 19 | return np.mean((pred - true) ** 2) 20 | 21 | 22 | def RMSE(pred, true): 23 | return np.sqrt(MSE(pred, true)) 24 | 25 | 26 | def MAPE(pred, true): 27 | return np.mean(np.abs((pred - true) / true)) 28 | 29 | 30 | def MSPE(pred, true): 31 | return np.mean(np.square((pred - true) / true)) 32 | 33 | 34 | def metric(pred, true): 35 | mae = MAE(pred, true) 36 | mse = MSE(pred, true) 37 | rmse = RMSE(pred, true) 38 | mape = MAPE(pred, true) 39 | mspe = MSPE(pred, true) 40 | 41 | return mae, mse, rmse, mape, mspe 42 | -------------------------------------------------------------------------------- /utils/timefeatures.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.tseries import offsets 6 | from pandas.tseries.frequencies import to_offset 7 | 8 | 9 | class TimeFeature: 10 | def __init__(self): 11 | pass 12 | 13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 14 | pass 15 | 16 | def __repr__(self): 17 | return self.__class__.__name__ + "()" 18 | 19 | 20 | class SecondOfMinute(TimeFeature): 21 | """Minute of hour encoded as value between [-0.5, 0.5]""" 22 | 23 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 24 | return index.second / 59.0 - 0.5 25 | 26 | 27 | class MinuteOfHour(TimeFeature): 28 | """Minute of hour encoded as value between [-0.5, 0.5]""" 29 | 30 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 31 | return index.minute / 59.0 - 0.5 32 | 33 | 34 | class 
HourOfDay(TimeFeature): 35 | """Hour of day encoded as value between [-0.5, 0.5]""" 36 | 37 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 38 | return index.hour / 23.0 - 0.5 39 | 40 | 41 | class DayOfWeek(TimeFeature): 42 | """Hour of day encoded as value between [-0.5, 0.5]""" 43 | 44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 45 | return index.dayofweek / 6.0 - 0.5 46 | 47 | 48 | class DayOfMonth(TimeFeature): 49 | """Day of month encoded as value between [-0.5, 0.5]""" 50 | 51 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 52 | return (index.day - 1) / 30.0 - 0.5 53 | 54 | 55 | class DayOfYear(TimeFeature): 56 | """Day of year encoded as value between [-0.5, 0.5]""" 57 | 58 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 59 | return (index.dayofyear - 1) / 365.0 - 0.5 60 | 61 | 62 | class MonthOfYear(TimeFeature): 63 | """Month of year encoded as value between [-0.5, 0.5]""" 64 | 65 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 66 | return (index.month - 1) / 11.0 - 0.5 67 | 68 | 69 | class WeekOfYear(TimeFeature): 70 | """Week of year encoded as value between [-0.5, 0.5]""" 71 | 72 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 73 | return (index.isocalendar().week - 1) / 52.0 - 0.5 74 | 75 | 76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: 77 | """ 78 | Returns a list of time features that will be appropriate for the given frequency string. 79 | Parameters 80 | ---------- 81 | freq_str 82 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 
83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import shutil 5 | 6 | from tqdm import tqdm 7 | 8 | plt.switch_backend('agg') 9 | 10 | 11 | def adjust_learning_rate(accelerator, optimizer, scheduler, epoch, args, printout=True): 12 | if args.lradj == 'type1': 13 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} 14 | elif args.lradj == 'type2': 15 | lr_adjust = { 16 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 17 | 10: 
5e-7, 15: 1e-7, 20: 5e-8 18 | } 19 | elif args.lradj == 'type3': 20 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 21 | elif args.lradj == 'PEMS': 22 | lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))} 23 | elif args.lradj == 'TST': 24 | lr_adjust = {epoch: scheduler.get_last_lr()[0]} 25 | elif args.lradj == 'constant': 26 | lr_adjust = {epoch: args.learning_rate} 27 | if epoch in lr_adjust.keys(): 28 | lr = lr_adjust[epoch] 29 | for param_group in optimizer.param_groups: 30 | param_group['lr'] = lr 31 | if printout: 32 | if accelerator is not None: 33 | accelerator.print('Updating learning rate to {}'.format(lr)) 34 | else: 35 | print('Updating learning rate to {}'.format(lr)) 36 | 37 | 38 | class EarlyStopping: 39 | def __init__(self, accelerator=None, patience=7, verbose=False, delta=0, save_mode=True): 40 | self.accelerator = accelerator 41 | self.patience = patience 42 | self.verbose = verbose 43 | self.counter = 0 44 | self.best_score = None 45 | self.early_stop = False 46 | self.val_loss_min = np.Inf 47 | self.delta = delta 48 | self.save_mode = save_mode 49 | 50 | def __call__(self, val_loss, model, path): 51 | score = -val_loss 52 | if self.best_score is None: 53 | self.best_score = score 54 | if self.save_mode: 55 | self.save_checkpoint(val_loss, model, path) 56 | elif score < self.best_score + self.delta: 57 | self.counter += 1 58 | if self.accelerator is None: 59 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 60 | else: 61 | self.accelerator.print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 62 | if self.counter >= self.patience: 63 | self.early_stop = True 64 | else: 65 | self.best_score = score 66 | if self.save_mode: 67 | self.save_checkpoint(val_loss, model, path) 68 | self.counter = 0 69 | 70 | def save_checkpoint(self, val_loss, model, path): 71 | if self.verbose: 72 | if self.accelerator is not None: 73 | 
class dotdict(dict):
    """dict whose keys can also be read/written/deleted as attributes.

    A missing attribute lookup yields None (dict.get semantics) instead of
    raising AttributeError.
    """

    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]


class StandardScaler():
    """Normalize / denormalize data against a fixed mean and std."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        # z = (x - mean) / std
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        # x = z * std + mean
        rescaled = data * self.std
        return rescaled + self.mean


def adjustment(gt, pred):
    """Point-adjust anomaly predictions: once any point inside a ground-truth
    anomaly segment is detected, mark the whole segment as detected.

    Both sequences are modified in place and returned as a (gt, pred) pair.
    """
    inside_segment = False
    n = len(gt)
    for idx in range(n):
        if gt[idx] == 1 and pred[idx] == 1 and not inside_segment:
            inside_segment = True
            # Spread the detection backwards over the segment.
            # NOTE: the scan stops before index 0, matching the reference
            # implementation's range(i, 0, -1).
            back = idx
            while back > 0:
                if gt[back] == 0:
                    break
                if pred[back] == 0:
                    pred[back] = 1
                back -= 1
            # ...and forwards until the segment ends.
            for fwd in range(idx, n):
                if gt[fwd] == 0:
                    break
                if pred[fwd] == 0:
                    pred[fwd] = 1
        elif gt[idx] == 0:
            inside_segment = False
        if inside_segment:
            pred[idx] = 1
    return gt, pred


def cal_accuracy(y_pred, y_true):
    """Fraction of positions where prediction equals ground truth."""
    hits = y_pred == y_true
    return np.mean(hits)


def del_files(dir_path):
    """Recursively delete the directory tree rooted at dir_path."""
    shutil.rmtree(dir_path)
def vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric):
    """Run one evaluation pass over `vali_loader` without gradients.

    Returns `(avg_criterion_loss, avg_mae_loss)`, averaged over batches after
    gathering predictions/targets across distributed processes.  `vali_data`
    is accepted for interface symmetry but not used here.  The model is put
    back into train() mode before returning.
    """
    total_loss = []
    total_mae_loss = []
    model.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)):
            # assumes batches are (batch, seq_len, channels) tensors — TODO confirm against the data loader
            batch_x = batch_x.float().to(accelerator.device)
            batch_y = batch_y.float()

            batch_x_mark = batch_x_mark.float().to(accelerator.device)
            batch_y_mark = batch_y_mark.float().to(accelerator.device)

            # decoder input: the last `label_len` ground-truth steps followed by
            # zero placeholders for the `pred_len` steps to be forecast
            dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
            dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
                accelerator.device)
            # encoder - decoder
            if args.use_amp:
                # mixed-precision forward pass
                with torch.cuda.amp.autocast():
                    if args.output_attention:
                        # model returns (outputs, attention); keep outputs only
                        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                if args.output_attention:
                    outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                else:
                    outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

            # collect predictions and targets from all distributed processes
            outputs, batch_y = accelerator.gather_for_metrics((outputs, batch_y))

            # 'MS' (multivariate input -> univariate target) scores only the last channel
            f_dim = -1 if args.features == 'MS' else 0
            outputs = outputs[:, -args.pred_len:, f_dim:]
            batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)

            pred = outputs.detach()
            true = batch_y.detach()

            loss = criterion(pred, true)

            mae_loss = mae_metric(pred, true)

            total_loss.append(loss.item())
            total_mae_loss.append(mae_loss.item())

    total_loss = np.average(total_loss)
    total_mae_loss = np.average(total_mae_loss)

    model.train()
    return total_loss, total_mae_loss
def test(args, accelerator, model, train_loader, vali_loader, criterion):
    """Evaluate a forecasting run on the held-out horizon (M4-style setup).

    Forecasts `pred_len` steps from the last in-sample window of each training
    series, then scores against `vali_loader.dataset.timeseries` via
    `criterion(insample, frequency_map, forecast, target, mask)` — presumably
    the SMAPE/OWA-style loss from utils/losses.py; confirm against the caller.
    Returns the scalar loss and restores the model to train() mode.
    """
    # last in-sample window of every series
    x, _ = train_loader.dataset.last_insample_window()
    y = vali_loader.dataset.timeseries
    x = torch.tensor(x, dtype=torch.float32).to(accelerator.device)
    x = x.unsqueeze(-1)  # add a trailing channel dimension

    model.eval()
    with torch.no_grad():
        B, _, C = x.shape
        # decoder input: the last `label_len` observed steps + zero placeholders
        dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
        dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1)
        outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
        # forecast in chunks of eval_batch_size to bound memory; id_list holds
        # the chunk boundaries [0, bs, 2*bs, ..., B]
        id_list = np.arange(0, B, args.eval_batch_size)
        id_list = np.append(id_list, B)
        for i in range(len(id_list) - 1):
            outputs[id_list[i]:id_list[i + 1], :, :] = model(
                x[id_list[i]:id_list[i + 1]],
                None,
                dec_inp[id_list[i]:id_list[i + 1]],
                None
            )
        # synchronize processes, then collect the full forecast tensor
        accelerator.wait_for_everyone()
        outputs = accelerator.gather_for_metrics(outputs)
        f_dim = -1 if args.features == 'MS' else 0
        outputs = outputs[:, -args.pred_len:, f_dim:]
        pred = outputs
        true = torch.from_numpy(np.array(y)).to(accelerator.device)
        # mask of ones: every horizon point counts equally in the loss
        batch_y_mark = torch.ones(true.shape).to(accelerator.device)
        true = accelerator.gather_for_metrics(true)
        batch_y_mark = accelerator.gather_for_metrics(batch_y_mark)

        loss = criterion(x[:, :, 0], args.frequency_map, pred[:, :, 0], true, batch_y_mark)

    model.train()
    return loss


def load_content(args):
    """Read the dataset-description prompt for `args.data` from
    ./dataset/prompt_bank/<name>.txt (all ETT* dataset variants share ETT.txt).
    """
    if 'ETT' in args.data:
        file = 'ETT'
    else:
        file = args.data
    with open('./dataset/prompt_bank/{0}.txt'.format(file), 'r') as f:
        content = f.read()
    return content