├── LEGAL.md
├── LICENSE
├── README.md
├── data_provider
    ├── __init__.py
    ├── data_factory.py
    ├── data_loader.py
    └── m4.py
├── data_provider_pretrain
    ├── __init__.py
    ├── data_factory.py
    └── data_loader.py
├── dataset
    └── prompt_bank
    │   ├── ECL.txt
    │   ├── ETT.txt
    │   ├── Traffic.txt
    │   ├── Weather.txt
    │   └── m4.txt
├── ds_config_zero2.json
├── figures
    ├── framework.png
    ├── logo.png
    └── method-detailed-illustration.png
├── layers
    ├── AutoCorrelation.py
    ├── Autoformer_EncDec.py
    ├── Conv_Blocks.py
    ├── Embed.py
    ├── SelfAttention_Family.py
    ├── StandardNorm.py
    ├── Transformer_EncDec.py
    └── __init__.py
├── models
    ├── Autoformer.py
    ├── DLinear.py
    ├── TimeLLM.py
    └── __init__.py
├── requirements.txt
├── run_m4.py
├── run_main.py
├── run_pretrain.py
├── scripts
    ├── TimeLLM_ECL.sh
    ├── TimeLLM_ETTh1.sh
    ├── TimeLLM_ETTh1_ETTh2.sh
    ├── TimeLLM_ETTh2.sh
    ├── TimeLLM_ETTm1.sh
    ├── TimeLLM_ETTm2.sh
    ├── TimeLLM_M4.sh
    ├── TimeLLM_Traffic.sh
    └── TimeLLM_Weather.sh
└── utils
    ├── __init__.py
    ├── losses.py
    ├── m4_summary.py
    ├── masking.py
    ├── metrics.py
    ├── timefeatures.py
    └── tools.py


/LEGAL.md:
--------------------------------------------------------------------------------
1 | Legal Disclaimer
2 | 
3 | Within this source code, the comments in Chinese shall be the original, governing version. Any comment in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4 | 
5 | 法律免责声明
6 | 
7 | 关于代码注释部分，中文注释为官方版本，其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致，当中文注释与其它语言注释存在不一致时，请以中文注释为准。


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 |   <!-- <h1><b> Time-LLM </b></h1> -->
  3 |   <!-- <h2><b> Time-LLM </b></h2> -->
  4 |   <h2><b> (ICLR'24) Time-LLM: Time Series Forecasting by Reprogramming Large Language Models </b></h2>
  5 | </div>
  6 | 
  7 | <div align="center">
  8 | 
  9 | ![](https://img.shields.io/github/last-commit/KimMeen/Time-LLM?color=green)
 10 | ![](https://img.shields.io/github/stars/KimMeen/Time-LLM?color=yellow)
 11 | ![](https://img.shields.io/github/forks/KimMeen/Time-LLM?color=lightblue)
 12 | ![](https://img.shields.io/badge/PRs-Welcome-green)
 13 | 
 14 | </div>
 15 | 
 16 | <div align="center">
 17 | 
 18 | **[<a href="https://arxiv.org/abs/2310.01728">Paper Page</a>]**
 19 | **[<a href="https://www.youtube.com/watch?v=6sFiNExS3nI">YouTube Talk 1</a>]**
 20 | **[<a href="https://www.youtube.com/watch?v=L-hRexVa32k">YouTube Talk 2</a>]**
 21 | **[<a href="https://medium.com/towards-data-science/time-llm-reprogram-an-llm-for-time-series-forecasting-e2558087b8ac">Medium Blog</a>]**
 22 | 
 23 | **[<a href="https://www.jiqizhixin.com/articles/2024-04-15?from=synced&keyword=TIME-LLM">机器之心中文解读</a>]**
 24 | **[<a href="https://mp.weixin.qq.com/s/UL_Kl0PzgfYHOnq7d3vM8Q">量子位中文解读</a>]**
 25 | **[<a href="https://mp.weixin.qq.com/s/FSxUdvPI713J2LiHnNaFCw">时序人中文解读</a>]**
 26 | **[<a href="https://mp.weixin.qq.com/s/nUiQGnHOkWznoBPqM0KHXg">AI算法厨房中文解读</a>]**
 27 | **[<a href="https://zhuanlan.zhihu.com/p/676256783">知乎中文解读</a>]**
 28 | 
 29 | 
 30 | </div>
 31 | 
 32 | <p align="center">
 33 | 
 34 | <img src="./figures/logo.png" width="70">
 35 | 
 36 | </p>
 37 | 
 38 | ---
 39 | >
 40 | > 🙋 Please let us know if you find out a mistake or have any suggestions!
 41 | > 
 42 | > 🌟 If you find this resource helpful, please consider to star this repository and cite our research:
 43 | 
 44 | ```
 45 | @inproceedings{jin2023time,
 46 |   title={{Time-LLM}: Time series forecasting by reprogramming large language models},
 47 |   author={Jin, Ming and Wang, Shiyu and Ma, Lintao and Chu, Zhixuan and Zhang, James Y and Shi, Xiaoming and Chen, Pin-Yu and Liang, Yuxuan and Li, Yuan-Fang and Pan, Shirui and Wen, Qingsong},
 48 |   booktitle={International Conference on Learning Representations (ICLR)},
 49 |   year={2024}
 50 | }
 51 | ```
 52 | 
 53 | ## Updates/News:
 54 | 
 55 | 🚩 **News** (Aug. 2024): Time-LLM has been adopted by XiMou Optimization Technology Co., Ltd. (XMO) for Solar, Wind, and Weather Forecasting.
 56 | 
 57 | 🚩 **News** (May 2024): Time-LLM has been included in [NeuralForecast](https://github.com/Nixtla/neuralforecast). Special thanks to the contributor @[JQGoh](https://github.com/JQGoh) and @[marcopeix](https://github.com/marcopeix)!
 58 | 
 59 | 🚩 **News** (March 2024): Time-LLM has been upgraded to serve as a general framework for repurposing a wide range of language models to time series forecasting. It now defaults to supporting Llama-7B and includes compatibility with two additional smaller PLMs (GPT-2 and BERT). Simply adjust `--llm_model` and `--llm_dim` to switch backbones.
 60 | 
 61 | ## Introduction
 62 | Time-LLM is a reprogramming framework to repurpose LLMs for general time series forecasting with the backbone language models kept intact.
 63 | Notably, we show that time series analysis (e.g., forecasting) can be cast as yet another "language task" that can be effectively tackled by an off-the-shelf LLM.
 64 | 
 65 | <p align="center">
 66 | <img src="./figures/framework.png" height = "360" alt="" align=center />
 67 | </p>
 68 | 
 69 | - Time-LLM comprises two key components: (1) reprogramming the input time series into text prototype representations that are more natural for the LLM, and (2) augmenting the input context with declarative prompts (e.g., domain expert knowledge and task instructions) to guide LLM reasoning.
 70 | 
 71 | <p align="center">
 72 | <img src="./figures/method-detailed-illustration.png" height = "190" alt="" align=center />
 73 | </p>
 74 | 
 75 | ## Requirements
 76 | Use python 3.11 from MiniConda
 77 | 
 78 | - torch==2.2.2
 79 | - accelerate==0.28.0
 80 | - einops==0.7.0
 81 | - matplotlib==3.7.0
 82 | - numpy==1.23.5
 83 | - pandas==1.5.3
 84 | - scikit_learn==1.2.2
 85 | - scipy==1.12.0
 86 | - tqdm==4.65.0
 87 | - peft==0.4.0
 88 | - transformers==4.31.0
 89 | - deepspeed==0.14.0
 90 | - sentencepiece==0.2.0
 91 | 
 92 | To install all dependencies:
 93 | ```
 94 | pip install -r requirements.txt
 95 | ```
 96 | 
 97 | ## Datasets
 98 | You can access the well pre-processed datasets from [[Google Drive]](https://drive.google.com/file/d/1NF7VEefXCmXuWNbnNe858WvQAkJ_7wuP/view?usp=sharing), then place the downloaded contents under `./dataset`
 99 | 
100 | ## Quick Demos
101 | 1. Download datasets and place them under `./dataset`
102 | 2. Tune the model. We provide five experiment scripts for demonstration purpose under the folder `./scripts`. For example, you can evaluate on ETT datasets by:
103 | 
104 | ```bash
105 | bash ./scripts/TimeLLM_ETTh1.sh 
106 | ```
107 | ```bash
108 | bash ./scripts/TimeLLM_ETTh2.sh 
109 | ```
110 | ```bash
111 | bash ./scripts/TimeLLM_ETTm1.sh 
112 | ```
113 | ```bash
114 | bash ./scripts/TimeLLM_ETTm2.sh
115 | ```
116 | 
117 | ## Detailed usage
118 | 
119 | Please refer to ```run_main.py```, ```run_m4.py``` and ```run_pretrain.py``` for the detailed description of each hyperparameter.
120 | 
121 | 
122 | ## Further Reading
123 | 1, [**TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis**](https://arxiv.org/abs/2410.16032), in *arXiv* 2024.
124 | [\[GitHub Repo\]](https://github.com/kwuking/TimeMixer/blob/main/README.md)
125 | 
126 | **Authors**: Shiyu Wang, Jiawei Li, Xiaoming Shi, Zhou Ye, Baichuan Mo, Wenze Lin, Shengtong Ju, Zhixuan Chu, Ming Jin
127 | 
128 | ```bibtex
129 | @article{wang2024timemixer++,
130 |   title={TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis},
131 |   author={Wang, Shiyu and Li, Jiawei and Shi, Xiaoming and Ye, Zhou and Mo, Baichuan and Lin, Wenze and Ju, Shengtong and Chu, Zhixuan and Jin, Ming},
132 |   journal={arXiv preprint arXiv:2410.16032},
133 |   year={2024}
134 | }
135 | ```
136 | 
137 | 2, [**Foundation Models for Time Series Analysis: A Tutorial and Survey**](https://arxiv.org/pdf/2403.14735), in *KDD* 2024.
138 | 
139 | **Authors**: Yuxuan Liang, Haomin Wen, Yuqi Nie, Yushan Jiang, Ming Jin, Dongjin Song, Shirui Pan, Qingsong Wen*
140 | 
141 | ```bibtex
142 | @inproceedings{liang2024foundation,
143 |   title={Foundation models for time series analysis: A tutorial and survey},
144 |   author={Liang, Yuxuan and Wen, Haomin and Nie, Yuqi and Jiang, Yushan and Jin, Ming and Song, Dongjin and Pan, Shirui and Wen, Qingsong},
145 |   booktitle={ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2024)},
146 |   year={2024}
147 | }
148 | ```
149 | 
150 | 3, [**Position Paper: What Can Large Language Models Tell Us about Time Series Analysis**](https://arxiv.org/abs/2402.02713), in *ICML* 2024.
151 | 
152 | **Authors**: Ming Jin, Yifan Zhang, Wei Chen, Kexin Zhang, Yuxuan Liang*, Bin Yang, Jindong Wang, Shirui Pan, Qingsong Wen*
153 | 
154 | ```bibtex
155 | @inproceedings{jin2024position,
156 |    title={Position Paper: What Can Large Language Models Tell Us about Time Series Analysis}, 
157 |    author={Ming Jin and Yifan Zhang and Wei Chen and Kexin Zhang and Yuxuan Liang and Bin Yang and Jindong Wang and Shirui Pan and Qingsong Wen},
158 |   booktitle={International Conference on Machine Learning (ICML 2024)},
159 |   year={2024}
160 | }
161 | ```
162 | 
163 | 4, [**Large Models for Time Series and Spatio-Temporal Data: A Survey and Outlook**](https://arxiv.org/abs/2310.10196), in *arXiv* 2023.
164 | [\[GitHub Repo\]](https://github.com/qingsongedu/Awesome-TimeSeries-SpatioTemporal-LM-LLM)
165 | 
166 | **Authors**: Ming Jin, Qingsong Wen*, Yuxuan Liang, Chaoli Zhang, Siqiao Xue, Xue Wang, James Zhang, Yi Wang, Haifeng Chen, Xiaoli Li (IEEE Fellow), Shirui Pan*, Vincent S. Tseng (IEEE Fellow), Yu Zheng (IEEE Fellow), Lei Chen (IEEE Fellow), Hui Xiong (IEEE Fellow)
167 | 
168 | ```bibtex
169 | @article{jin2023lm4ts,
170 |   title={Large Models for Time Series and Spatio-Temporal Data: A Survey and Outlook}, 
171 |   author={Ming Jin and Qingsong Wen and Yuxuan Liang and Chaoli Zhang and Siqiao Xue and Xue Wang and James Zhang and Yi Wang and Haifeng Chen and Xiaoli Li and Shirui Pan and Vincent S. Tseng and Yu Zheng and Lei Chen and Hui Xiong},
172 |   journal={arXiv preprint arXiv:2310.10196},
173 |   year={2023}
174 | }
175 | ```
176 | 
177 | 
178 | 5, [**Transformers in Time Series: A Survey**](https://arxiv.org/abs/2202.07125), in IJCAI 2023.
179 | [\[GitHub Repo\]](https://github.com/qingsongedu/time-series-transformers-review)
180 | 
181 | **Authors**: Qingsong Wen, Tian Zhou, Chaoli Zhang, Weiqi Chen, Ziqing Ma, Junchi Yan, Liang Sun
182 | 
183 | ```bibtex
184 | @inproceedings{wen2023transformers,
185 |   title={Transformers in time series: A survey},
186 |   author={Wen, Qingsong and Zhou, Tian and Zhang, Chaoli and Chen, Weiqi and Ma, Ziqing and Yan, Junchi and Sun, Liang},
187 |   booktitle={International Joint Conference on Artificial Intelligence(IJCAI)},
188 |   year={2023}
189 | }
190 | ```
191 | 
192 | 6, [**TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting**](https://openreview.net/pdf?id=7oLshfEIC2), in ICLR 2024.
193 | [\[GitHub Repo\]](https://github.com/kwuking/TimeMixer)
194 | 
195 | **Authors**: Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou 
196 | 
197 | ```bibtex
198 | @inproceedings{wang2023timemixer,
199 |   title={TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting},
200 |   author={Wang, Shiyu and Wu, Haixu and Shi, Xiaoming and Hu, Tengge and Luo, Huakun and Ma, Lintao and Zhang, James Y and ZHOU, JUN},
201 |   booktitle={International Conference on Learning Representations (ICLR)},
202 |   year={2024}
203 | }
204 | ```
205 | 
206 | ## Acknowledgement
207 | Our implementation adapts [Time-Series-Library](https://github.com/thuml/Time-Series-Library) and [OFA (GPT4TS)](https://github.com/DAMO-DI-ML/NeurIPS2023-One-Fits-All) as the code base and have extensively modified it to our purposes. We thank the authors for sharing their implementations and related resources.
208 | 


--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/data_provider/data_factory.py:
--------------------------------------------------------------------------------
 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4
 2 | from torch.utils.data import DataLoader
 3 | 
 4 | data_dict = {
 5 |     'ETTh1': Dataset_ETT_hour,
 6 |     'ETTh2': Dataset_ETT_hour,
 7 |     'ETTm1': Dataset_ETT_minute,
 8 |     'ETTm2': Dataset_ETT_minute,
 9 |     'ECL': Dataset_Custom,
10 |     'Traffic': Dataset_Custom,
11 |     'Weather': Dataset_Custom,
12 |     'm4': Dataset_M4,
13 | }
14 | 
15 | 
16 | def data_provider(args, flag):
17 |     Data = data_dict[args.data]
18 |     timeenc = 0 if args.embed != 'timeF' else 1
19 |     percent = args.percent
20 | 
21 |     if flag == 'test':
22 |         shuffle_flag = False
23 |         drop_last = True
24 |         batch_size = args.batch_size
25 |         freq = args.freq
26 |     else:
27 |         shuffle_flag = True
28 |         drop_last = True
29 |         batch_size = args.batch_size
30 |         freq = args.freq
31 | 
32 |     if args.data == 'm4':
33 |         drop_last = False
34 |         data_set = Data(
35 |             root_path=args.root_path,
36 |             data_path=args.data_path,
37 |             flag=flag,
38 |             size=[args.seq_len, args.label_len, args.pred_len],
39 |             features=args.features,
40 |             target=args.target,
41 |             timeenc=timeenc,
42 |             freq=freq,
43 |             seasonal_patterns=args.seasonal_patterns
44 |         )
45 |     else:
46 |         data_set = Data(
47 |             root_path=args.root_path,
48 |             data_path=args.data_path,
49 |             flag=flag,
50 |             size=[args.seq_len, args.label_len, args.pred_len],
51 |             features=args.features,
52 |             target=args.target,
53 |             timeenc=timeenc,
54 |             freq=freq,
55 |             percent=percent,
56 |             seasonal_patterns=args.seasonal_patterns
57 |         )
58 |     data_loader = DataLoader(
59 |         data_set,
60 |         batch_size=batch_size,
61 |         shuffle=shuffle_flag,
62 |         num_workers=args.num_workers,
63 |         drop_last=drop_last)
64 |     return data_set, data_loader
65 | 


--------------------------------------------------------------------------------
/data_provider/m4.py:
--------------------------------------------------------------------------------
  1 | # This source code is provided for the purposes of scientific reproducibility
  2 | # under the following limited license from Element AI Inc. The code is an
  3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
  4 | # expansion analysis for interpretable time series forecasting,
  5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
  6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
  7 | # International license (CC BY-NC 4.0):
  8 | # https://creativecommons.org/licenses/by-nc/4.0/.  Any commercial use (whether
  9 | # for the benefit of third parties or internally in production) requires an
 10 | # explicit license. The subject-matter of the N-BEATS model and associated
 11 | # materials are the property of Element AI Inc. and may be subject to patent
 12 | # protection. No license to patents is granted hereunder (whether express or
 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
 14 | 
 15 | """
 16 | M4 Dataset
 17 | """
 18 | from dataclasses import dataclass
 19 | 
 20 | import numpy as np
 21 | import pandas as pd
 22 | import logging
 23 | import os
 24 | import pathlib
 25 | import sys
 26 | from urllib import request
 27 | 
 28 | 
 29 | def url_file_name(url: str) -> str:
 30 |     """
 31 |     Extract file name from url.
 32 | 
 33 |     :param url: URL to extract file name from.
 34 |     :return: File name.
 35 |     """
 36 |     return url.split('/')[-1] if len(url) > 0 else ''
 37 | 
 38 | 
 39 | def download(url: str, file_path: str) -> None:
 40 |     """
 41 |     Download a file to the given path.
 42 | 
 43 |     :param url: URL to download
 44 |     :param file_path: Where to download the content.
 45 |     """
 46 | 
 47 |     def progress(count, block_size, total_size):
 48 |         progress_pct = float(count * block_size) / float(total_size) * 100.0
 49 |         sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct))
 50 |         sys.stdout.flush()
 51 | 
 52 |     if not os.path.isfile(file_path):
 53 |         opener = request.build_opener()
 54 |         opener.addheaders = [('User-agent', 'Mozilla/5.0')]
 55 |         request.install_opener(opener)
 56 |         pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
 57 |         f, _ = request.urlretrieve(url, file_path, progress)
 58 |         sys.stdout.write('\n')
 59 |         sys.stdout.flush()
 60 |         file_info = os.stat(f)
 61 |         logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
 62 |     else:
 63 |         file_info = os.stat(file_path)
 64 |         logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')
 65 | 
 66 | 
 67 | @dataclass()
 68 | class M4Dataset:
 69 |     ids: np.ndarray
 70 |     groups: np.ndarray
 71 |     frequencies: np.ndarray
 72 |     horizons: np.ndarray
 73 |     values: np.ndarray
 74 | 
 75 |     @staticmethod
 76 |     def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset':
 77 |         """
 78 |         Load cached dataset.
 79 | 
 80 |         :param training: Load training part if training is True, test part otherwise.
 81 |         """
 82 |         info_file = os.path.join(dataset_file, 'M4-info.csv')
 83 |         train_cache_file = os.path.join(dataset_file, 'training.npz')
 84 |         test_cache_file = os.path.join(dataset_file, 'test.npz')
 85 |         m4_info = pd.read_csv(info_file)
 86 |         return M4Dataset(ids=m4_info.M4id.values,
 87 |                          groups=m4_info.SP.values,
 88 |                          frequencies=m4_info.Frequency.values,
 89 |                          horizons=m4_info.Horizon.values,
 90 |                          values=np.load(
 91 |                              train_cache_file if training else test_cache_file,
 92 |                              allow_pickle=True))
 93 | 
 94 | 
 95 | @dataclass()
 96 | class M4Meta:
 97 |     seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']
 98 |     horizons = [6, 8, 18, 13, 14, 48]
 99 |     frequencies = [1, 4, 12, 1, 1, 24]
100 |     horizons_map = {
101 |         'Yearly': 6,
102 |         'Quarterly': 8,
103 |         'Monthly': 18,
104 |         'Weekly': 13,
105 |         'Daily': 14,
106 |         'Hourly': 48
107 |     }  # different predict length
108 |     frequency_map = {
109 |         'Yearly': 1,
110 |         'Quarterly': 4,
111 |         'Monthly': 12,
112 |         'Weekly': 1,
113 |         'Daily': 1,
114 |         'Hourly': 24
115 |     }
116 |     history_size = {
117 |         'Yearly': 1.5,
118 |         'Quarterly': 1.5,
119 |         'Monthly': 1.5,
120 |         'Weekly': 10,
121 |         'Daily': 10,
122 |         'Hourly': 10
123 |     }  # from interpretable.gin
124 | 
125 | 
126 | def load_m4_info() -> pd.DataFrame:
127 |     """
128 |     Load M4Info file.
129 | 
130 |     :return: Pandas DataFrame of M4Info.
131 |     """
132 |     return pd.read_csv(INFO_FILE_PATH)
133 | 


--------------------------------------------------------------------------------
/data_provider_pretrain/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/data_provider_pretrain/__init__.py


--------------------------------------------------------------------------------
/data_provider_pretrain/data_factory.py:
--------------------------------------------------------------------------------
 1 | from torch.utils.data import DataLoader
 2 | 
 3 | from data_provider_pretrain.data_loader import Dataset_ETT_hour, Dataset_ETT_minute
 4 | 
 5 | data_dict = {
 6 |     'ETTh1': Dataset_ETT_hour,
 7 |     'ETTh2': Dataset_ETT_hour,
 8 |     'ETTm1': Dataset_ETT_minute,
 9 |     'ETTm2': Dataset_ETT_minute,
10 | }
11 | 
12 | 
13 | def data_provider(args, data, data_path, pretrain=True, flag='train'):
14 |     Data = data_dict[data]
15 |     timeenc = 0 if args.embed != 'timeF' else 1
16 |     percent = args.percent
17 | 
18 |     if flag == 'test':
19 |         shuffle_flag = False
20 |         drop_last = True
21 |         batch_size = args.batch_size
22 |         freq = args.freq
23 |     else:
24 |         shuffle_flag = True
25 |         drop_last = True
26 |         batch_size = args.batch_size
27 |         freq = args.freq
28 | 
29 |     data_set = Data(
30 |         root_path=args.root_path,
31 |         data_path=data_path,
32 |         flag=flag,
33 |         size=[args.seq_len, args.label_len, args.pred_len],
34 |         features=args.features,
35 |         target=args.target,
36 |         timeenc=timeenc,
37 |         freq=freq,
38 |         percent=percent,
39 |         seasonal_patterns=args.seasonal_patterns,
40 |         pretrain=pretrain
41 |     )
42 |     data_loader = DataLoader(
43 |         data_set,
44 |         batch_size=batch_size,
45 |         shuffle=shuffle_flag,
46 |         num_workers=args.num_workers,
47 |         drop_last=drop_last)
48 |     return data_set, data_loader
49 | 


--------------------------------------------------------------------------------
/data_provider_pretrain/data_loader.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pandas as pd
  3 | from torch.utils.data import Dataset
  4 | from sklearn.preprocessing import StandardScaler
  5 | from utils.timefeatures import time_features
  6 | import warnings
  7 | 
  8 | warnings.filterwarnings('ignore')
  9 | 
 10 | 
 11 | class Dataset_ETT_hour(Dataset):
 12 |     def __init__(self, root_path, flag='train', size=None,
 13 |                  features='S', data_path='ETTh1.csv',
 14 |                  target='OT', scale=True, timeenc=0, freq='h', percent=100,
 15 |                  seasonal_patterns=None, pretrain=True):
 16 |         if size == None:
 17 |             self.seq_len = 24 * 4 * 4
 18 |             self.label_len = 24 * 4
 19 |             self.pred_len = 24 * 4
 20 |         else:
 21 |             self.seq_len = size[0]
 22 |             self.label_len = size[1]
 23 |             self.pred_len = size[2]
 24 |         # init
 25 |         assert flag in ['train', 'test', 'val']
 26 |         type_map = {'train': 0, 'val': 1, 'test': 2}
 27 |         self.set_type = type_map[flag]
 28 | 
 29 |         self.percent = percent
 30 |         self.pretrain = pretrain
 31 |         self.features = features
 32 |         self.target = target
 33 |         self.scale = scale
 34 |         self.timeenc = timeenc
 35 |         self.freq = freq
 36 | 
 37 |         # self.percent = percent
 38 |         self.root_path = root_path
 39 |         self.data_path = data_path
 40 |         self.__read_data__()
 41 | 
 42 |         self.enc_in = self.data_x.shape[-1]
 43 |         self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1
 44 | 
 45 |     def __read_data__(self):
 46 |         self.scaler = StandardScaler()
 47 |         df_raw = pd.read_csv(os.path.join(self.root_path,
 48 |                                           self.data_path))
 49 | 
 50 |         if self.pretrain:
 51 |             # border1s = [0, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
 52 |             # border2s = [12 * 30 * 24 + 8 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
 53 |             border1s = [0, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
 54 |             border2s = [12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
 55 |         else:
 56 |             border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
 57 |             border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
 58 | 
 59 |         border1 = border1s[self.set_type]
 60 |         border2 = border2s[self.set_type]
 61 | 
 62 |         if self.set_type == 0:
 63 |             border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len
 64 | 
 65 |         if self.features == 'M' or self.features == 'MS':
 66 |             cols_data = df_raw.columns[1:]
 67 |             df_data = df_raw[cols_data]
 68 |         elif self.features == 'S':
 69 |             df_data = df_raw[[self.target]]
 70 | 
 71 |         if self.scale:
 72 |             train_data = df_data[border1s[0]:border2s[0]]
 73 |             self.scaler.fit(train_data.values)
 74 |             data = self.scaler.transform(df_data.values)
 75 |         else:
 76 |             data = df_data.values
 77 | 
 78 |         df_stamp = df_raw[['date']][border1:border2]
 79 |         df_stamp['date'] = pd.to_datetime(df_stamp.date)
 80 |         if self.timeenc == 0:
 81 |             df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)
 82 |             df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)
 83 |             df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)
 84 |             df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)
 85 |             data_stamp = df_stamp.drop(['date'], 1).values
 86 |         elif self.timeenc == 1:
 87 |             data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
 88 |             data_stamp = data_stamp.transpose(1, 0)
 89 | 
 90 |         self.data_x = data[border1:border2]
 91 |         self.data_y = data[border1:border2]
 92 |         self.data_stamp = data_stamp
 93 | 
 94 |     def __getitem__(self, index):
 95 |         feat_id = index // self.tot_len
 96 |         s_begin = index % self.tot_len
 97 | 
 98 |         s_end = s_begin + self.seq_len
 99 |         r_begin = s_end - self.label_len
100 |         r_end = r_begin + self.label_len + self.pred_len
101 |         seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
102 |         seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
103 |         seq_x_mark = self.data_stamp[s_begin:s_end]
104 |         seq_y_mark = self.data_stamp[r_begin:r_end]
105 | 
106 |         return seq_x, seq_y, seq_x_mark, seq_y_mark
107 | 
108 |     def __len__(self):
109 |         return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in
110 | 
111 |     def inverse_transform(self, data):
112 |         return self.scaler.inverse_transform(data)
113 | 
114 | 
115 | class Dataset_ETT_minute(Dataset):
116 |     def __init__(self, root_path, flag='train', size=None,
117 |                  features='S', data_path='ETTm1.csv',
118 |                  target='OT', scale=True, timeenc=0, freq='t', percent=100,
119 |                  seasonal_patterns=None, pretrain=True):
120 |         if size == None:
121 |             self.seq_len = 24 * 4 * 4
122 |             self.label_len = 24 * 4
123 |             self.pred_len = 24 * 4
124 |         else:
125 |             self.seq_len = size[0]
126 |             self.label_len = size[1]
127 |             self.pred_len = size[2]
128 |         # init
129 |         assert flag in ['train', 'test', 'val']
130 |         type_map = {'train': 0, 'val': 1, 'test': 2}
131 |         self.set_type = type_map[flag]
132 | 
133 |         self.percent = percent
134 |         self.pretrain = pretrain
135 |         self.features = features
136 |         self.target = target
137 |         self.scale = scale
138 |         self.timeenc = timeenc
139 |         self.freq = freq
140 | 
141 |         self.root_path = root_path
142 |         self.data_path = data_path
143 |         self.__read_data__()
144 | 
145 |         self.enc_in = self.data_x.shape[-1]
146 |         self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1
147 | 
148 |     def __read_data__(self):
149 |         self.scaler = StandardScaler()
150 |         df_raw = pd.read_csv(os.path.join(self.root_path,
151 |                                           self.data_path))
152 | 
153 |         if self.pretrain:
154 |             # border1s = [0, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len,
155 |             #             12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
156 |             # border2s = [12 * 30 * 24 * 4 + 8 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4,
157 |             #             12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
158 |             border1s = [0, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len,
159 |                         12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
160 |             border2s = [12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4,
161 |                         12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
162 |         else:
163 |             border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
164 |             border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
165 | 
166 |         border1 = border1s[self.set_type]
167 |         border2 = border2s[self.set_type]
168 | 
169 |         if self.set_type == 0:
170 |             border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len
171 | 
172 |         if self.features == 'M' or self.features == 'MS':
173 |             cols_data = df_raw.columns[1:]
174 |             df_data = df_raw[cols_data]
175 |         elif self.features == 'S':
176 |             df_data = df_raw[[self.target]]
177 | 
178 |         if self.scale:
179 |             train_data = df_data[border1s[0]:border2s[0]]
180 |             self.scaler.fit(train_data.values)
181 |             data = self.scaler.transform(df_data.values)
182 |         else:
183 |             data = df_data.values
184 | 
185 |         df_stamp = df_raw[['date']][border1:border2]
186 |         df_stamp['date'] = pd.to_datetime(df_stamp.date)
187 |         if self.timeenc == 0:
188 |             df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)
189 |             df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)
190 |             df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)
191 |             df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)
192 |             df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1)
193 |             df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
194 |             data_stamp = df_stamp.drop(['date'], 1).values
195 |         elif self.timeenc == 1:
196 |             data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
197 |             data_stamp = data_stamp.transpose(1, 0)
198 | 
199 |         self.data_x = data[border1:border2]
200 |         self.data_y = data[border1:border2]
201 |         self.data_stamp = data_stamp
202 | 
203 |     def __getitem__(self, index):
204 |         feat_id = index // self.tot_len
205 |         s_begin = index % self.tot_len
206 | 
207 |         s_end = s_begin + self.seq_len
208 |         r_begin = s_end - self.label_len
209 |         r_end = r_begin + self.label_len + self.pred_len
210 |         seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
211 |         seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
212 |         seq_x_mark = self.data_stamp[s_begin:s_end]
213 |         seq_y_mark = self.data_stamp[r_begin:r_end]
214 | 
215 |         return seq_x, seq_y, seq_x_mark, seq_y_mark
216 | 
217 |     def __len__(self):
218 |         return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in
219 | 
220 |     def inverse_transform(self, data):
221 |         return self.scaler.inverse_transform(data)
222 | 


--------------------------------------------------------------------------------
/dataset/prompt_bank/ECL.txt:
--------------------------------------------------------------------------------
1 | Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available.This archive contains 2075259 measurements gathered in a house located in Sceaux (7km of Paris, France) between December 2006 and November 2010 (47 months).
2 | 


--------------------------------------------------------------------------------
/dataset/prompt_bank/ETT.txt:
--------------------------------------------------------------------------------
1 | The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment. This dataset consists of 2 years data from two separated counties in China. To explore the granularity on the Long sequence time-series forecasting (LSTF) problem, different subsets are created, {ETTh1, ETTh2} for 1-hour-level and ETTm1 for 15-minutes-level. Each data point consists of the target value ”oil temperature” and 6 power load features. The train/val/test is 12/4/4 months.
2 | 
3 | 


--------------------------------------------------------------------------------
/dataset/prompt_bank/Traffic.txt:
--------------------------------------------------------------------------------
1 | Traffic is a collection of hourly data from California Department of Transportation, which describes the road occupancy rates measured by different sensors on San Francisco Bay area freeways.


--------------------------------------------------------------------------------
/dataset/prompt_bank/Weather.txt:
--------------------------------------------------------------------------------
1 | Weather is recorded every 10 minutes for the 2020 whole year, which contains 21 meteorological indicators, such as air temperature, humidity, etc.
2 | 


--------------------------------------------------------------------------------
/dataset/prompt_bank/m4.txt:
--------------------------------------------------------------------------------
1 | The M4 dataset is a collection of 100,000 time series used for the fourth edition of the Makridakis forecasting Competition. The M4 dataset consists of time series of yearly, quarterly, monthly and other (weekly, daily and hourly) data, which are divided into training and test sets. The minimum numbers of observations in the training test are 13 for yearly, 16 for quarterly, 42 for monthly, 80 for weekly, 93 for daily and 700 for hourly series. The participants were asked to produce the following numbers of forecasts beyond the available data that they had been given: six for yearly, eight for quarterly, 18 for monthly series, 13 for weekly series and 14 and 48 forecasts respectively for the daily and hourly ones.
2 | 
3 | 


--------------------------------------------------------------------------------
/ds_config_zero2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "bf16": {
 3 |     "enabled": true,
 4 |     "auto_cast": true
 5 |   },
 6 |   "zero_optimization": {
 7 |     "stage": 2,
 8 |     "allgather_partitions": true,
 9 |     "allgather_bucket_size": 2e8,
10 |     "overlap_comm": true,
11 |     "reduce_scatter": true,
12 |     "reduce_bucket_size": 2e8,
13 |     "contiguous_gradients": true,
14 |     "sub_group_size": 1e9
15 |   },
16 |   "gradient_accumulation_steps": "auto",
17 |   "train_batch_size": "auto",
18 |   "train_micro_batch_size_per_gpu": "auto",
19 |   "steps_per_print": 10,
20 |   "wall_clock_breakdown": false
21 | }


--------------------------------------------------------------------------------
/figures/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/framework.png


--------------------------------------------------------------------------------
/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/logo.png


--------------------------------------------------------------------------------
/figures/method-detailed-illustration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/method-detailed-illustration.png


--------------------------------------------------------------------------------
/layers/AutoCorrelation.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import matplotlib.pyplot as plt
  5 | import numpy as np
  6 | import math
  7 | from math import sqrt
  8 | import os
  9 | 
 10 | 
 11 | class AutoCorrelation(nn.Module):
 12 |     """
 13 |     AutoCorrelation Mechanism with the following two phases:
 14 |     (1) period-based dependencies discovery
 15 |     (2) time delay aggregation
 16 |     This block can replace the self-attention family mechanism seamlessly.
 17 |     """
 18 | 
 19 |     def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
 20 |         super(AutoCorrelation, self).__init__()
 21 |         self.factor = factor
 22 |         self.scale = scale
 23 |         self.mask_flag = mask_flag
 24 |         self.output_attention = output_attention
 25 |         self.dropout = nn.Dropout(attention_dropout)
 26 | 
 27 |     def time_delay_agg_training(self, values, corr):
 28 |         """
 29 |         SpeedUp version of Autocorrelation (a batch-normalization style design)
 30 |         This is for the training phase.
 31 |         """
 32 |         head = values.shape[1]
 33 |         channel = values.shape[2]
 34 |         length = values.shape[3]
 35 |         # find top k
 36 |         top_k = int(self.factor * math.log(length))
 37 |         mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
 38 |         index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
 39 |         weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
 40 |         # update corr
 41 |         tmp_corr = torch.softmax(weights, dim=-1)
 42 |         # aggregation
 43 |         tmp_values = values
 44 |         delays_agg = torch.zeros_like(values).float()
 45 |         for i in range(top_k):
 46 |             pattern = torch.roll(tmp_values, -int(index[i]), -1)
 47 |             delays_agg = delays_agg + pattern * \
 48 |                          (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
 49 |         return delays_agg
 50 | 
 51 |     def time_delay_agg_inference(self, values, corr):
 52 |         """
 53 |         SpeedUp version of Autocorrelation (a batch-normalization style design)
 54 |         This is for the inference phase.
 55 |         """
 56 |         batch = values.shape[0]
 57 |         head = values.shape[1]
 58 |         channel = values.shape[2]
 59 |         length = values.shape[3]
 60 |         # index init
 61 |         init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
 62 |         # find top k
 63 |         top_k = int(self.factor * math.log(length))
 64 |         mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
 65 |         weights, delay = torch.topk(mean_value, top_k, dim=-1)
 66 |         # update corr
 67 |         tmp_corr = torch.softmax(weights, dim=-1)
 68 |         # aggregation
 69 |         tmp_values = values.repeat(1, 1, 1, 2)
 70 |         delays_agg = torch.zeros_like(values).float()
 71 |         for i in range(top_k):
 72 |             tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
 73 |             pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
 74 |             delays_agg = delays_agg + pattern * \
 75 |                          (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
 76 |         return delays_agg
 77 | 
 78 |     def time_delay_agg_full(self, values, corr):
 79 |         """
 80 |         Standard version of Autocorrelation
 81 |         """
 82 |         batch = values.shape[0]
 83 |         head = values.shape[1]
 84 |         channel = values.shape[2]
 85 |         length = values.shape[3]
 86 |         # index init
 87 |         init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
 88 |         # find top k
 89 |         top_k = int(self.factor * math.log(length))
 90 |         weights, delay = torch.topk(corr, top_k, dim=-1)
 91 |         # update corr
 92 |         tmp_corr = torch.softmax(weights, dim=-1)
 93 |         # aggregation
 94 |         tmp_values = values.repeat(1, 1, 1, 2)
 95 |         delays_agg = torch.zeros_like(values).float()
 96 |         for i in range(top_k):
 97 |             tmp_delay = init_index + delay[..., i].unsqueeze(-1)
 98 |             pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
 99 |             delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
100 |         return delays_agg
101 | 
102 |     def forward(self, queries, keys, values, attn_mask):
103 |         B, L, H, E = queries.shape
104 |         _, S, _, D = values.shape
105 |         if L > S:
106 |             zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
107 |             values = torch.cat([values, zeros], dim=1)
108 |             keys = torch.cat([keys, zeros], dim=1)
109 |         else:
110 |             values = values[:, :L, :, :]
111 |             keys = keys[:, :L, :, :]
112 | 
113 |         # period-based dependencies
114 |         q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
115 |         k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
116 |         res = q_fft * torch.conj(k_fft)
117 |         corr = torch.fft.irfft(res, dim=-1)
118 | 
119 |         # time delay agg
120 |         if self.training:
121 |             V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
122 |         else:
123 |             V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
124 | 
125 |         if self.output_attention:
126 |             return (V.contiguous(), corr.permute(0, 3, 1, 2))
127 |         else:
128 |             return (V.contiguous(), None)
129 | 
130 | 
131 | class AutoCorrelationLayer(nn.Module):
132 |     def __init__(self, correlation, d_model, n_heads, d_keys=None,
133 |                  d_values=None):
134 |         super(AutoCorrelationLayer, self).__init__()
135 | 
136 |         d_keys = d_keys or (d_model // n_heads)
137 |         d_values = d_values or (d_model // n_heads)
138 | 
139 |         self.inner_correlation = correlation
140 |         self.query_projection = nn.Linear(d_model, d_keys * n_heads)
141 |         self.key_projection = nn.Linear(d_model, d_keys * n_heads)
142 |         self.value_projection = nn.Linear(d_model, d_values * n_heads)
143 |         self.out_projection = nn.Linear(d_values * n_heads, d_model)
144 |         self.n_heads = n_heads
145 | 
146 |     def forward(self, queries, keys, values, attn_mask):
147 |         B, L, _ = queries.shape
148 |         _, S, _ = keys.shape
149 |         H = self.n_heads
150 | 
151 |         queries = self.query_projection(queries).view(B, L, H, -1)
152 |         keys = self.key_projection(keys).view(B, S, H, -1)
153 |         values = self.value_projection(values).view(B, S, H, -1)
154 | 
155 |         out, attn = self.inner_correlation(
156 |             queries,
157 |             keys,
158 |             values,
159 |             attn_mask
160 |         )
161 |         out = out.view(B, L, -1)
162 | 
163 |         return self.out_projection(out), attn
164 | 


--------------------------------------------------------------------------------
/layers/Autoformer_EncDec.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class my_Layernorm(nn.Module):
  7 |     """
  8 |     Special designed layernorm for the seasonal part
  9 |     """
 10 | 
 11 |     def __init__(self, channels):
 12 |         super(my_Layernorm, self).__init__()
 13 |         self.layernorm = nn.LayerNorm(channels)
 14 | 
 15 |     def forward(self, x):
 16 |         x_hat = self.layernorm(x)
 17 |         bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
 18 |         return x_hat - bias
 19 | 
 20 | 
 21 | class moving_avg(nn.Module):
 22 |     """
 23 |     Moving average block to highlight the trend of time series
 24 |     """
 25 | 
 26 |     def __init__(self, kernel_size, stride):
 27 |         super(moving_avg, self).__init__()
 28 |         self.kernel_size = kernel_size
 29 |         self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
 30 | 
 31 |     def forward(self, x):
 32 |         # padding on the both ends of time series
 33 |         front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
 34 |         end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
 35 |         x = torch.cat([front, x, end], dim=1)
 36 |         x = self.avg(x.permute(0, 2, 1))
 37 |         x = x.permute(0, 2, 1)
 38 |         return x
 39 | 
 40 | 
 41 | class series_decomp(nn.Module):
 42 |     """
 43 |     Series decomposition block
 44 |     """
 45 | 
 46 |     def __init__(self, kernel_size):
 47 |         super(series_decomp, self).__init__()
 48 |         self.moving_avg = moving_avg(kernel_size, stride=1)
 49 | 
 50 |     def forward(self, x):
 51 |         moving_mean = self.moving_avg(x)
 52 |         res = x - moving_mean
 53 |         return res, moving_mean
 54 | 
 55 | 
 56 | class series_decomp_multi(nn.Module):
 57 |     """
 58 |     Multiple Series decomposition block from FEDformer
 59 |     """
 60 | 
 61 |     def __init__(self, kernel_size):
 62 |         super(series_decomp_multi, self).__init__()
 63 |         self.kernel_size = kernel_size
 64 |         self.series_decomp = [series_decomp(kernel) for kernel in kernel_size]
 65 | 
 66 |     def forward(self, x):
 67 |         moving_mean = []
 68 |         res = []
 69 |         for func in self.series_decomp:
 70 |             sea, moving_avg = func(x)
 71 |             moving_mean.append(moving_avg)
 72 |             res.append(sea)
 73 | 
 74 |         sea = sum(res) / len(res)
 75 |         moving_mean = sum(moving_mean) / len(moving_mean)
 76 |         return sea, moving_mean
 77 | 
 78 | 
 79 | class EncoderLayer(nn.Module):
 80 |     """
 81 |     Autoformer encoder layer with the progressive decomposition architecture
 82 |     """
 83 | 
 84 |     def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
 85 |         super(EncoderLayer, self).__init__()
 86 |         d_ff = d_ff or 4 * d_model
 87 |         self.attention = attention
 88 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
 89 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
 90 |         self.decomp1 = series_decomp(moving_avg)
 91 |         self.decomp2 = series_decomp(moving_avg)
 92 |         self.dropout = nn.Dropout(dropout)
 93 |         self.activation = F.relu if activation == "relu" else F.gelu
 94 | 
 95 |     def forward(self, x, attn_mask=None):
 96 |         new_x, attn = self.attention(
 97 |             x, x, x,
 98 |             attn_mask=attn_mask
 99 |         )
100 |         x = x + self.dropout(new_x)
101 |         x, _ = self.decomp1(x)
102 |         y = x
103 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
104 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
105 |         res, _ = self.decomp2(x + y)
106 |         return res, attn
107 | 
108 | 
109 | class Encoder(nn.Module):
110 |     """
111 |     Autoformer encoder
112 |     """
113 | 
114 |     def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
115 |         super(Encoder, self).__init__()
116 |         self.attn_layers = nn.ModuleList(attn_layers)
117 |         self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
118 |         self.norm = norm_layer
119 | 
120 |     def forward(self, x, attn_mask=None):
121 |         attns = []
122 |         if self.conv_layers is not None:
123 |             for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
124 |                 x, attn = attn_layer(x, attn_mask=attn_mask)
125 |                 x = conv_layer(x)
126 |                 attns.append(attn)
127 |             x, attn = self.attn_layers[-1](x)
128 |             attns.append(attn)
129 |         else:
130 |             for attn_layer in self.attn_layers:
131 |                 x, attn = attn_layer(x, attn_mask=attn_mask)
132 |                 attns.append(attn)
133 | 
134 |         if self.norm is not None:
135 |             x = self.norm(x)
136 | 
137 |         return x, attns
138 | 
139 | 
140 | class DecoderLayer(nn.Module):
141 |     """
142 |     Autoformer decoder layer with the progressive decomposition architecture
143 |     """
144 | 
145 |     def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
146 |                  moving_avg=25, dropout=0.1, activation="relu"):
147 |         super(DecoderLayer, self).__init__()
148 |         d_ff = d_ff or 4 * d_model
149 |         self.self_attention = self_attention
150 |         self.cross_attention = cross_attention
151 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
152 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
153 |         self.decomp1 = series_decomp(moving_avg)
154 |         self.decomp2 = series_decomp(moving_avg)
155 |         self.decomp3 = series_decomp(moving_avg)
156 |         self.dropout = nn.Dropout(dropout)
157 |         self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
158 |                                     padding_mode='circular', bias=False)
159 |         self.activation = F.relu if activation == "relu" else F.gelu
160 | 
161 |     def forward(self, x, cross, x_mask=None, cross_mask=None):
162 |         x = x + self.dropout(self.self_attention(
163 |             x, x, x,
164 |             attn_mask=x_mask
165 |         )[0])
166 |         x, trend1 = self.decomp1(x)
167 |         x = x + self.dropout(self.cross_attention(
168 |             x, cross, cross,
169 |             attn_mask=cross_mask
170 |         )[0])
171 |         x, trend2 = self.decomp2(x)
172 |         y = x
173 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
174 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
175 |         x, trend3 = self.decomp3(x + y)
176 | 
177 |         residual_trend = trend1 + trend2 + trend3
178 |         residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
179 |         return x, residual_trend
180 | 
181 | 
182 | class Decoder(nn.Module):
183 |     """
184 |     Autoformer encoder
185 |     """
186 | 
187 |     def __init__(self, layers, norm_layer=None, projection=None):
188 |         super(Decoder, self).__init__()
189 |         self.layers = nn.ModuleList(layers)
190 |         self.norm = norm_layer
191 |         self.projection = projection
192 | 
193 |     def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
194 |         for layer in self.layers:
195 |             x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
196 |             trend = trend + residual_trend
197 | 
198 |         if self.norm is not None:
199 |             x = self.norm(x)
200 | 
201 |         if self.projection is not None:
202 |             x = self.projection(x)
203 |         return x, trend
204 | 


--------------------------------------------------------------------------------
/layers/Conv_Blocks.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Inception_Block_V1(nn.Module):
 6 |     def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
 7 |         super(Inception_Block_V1, self).__init__()
 8 |         self.in_channels = in_channels
 9 |         self.out_channels = out_channels
10 |         self.num_kernels = num_kernels
11 |         kernels = []
12 |         for i in range(self.num_kernels):
13 |             kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i))
14 |         self.kernels = nn.ModuleList(kernels)
15 |         if init_weight:
16 |             self._initialize_weights()
17 | 
18 |     def _initialize_weights(self):
19 |         for m in self.modules():
20 |             if isinstance(m, nn.Conv2d):
21 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
22 |                 if m.bias is not None:
23 |                     nn.init.constant_(m.bias, 0)
24 | 
25 |     def forward(self, x):
26 |         res_list = []
27 |         for i in range(self.num_kernels):
28 |             res_list.append(self.kernels[i](x))
29 |         res = torch.stack(res_list, dim=-1).mean(-1)
30 |         return res
31 | 
32 | 
33 | class Inception_Block_V2(nn.Module):
34 |     def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
35 |         super(Inception_Block_V2, self).__init__()
36 |         self.in_channels = in_channels
37 |         self.out_channels = out_channels
38 |         self.num_kernels = num_kernels
39 |         kernels = []
40 |         for i in range(self.num_kernels // 2):
41 |             kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[1, 2 * i + 3], padding=[0, i + 1]))
42 |             kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[2 * i + 3, 1], padding=[i + 1, 0]))
43 |         kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
44 |         self.kernels = nn.ModuleList(kernels)
45 |         if init_weight:
46 |             self._initialize_weights()
47 | 
48 |     def _initialize_weights(self):
49 |         for m in self.modules():
50 |             if isinstance(m, nn.Conv2d):
51 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
52 |                 if m.bias is not None:
53 |                     nn.init.constant_(m.bias, 0)
54 | 
55 |     def forward(self, x):
56 |         res_list = []
57 |         for i in range(self.num_kernels + 1):
58 |             res_list.append(self.kernels[i](x))
59 |         res = torch.stack(res_list, dim=-1).mean(-1)
60 |         return res
61 | 


--------------------------------------------------------------------------------
/layers/Embed.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch import Tensor
  5 | from torch.nn.utils import weight_norm
  6 | import math
  7 | 
  8 | 
  9 | class PositionalEmbedding(nn.Module):
 10 |     def __init__(self, d_model, max_len=5000):
 11 |         super(PositionalEmbedding, self).__init__()
 12 |         # Compute the positional encodings once in log space.
 13 |         pe = torch.zeros(max_len, d_model).float()
 14 |         pe.require_grad = False
 15 | 
 16 |         position = torch.arange(0, max_len).float().unsqueeze(1)
 17 |         div_term = (torch.arange(0, d_model, 2).float()
 18 |                     * -(math.log(10000.0) / d_model)).exp()
 19 | 
 20 |         pe[:, 0::2] = torch.sin(position * div_term)
 21 |         pe[:, 1::2] = torch.cos(position * div_term)
 22 | 
 23 |         pe = pe.unsqueeze(0)
 24 |         self.register_buffer('pe', pe)
 25 | 
 26 |     def forward(self, x):
 27 |         return self.pe[:, :x.size(1)]
 28 | 
 29 | 
 30 | class TokenEmbedding(nn.Module):
 31 |     def __init__(self, c_in, d_model):
 32 |         super(TokenEmbedding, self).__init__()
 33 |         padding = 1 if torch.__version__ >= '1.5.0' else 2
 34 |         self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
 35 |                                    kernel_size=3, padding=padding, padding_mode='circular', bias=False)
 36 |         for m in self.modules():
 37 |             if isinstance(m, nn.Conv1d):
 38 |                 nn.init.kaiming_normal_(
 39 |                     m.weight, mode='fan_in', nonlinearity='leaky_relu')
 40 | 
 41 |     def forward(self, x):
 42 |         x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
 43 |         return x
 44 | 
 45 | 
 46 | class FixedEmbedding(nn.Module):
 47 |     def __init__(self, c_in, d_model):
 48 |         super(FixedEmbedding, self).__init__()
 49 | 
 50 |         w = torch.zeros(c_in, d_model).float()
 51 |         w.require_grad = False
 52 | 
 53 |         position = torch.arange(0, c_in).float().unsqueeze(1)
 54 |         div_term = (torch.arange(0, d_model, 2).float()
 55 |                     * -(math.log(10000.0) / d_model)).exp()
 56 | 
 57 |         w[:, 0::2] = torch.sin(position * div_term)
 58 |         w[:, 1::2] = torch.cos(position * div_term)
 59 | 
 60 |         self.emb = nn.Embedding(c_in, d_model)
 61 |         self.emb.weight = nn.Parameter(w, requires_grad=False)
 62 | 
 63 |     def forward(self, x):
 64 |         return self.emb(x).detach()
 65 | 
 66 | 
 67 | class TemporalEmbedding(nn.Module):
 68 |     def __init__(self, d_model, embed_type='fixed', freq='h'):
 69 |         super(TemporalEmbedding, self).__init__()
 70 | 
 71 |         minute_size = 4
 72 |         hour_size = 24
 73 |         weekday_size = 7
 74 |         day_size = 32
 75 |         month_size = 13
 76 | 
 77 |         Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
 78 |         if freq == 't':
 79 |             self.minute_embed = Embed(minute_size, d_model)
 80 |         self.hour_embed = Embed(hour_size, d_model)
 81 |         self.weekday_embed = Embed(weekday_size, d_model)
 82 |         self.day_embed = Embed(day_size, d_model)
 83 |         self.month_embed = Embed(month_size, d_model)
 84 | 
 85 |     def forward(self, x):
 86 |         x = x.long()
 87 |         minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
 88 |             self, 'minute_embed') else 0.
 89 |         hour_x = self.hour_embed(x[:, :, 3])
 90 |         weekday_x = self.weekday_embed(x[:, :, 2])
 91 |         day_x = self.day_embed(x[:, :, 1])
 92 |         month_x = self.month_embed(x[:, :, 0])
 93 | 
 94 |         return hour_x + weekday_x + day_x + month_x + minute_x
 95 | 
 96 | 
 97 | class TimeFeatureEmbedding(nn.Module):
 98 |     def __init__(self, d_model, embed_type='timeF', freq='h'):
 99 |         super(TimeFeatureEmbedding, self).__init__()
100 | 
101 |         freq_map = {'h': 4, 't': 5, 's': 6,
102 |                     'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
103 |         d_inp = freq_map[freq]
104 |         self.embed = nn.Linear(d_inp, d_model, bias=False)
105 | 
106 |     def forward(self, x):
107 |         return self.embed(x)
108 | 
109 | 
110 | class DataEmbedding(nn.Module):
111 |     def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
112 |         super(DataEmbedding, self).__init__()
113 | 
114 |         self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
115 |         self.position_embedding = PositionalEmbedding(d_model=d_model)
116 |         self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
117 |                                                     freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
118 |             d_model=d_model, embed_type=embed_type, freq=freq)
119 |         self.dropout = nn.Dropout(p=dropout)
120 | 
121 |     def forward(self, x, x_mark):
122 |         if x_mark is None:
123 |             x = self.value_embedding(x) + self.position_embedding(x).to(x.device)
124 |         else:
125 |             x = self.value_embedding(
126 |                 x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
127 |         return self.dropout(x)
128 | 
129 | 
130 | class DataEmbedding_wo_pos(nn.Module):
131 |     def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
132 |         super(DataEmbedding_wo_pos, self).__init__()
133 | 
134 |         self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
135 |         self.position_embedding = PositionalEmbedding(d_model=d_model)
136 |         self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
137 |                                                     freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
138 |             d_model=d_model, embed_type=embed_type, freq=freq)
139 |         self.dropout = nn.Dropout(p=dropout)
140 | 
141 |     def forward(self, x, x_mark):
142 |         if x_mark is None:
143 |             x = self.value_embedding(x)
144 |         else:
145 |             x = self.value_embedding(x) + self.temporal_embedding(x_mark)
146 |         return self.dropout(x)
147 | 
148 | 
149 | class ReplicationPad1d(nn.Module):
150 |     def __init__(self, padding) -> None:
151 |         super(ReplicationPad1d, self).__init__()
152 |         self.padding = padding
153 | 
154 |     def forward(self, input: Tensor) -> Tensor:
155 |         replicate_padding = input[:, :, -1].unsqueeze(-1).repeat(1, 1, self.padding[-1])
156 |         output = torch.cat([input, replicate_padding], dim=-1)
157 |         return output
158 | 
159 | 
160 | class PatchEmbedding(nn.Module):
161 |     def __init__(self, d_model, patch_len, stride, dropout):
162 |         super(PatchEmbedding, self).__init__()
163 |         # Patching
164 |         self.patch_len = patch_len
165 |         self.stride = stride
166 |         self.padding_patch_layer = ReplicationPad1d((0, stride))
167 | 
168 |         # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space
169 |         self.value_embedding = TokenEmbedding(patch_len, d_model)
170 | 
171 |         # Positional embedding
172 |         # self.position_embedding = PositionalEmbedding(d_model)
173 | 
174 |         # Residual dropout
175 |         self.dropout = nn.Dropout(dropout)
176 | 
177 |     def forward(self, x):
178 |         # do patching
179 |         n_vars = x.shape[1]
180 |         x = self.padding_patch_layer(x)
181 |         x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
182 |         x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
183 |         # Input encoding
184 |         x = self.value_embedding(x)
185 |         return self.dropout(x), n_vars
186 | 
187 | 
188 | class DataEmbedding_wo_time(nn.Module):
189 |     def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
190 |         super(DataEmbedding_wo_time, self).__init__()
191 | 
192 |         self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
193 |         self.position_embedding = PositionalEmbedding(d_model=d_model)
194 |         self.dropout = nn.Dropout(p=dropout)
195 | 
196 |     def forward(self, x):
197 |         x = self.value_embedding(x) + self.position_embedding(x)
198 |         return self.dropout(x)
199 | 


--------------------------------------------------------------------------------
/layers/SelfAttention_Family.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import numpy as np
  4 | from math import sqrt
  5 | from utils.masking import TriangularCausalMask, ProbMask
  6 | from reformer_pytorch import LSHSelfAttention
  7 | 
  8 | 
  9 | class DSAttention(nn.Module):
 10 |     '''De-stationary Attention'''
 11 | 
 12 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
 13 |         super(DSAttention, self).__init__()
 14 |         self.scale = scale
 15 |         self.mask_flag = mask_flag
 16 |         self.output_attention = output_attention
 17 |         self.dropout = nn.Dropout(attention_dropout)
 18 | 
 19 |     def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
 20 |         B, L, H, E = queries.shape
 21 |         _, S, _, D = values.shape
 22 |         scale = self.scale or 1. / sqrt(E)
 23 | 
 24 |         tau = 1.0 if tau is None else tau.unsqueeze(
 25 |             1).unsqueeze(1)  # B x 1 x 1 x 1
 26 |         delta = 0.0 if delta is None else delta.unsqueeze(
 27 |             1).unsqueeze(1)  # B x 1 x 1 x S
 28 | 
 29 |         # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors
 30 |         scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta
 31 | 
 32 |         if self.mask_flag:
 33 |             if attn_mask is None:
 34 |                 attn_mask = TriangularCausalMask(B, L, device=queries.device)
 35 | 
 36 |             scores.masked_fill_(attn_mask.mask, -np.inf)
 37 | 
 38 |         A = self.dropout(torch.softmax(scale * scores, dim=-1))
 39 |         V = torch.einsum("bhls,bshd->blhd", A, values)
 40 | 
 41 |         if self.output_attention:
 42 |             return (V.contiguous(), A)
 43 |         else:
 44 |             return (V.contiguous(), None)
 45 | 
 46 | 
 47 | class FullAttention(nn.Module):
 48 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
 49 |         super(FullAttention, self).__init__()
 50 |         self.scale = scale
 51 |         self.mask_flag = mask_flag
 52 |         self.output_attention = output_attention
 53 |         self.dropout = nn.Dropout(attention_dropout)
 54 | 
 55 |     def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
 56 |         B, L, H, E = queries.shape
 57 |         _, S, _, D = values.shape
 58 |         scale = self.scale or 1. / sqrt(E)
 59 | 
 60 |         scores = torch.einsum("blhe,bshe->bhls", queries, keys)
 61 | 
 62 |         if self.mask_flag:
 63 |             if attn_mask is None:
 64 |                 attn_mask = TriangularCausalMask(B, L, device=queries.device)
 65 | 
 66 |             scores.masked_fill_(attn_mask.mask, -np.inf)
 67 | 
 68 |         A = self.dropout(torch.softmax(scale * scores, dim=-1))
 69 |         V = torch.einsum("bhls,bshd->blhd", A, values)
 70 | 
 71 |         if self.output_attention:
 72 |             return (V.contiguous(), A)
 73 |         else:
 74 |             return (V.contiguous(), None)
 75 | 
 76 | 
 77 | class ProbAttention(nn.Module):
 78 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
 79 |         super(ProbAttention, self).__init__()
 80 |         self.factor = factor
 81 |         self.scale = scale
 82 |         self.mask_flag = mask_flag
 83 |         self.output_attention = output_attention
 84 |         self.dropout = nn.Dropout(attention_dropout)
 85 | 
 86 |     def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
 87 |         # Q [B, H, L, D]
 88 |         B, H, L_K, E = K.shape
 89 |         _, _, L_Q, _ = Q.shape
 90 | 
 91 |         # calculate the sampled Q_K
 92 |         K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
 93 |         # real U = U_part(factor*ln(L_k))*L_q
 94 |         index_sample = torch.randint(L_K, (L_Q, sample_k))
 95 |         K_sample = K_expand[:, :, torch.arange(
 96 |             L_Q).unsqueeze(1), index_sample, :]
 97 |         Q_K_sample = torch.matmul(
 98 |             Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
 99 | 
100 |         # find the Top_k query with sparisty measurement
101 |         M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
102 |         M_top = M.topk(n_top, sorted=False)[1]
103 | 
104 |         # use the reduced Q to calculate Q_K
105 |         Q_reduce = Q[torch.arange(B)[:, None, None],
106 |                    torch.arange(H)[None, :, None],
107 |                    M_top, :]  # factor*ln(L_q)
108 |         Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k
109 | 
110 |         return Q_K, M_top
111 | 
112 |     def _get_initial_context(self, V, L_Q):
113 |         B, H, L_V, D = V.shape
114 |         if not self.mask_flag:
115 |             # V_sum = V.sum(dim=-2)
116 |             V_sum = V.mean(dim=-2)
117 |             contex = V_sum.unsqueeze(-2).expand(B, H,
118 |                                                 L_Q, V_sum.shape[-1]).clone()
119 |         else:  # use mask
120 |             # requires that L_Q == L_V, i.e. for self-attention only
121 |             assert (L_Q == L_V)
122 |             contex = V.cumsum(dim=-2)
123 |         return contex
124 | 
125 |     def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
126 |         B, H, L_V, D = V.shape
127 | 
128 |         if self.mask_flag:
129 |             attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
130 |             scores.masked_fill_(attn_mask.mask, -np.inf)
131 | 
132 |         attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)
133 | 
134 |         context_in[torch.arange(B)[:, None, None],
135 |         torch.arange(H)[None, :, None],
136 |         index, :] = torch.matmul(attn, V).type_as(context_in)
137 |         if self.output_attention:
138 |             attns = (torch.ones([B, H, L_V, L_V]) /
139 |                      L_V).type_as(attn).to(attn.device)
140 |             attns[torch.arange(B)[:, None, None], torch.arange(H)[
141 |                                                   None, :, None], index, :] = attn
142 |             return (context_in, attns)
143 |         else:
144 |             return (context_in, None)
145 | 
146 |     def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
147 |         B, L_Q, H, D = queries.shape
148 |         _, L_K, _, _ = keys.shape
149 | 
150 |         queries = queries.transpose(2, 1)
151 |         keys = keys.transpose(2, 1)
152 |         values = values.transpose(2, 1)
153 | 
154 |         U_part = self.factor * \
155 |                  np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
156 |         u = self.factor * \
157 |             np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)
158 | 
159 |         U_part = U_part if U_part < L_K else L_K
160 |         u = u if u < L_Q else L_Q
161 | 
162 |         scores_top, index = self._prob_QK(
163 |             queries, keys, sample_k=U_part, n_top=u)
164 | 
165 |         # add scale factor
166 |         scale = self.scale or 1. / sqrt(D)
167 |         if scale is not None:
168 |             scores_top = scores_top * scale
169 |         # get the context
170 |         context = self._get_initial_context(values, L_Q)
171 |         # update the context with selected top_k queries
172 |         context, attn = self._update_context(
173 |             context, values, scores_top, index, L_Q, attn_mask)
174 | 
175 |         return context.contiguous(), attn
176 | 
177 | 
178 | class AttentionLayer(nn.Module):
179 |     def __init__(self, attention, d_model, n_heads, d_keys=None,
180 |                  d_values=None):
181 |         super(AttentionLayer, self).__init__()
182 | 
183 |         d_keys = d_keys or (d_model // n_heads)
184 |         d_values = d_values or (d_model // n_heads)
185 | 
186 |         self.inner_attention = attention
187 |         self.query_projection = nn.Linear(d_model, d_keys * n_heads)
188 |         self.key_projection = nn.Linear(d_model, d_keys * n_heads)
189 |         self.value_projection = nn.Linear(d_model, d_values * n_heads)
190 |         self.out_projection = nn.Linear(d_values * n_heads, d_model)
191 |         self.n_heads = n_heads
192 | 
193 |     def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
194 |         B, L, _ = queries.shape
195 |         _, S, _ = keys.shape
196 |         H = self.n_heads
197 | 
198 |         queries = self.query_projection(queries).view(B, L, H, -1)
199 |         keys = self.key_projection(keys).view(B, S, H, -1)
200 |         values = self.value_projection(values).view(B, S, H, -1)
201 | 
202 |         out, attn = self.inner_attention(
203 |             queries,
204 |             keys,
205 |             values,
206 |             attn_mask,
207 |             tau=tau,
208 |             delta=delta
209 |         )
210 |         out = out.view(B, L, -1)
211 | 
212 |         return self.out_projection(out), attn
213 | 
214 | 
215 | class ReformerLayer(nn.Module):
216 |     def __init__(self, attention, d_model, n_heads, d_keys=None,
217 |                  d_values=None, causal=False, bucket_size=4, n_hashes=4):
218 |         super().__init__()
219 |         self.bucket_size = bucket_size
220 |         self.attn = LSHSelfAttention(
221 |             dim=d_model,
222 |             heads=n_heads,
223 |             bucket_size=bucket_size,
224 |             n_hashes=n_hashes,
225 |             causal=causal
226 |         )
227 | 
228 |     def fit_length(self, queries):
229 |         # inside reformer: assert N % (bucket_size * 2) == 0
230 |         B, N, C = queries.shape
231 |         if N % (self.bucket_size * 2) == 0:
232 |             return queries
233 |         else:
234 |             # fill the time series
235 |             fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
236 |             return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)
237 | 
238 |     def forward(self, queries, keys, values, attn_mask, tau, delta):
239 |         # in Reformer: defalut queries=keys
240 |         B, N, C = queries.shape
241 |         queries = self.attn(self.fit_length(queries))[:, :N, :]
242 |         return queries, None
243 | 


--------------------------------------------------------------------------------
/layers/StandardNorm.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Normalize(nn.Module):
 6 |     def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
 7 |         """
 8 |         :param num_features: the number of features or channels
 9 |         :param eps: a value added for numerical stability
10 |         :param affine: if True, RevIN has learnable affine parameters
11 |         """
12 |         super(Normalize, self).__init__()
13 |         self.num_features = num_features
14 |         self.eps = eps
15 |         self.affine = affine
16 |         self.subtract_last = subtract_last
17 |         self.non_norm = non_norm
18 |         if self.affine:
19 |             self._init_params()
20 | 
21 |     def forward(self, x, mode: str):
22 |         if mode == 'norm':
23 |             self._get_statistics(x)
24 |             x = self._normalize(x)
25 |         elif mode == 'denorm':
26 |             x = self._denormalize(x)
27 |         else:
28 |             raise NotImplementedError
29 |         return x
30 | 
31 |     def _init_params(self):
32 |         # initialize RevIN params: (C,)
33 |         self.affine_weight = nn.Parameter(torch.ones(self.num_features))
34 |         self.affine_bias = nn.Parameter(torch.zeros(self.num_features))
35 | 
36 |     def _get_statistics(self, x):
37 |         dim2reduce = tuple(range(1, x.ndim - 1))
38 |         if self.subtract_last:
39 |             self.last = x[:, -1, :].unsqueeze(1)
40 |         else:
41 |             self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
42 |         self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()
43 | 
44 |     def _normalize(self, x):
45 |         if self.non_norm:
46 |             return x
47 |         if self.subtract_last:
48 |             x = x - self.last
49 |         else:
50 |             x = x - self.mean
51 |         x = x / self.stdev
52 |         if self.affine:
53 |             x = x * self.affine_weight
54 |             x = x + self.affine_bias
55 |         return x
56 | 
57 |     def _denormalize(self, x):
58 |         if self.non_norm:
59 |             return x
60 |         if self.affine:
61 |             x = x - self.affine_bias
62 |             x = x / (self.affine_weight + self.eps * self.eps)
63 |         x = x * self.stdev
64 |         if self.subtract_last:
65 |             x = x + self.last
66 |         else:
67 |             x = x + self.mean
68 |         return x
69 | 


--------------------------------------------------------------------------------
/layers/Transformer_EncDec.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class ConvLayer(nn.Module):
  7 |     def __init__(self, c_in):
  8 |         super(ConvLayer, self).__init__()
  9 |         self.downConv = nn.Conv1d(in_channels=c_in,
 10 |                                   out_channels=c_in,
 11 |                                   kernel_size=3,
 12 |                                   padding=2,
 13 |                                   padding_mode='circular')
 14 |         self.norm = nn.BatchNorm1d(c_in)
 15 |         self.activation = nn.ELU()
 16 |         self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
 17 | 
 18 |     def forward(self, x):
 19 |         x = self.downConv(x.permute(0, 2, 1))
 20 |         x = self.norm(x)
 21 |         x = self.activation(x)
 22 |         x = self.maxPool(x)
 23 |         x = x.transpose(1, 2)
 24 |         return x
 25 | 
 26 | 
 27 | class EncoderLayer(nn.Module):
 28 |     def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
 29 |         super(EncoderLayer, self).__init__()
 30 |         d_ff = d_ff or 4 * d_model
 31 |         self.attention = attention
 32 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
 33 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
 34 |         self.norm1 = nn.LayerNorm(d_model)
 35 |         self.norm2 = nn.LayerNorm(d_model)
 36 |         self.dropout = nn.Dropout(dropout)
 37 |         self.activation = F.relu if activation == "relu" else F.gelu
 38 | 
 39 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
 40 |         new_x, attn = self.attention(
 41 |             x, x, x,
 42 |             attn_mask=attn_mask,
 43 |             tau=tau, delta=delta
 44 |         )
 45 |         x = x + self.dropout(new_x)
 46 | 
 47 |         y = x = self.norm1(x)
 48 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
 49 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
 50 | 
 51 |         return self.norm2(x + y), attn
 52 | 
 53 | 
 54 | class Encoder(nn.Module):
 55 |     def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
 56 |         super(Encoder, self).__init__()
 57 |         self.attn_layers = nn.ModuleList(attn_layers)
 58 |         self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
 59 |         self.norm = norm_layer
 60 | 
 61 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
 62 |         # x [B, L, D]
 63 |         attns = []
 64 |         if self.conv_layers is not None:
 65 |             for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
 66 |                 delta = delta if i == 0 else None
 67 |                 x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
 68 |                 x = conv_layer(x)
 69 |                 attns.append(attn)
 70 |             x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
 71 |             attns.append(attn)
 72 |         else:
 73 |             for attn_layer in self.attn_layers:
 74 |                 x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
 75 |                 attns.append(attn)
 76 | 
 77 |         if self.norm is not None:
 78 |             x = self.norm(x)
 79 | 
 80 |         return x, attns
 81 | 
 82 | 
 83 | class DecoderLayer(nn.Module):
 84 |     def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
 85 |                  dropout=0.1, activation="relu"):
 86 |         super(DecoderLayer, self).__init__()
 87 |         d_ff = d_ff or 4 * d_model
 88 |         self.self_attention = self_attention
 89 |         self.cross_attention = cross_attention
 90 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
 91 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
 92 |         self.norm1 = nn.LayerNorm(d_model)
 93 |         self.norm2 = nn.LayerNorm(d_model)
 94 |         self.norm3 = nn.LayerNorm(d_model)
 95 |         self.dropout = nn.Dropout(dropout)
 96 |         self.activation = F.relu if activation == "relu" else F.gelu
 97 | 
 98 |     def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
 99 |         x = x + self.dropout(self.self_attention(
100 |             x, x, x,
101 |             attn_mask=x_mask,
102 |             tau=tau, delta=None
103 |         )[0])
104 |         x = self.norm1(x)
105 | 
106 |         x = x + self.dropout(self.cross_attention(
107 |             x, cross, cross,
108 |             attn_mask=cross_mask,
109 |             tau=tau, delta=delta
110 |         )[0])
111 | 
112 |         y = x = self.norm2(x)
113 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
114 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
115 | 
116 |         return self.norm3(x + y)
117 | 
118 | 
119 | class Decoder(nn.Module):
120 |     def __init__(self, layers, norm_layer=None, projection=None):
121 |         super(Decoder, self).__init__()
122 |         self.layers = nn.ModuleList(layers)
123 |         self.norm = norm_layer
124 |         self.projection = projection
125 | 
126 |     def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
127 |         for layer in self.layers:
128 |             x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)
129 | 
130 |         if self.norm is not None:
131 |             x = self.norm(x)
132 | 
133 |         if self.projection is not None:
134 |             x = self.projection(x)
135 |         return x
136 | 


--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/layers/__init__.py


--------------------------------------------------------------------------------
/models/Autoformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
  5 | from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
  6 | from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
  7 | import math
  8 | import numpy as np
  9 | 
 10 | 
 11 | class Model(nn.Module):
 12 |     """
 13 |     Autoformer is the first method to achieve the series-wise connection,
 14 |     with inherent O(LlogL) complexity
 15 |     Paper link: https://openreview.net/pdf?id=I55UqU-M11y
 16 |     """
 17 | 
 18 |     def __init__(self, configs):
 19 |         super(Model, self).__init__()
 20 |         self.task_name = configs.task_name
 21 |         self.seq_len = configs.seq_len
 22 |         self.label_len = configs.label_len
 23 |         self.pred_len = configs.pred_len
 24 |         self.output_attention = configs.output_attention
 25 | 
 26 |         # Decomp
 27 |         kernel_size = configs.moving_avg
 28 |         self.decomp = series_decomp(kernel_size)
 29 | 
 30 |         # Embedding
 31 |         self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
 32 |                                                   configs.dropout)
 33 |         # Encoder
 34 |         self.encoder = Encoder(
 35 |             [
 36 |                 EncoderLayer(
 37 |                     AutoCorrelationLayer(
 38 |                         AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
 39 |                                         output_attention=configs.output_attention),
 40 |                         configs.d_model, configs.n_heads),
 41 |                     configs.d_model,
 42 |                     configs.d_ff,
 43 |                     moving_avg=configs.moving_avg,
 44 |                     dropout=configs.dropout,
 45 |                     activation=configs.activation
 46 |                 ) for l in range(configs.e_layers)
 47 |             ],
 48 |             norm_layer=my_Layernorm(configs.d_model)
 49 |         )
 50 |         # Decoder
 51 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 52 |             self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
 53 |                                                       configs.dropout)
 54 |             self.decoder = Decoder(
 55 |                 [
 56 |                     DecoderLayer(
 57 |                         AutoCorrelationLayer(
 58 |                             AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout,
 59 |                                             output_attention=False),
 60 |                             configs.d_model, configs.n_heads),
 61 |                         AutoCorrelationLayer(
 62 |                             AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
 63 |                                             output_attention=False),
 64 |                             configs.d_model, configs.n_heads),
 65 |                         configs.d_model,
 66 |                         configs.c_out,
 67 |                         configs.d_ff,
 68 |                         moving_avg=configs.moving_avg,
 69 |                         dropout=configs.dropout,
 70 |                         activation=configs.activation,
 71 |                     )
 72 |                     for l in range(configs.d_layers)
 73 |                 ],
 74 |                 norm_layer=my_Layernorm(configs.d_model),
 75 |                 projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
 76 |             )
 77 |         if self.task_name == 'imputation':
 78 |             self.projection = nn.Linear(
 79 |                 configs.d_model, configs.c_out, bias=True)
 80 |         if self.task_name == 'anomaly_detection':
 81 |             self.projection = nn.Linear(
 82 |                 configs.d_model, configs.c_out, bias=True)
 83 |         if self.task_name == 'classification':
 84 |             self.act = F.gelu
 85 |             self.dropout = nn.Dropout(configs.dropout)
 86 |             self.projection = nn.Linear(
 87 |                 configs.d_model * configs.seq_len, configs.num_class)
 88 | 
 89 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 90 |         # decomp init
 91 |         mean = torch.mean(x_enc, dim=1).unsqueeze(
 92 |             1).repeat(1, self.pred_len, 1)
 93 |         zeros = torch.zeros([x_dec.shape[0], self.pred_len,
 94 |                              x_dec.shape[2]], device=x_enc.device)
 95 |         seasonal_init, trend_init = self.decomp(x_enc)
 96 |         # decoder input
 97 |         trend_init = torch.cat(
 98 |             [trend_init[:, -self.label_len:, :], mean], dim=1)
 99 |         seasonal_init = torch.cat(
100 |             [seasonal_init[:, -self.label_len:, :], zeros], dim=1)
101 |         # enc
102 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
103 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
104 |         # dec
105 |         dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
106 |         seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None,
107 |                                                  trend=trend_init)
108 |         # final
109 |         dec_out = trend_part + seasonal_part
110 |         return dec_out
111 | 
112 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
113 |         # enc
114 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
115 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
116 |         # final
117 |         dec_out = self.projection(enc_out)
118 |         return dec_out
119 | 
120 |     def anomaly_detection(self, x_enc):
121 |         # enc
122 |         enc_out = self.enc_embedding(x_enc, None)
123 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
124 |         # final
125 |         dec_out = self.projection(enc_out)
126 |         return dec_out
127 | 
128 |     def classification(self, x_enc, x_mark_enc):
129 |         # enc
130 |         enc_out = self.enc_embedding(x_enc, None)
131 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
132 | 
133 |         # Output
134 |         # the output transformer encoder/decoder embeddings don't include non-linearity
135 |         output = self.act(enc_out)
136 |         output = self.dropout(output)
137 |         # zero-out padding embeddings
138 |         output = output * x_mark_enc.unsqueeze(-1)
139 |         # (batch_size, seq_length * d_model)
140 |         output = output.reshape(output.shape[0], -1)
141 |         output = self.projection(output)  # (batch_size, num_classes)
142 |         return output
143 | 
144 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
145 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
146 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
147 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
148 |         if self.task_name == 'imputation':
149 |             dec_out = self.imputation(
150 |                 x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
151 |             return dec_out  # [B, L, D]
152 |         if self.task_name == 'anomaly_detection':
153 |             dec_out = self.anomaly_detection(x_enc)
154 |             return dec_out  # [B, L, D]
155 |         if self.task_name == 'classification':
156 |             dec_out = self.classification(x_enc, x_mark_enc)
157 |             return dec_out  # [B, N]
158 |         return None
159 | 


--------------------------------------------------------------------------------
/models/DLinear.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Autoformer_EncDec import series_decomp
  5 | 
  6 | 
  7 | class Model(nn.Module):
  8 |     """
  9 |     Paper link: https://arxiv.org/pdf/2205.13504.pdf
 10 |     """
 11 | 
 12 |     def __init__(self, configs, individual=False):
 13 |         """
 14 |         individual: Bool, whether shared model among different variates.
 15 |         """
 16 |         super(Model, self).__init__()
 17 |         self.task_name = configs.task_name
 18 |         self.seq_len = configs.seq_len
 19 |         if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
 20 |             self.pred_len = configs.seq_len
 21 |         else:
 22 |             self.pred_len = configs.pred_len
 23 | 
 24 |         self.decompsition = series_decomp(configs.moving_avg)
 25 |         self.individual = individual
 26 |         self.channels = configs.enc_in
 27 | 
 28 |         if self.individual:
 29 |             self.Linear_Seasonal = nn.ModuleList()
 30 |             self.Linear_Trend = nn.ModuleList()
 31 | 
 32 |             for i in range(self.channels):
 33 |                 self.Linear_Seasonal.append(
 34 |                     nn.Linear(self.seq_len, self.pred_len))
 35 |                 self.Linear_Trend.append(
 36 |                     nn.Linear(self.seq_len, self.pred_len))
 37 | 
 38 |                 self.Linear_Seasonal[i].weight = nn.Parameter(
 39 |                     (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 40 |                 self.Linear_Trend[i].weight = nn.Parameter(
 41 |                     (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 42 |         else:
 43 |             self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
 44 |             self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
 45 | 
 46 |             self.Linear_Seasonal.weight = nn.Parameter(
 47 |                 (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 48 |             self.Linear_Trend.weight = nn.Parameter(
 49 |                 (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 50 | 
 51 |         if self.task_name == 'classification':
 52 |             self.act = F.gelu
 53 |             self.dropout = nn.Dropout(configs.dropout)
 54 |             self.projection = nn.Linear(
 55 |                 configs.enc_in * configs.seq_len, configs.num_class)
 56 | 
 57 |     def encoder(self, x):
 58 |         seasonal_init, trend_init = self.decompsition(x)
 59 |         seasonal_init, trend_init = seasonal_init.permute(
 60 |             0, 2, 1), trend_init.permute(0, 2, 1)
 61 |         if self.individual:
 62 |             seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
 63 |                                           dtype=seasonal_init.dtype).to(seasonal_init.device)
 64 |             trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len],
 65 |                                        dtype=trend_init.dtype).to(trend_init.device)
 66 |             for i in range(self.channels):
 67 |                 seasonal_output[:, i, :] = self.Linear_Seasonal[i](
 68 |                     seasonal_init[:, i, :])
 69 |                 trend_output[:, i, :] = self.Linear_Trend[i](
 70 |                     trend_init[:, i, :])
 71 |         else:
 72 |             seasonal_output = self.Linear_Seasonal(seasonal_init)
 73 |             trend_output = self.Linear_Trend(trend_init)
 74 |         x = seasonal_output + trend_output
 75 |         return x.permute(0, 2, 1)
 76 | 
 77 |     def forecast(self, x_enc):
 78 |         return self.encoder(x_enc)
 79 | 
 80 |     def imputation(self, x_enc):
 81 |         return self.encoder(x_enc)
 82 | 
 83 |     def anomaly_detection(self, x_enc):
 84 |         return self.encoder(x_enc)
 85 | 
 86 |     def classification(self, x_enc):
 87 |         enc_out = self.encoder(x_enc)
 88 |         # Output
 89 |         # (batch_size, seq_length * d_model)
 90 |         output = enc_out.reshape(enc_out.shape[0], -1)
 91 |         output = self.projection(output)  # (batch_size, num_classes)
 92 |         return output
 93 | 
 94 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
 95 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 96 |             dec_out = self.forecast(x_enc)
 97 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
 98 |         if self.task_name == 'imputation':
 99 |             dec_out = self.imputation(x_enc)
100 |             return dec_out  # [B, L, D]
101 |         if self.task_name == 'anomaly_detection':
102 |             dec_out = self.anomaly_detection(x_enc)
103 |             return dec_out  # [B, L, D]
104 |         if self.task_name == 'classification':
105 |             dec_out = self.classification(x_enc)
106 |             return dec_out  # [B, N]
107 |         return None
108 | 


--------------------------------------------------------------------------------
/models/TimeLLM.py:
--------------------------------------------------------------------------------
  1 | from math import sqrt
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | 
  6 | from transformers import LlamaConfig, LlamaModel, LlamaTokenizer, GPT2Config, GPT2Model, GPT2Tokenizer, BertConfig, \
  7 |     BertModel, BertTokenizer
  8 | from layers.Embed import PatchEmbedding
  9 | import transformers
 10 | from layers.StandardNorm import Normalize
 11 | 
 12 | transformers.logging.set_verbosity_error()
 13 | 
 14 | 
 15 | class FlattenHead(nn.Module):
 16 |     def __init__(self, n_vars, nf, target_window, head_dropout=0):
 17 |         super().__init__()
 18 |         self.n_vars = n_vars
 19 |         self.flatten = nn.Flatten(start_dim=-2)
 20 |         self.linear = nn.Linear(nf, target_window)
 21 |         self.dropout = nn.Dropout(head_dropout)
 22 | 
 23 |     def forward(self, x):
 24 |         x = self.flatten(x)
 25 |         x = self.linear(x)
 26 |         x = self.dropout(x)
 27 |         return x
 28 | 
 29 | 
 30 | class Model(nn.Module):
 31 | 
 32 |     def __init__(self, configs, patch_len=16, stride=8):
 33 |         super(Model, self).__init__()
 34 |         self.task_name = configs.task_name
 35 |         self.pred_len = configs.pred_len
 36 |         self.seq_len = configs.seq_len
 37 |         self.d_ff = configs.d_ff
 38 |         self.top_k = 5
 39 |         self.d_llm = configs.llm_dim
 40 |         self.patch_len = configs.patch_len
 41 |         self.stride = configs.stride
 42 | 
 43 |         if configs.llm_model == 'LLAMA':
 44 |             # self.llama_config = LlamaConfig.from_pretrained('/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/')
 45 |             self.llama_config = LlamaConfig.from_pretrained('huggyllama/llama-7b')
 46 |             self.llama_config.num_hidden_layers = configs.llm_layers
 47 |             self.llama_config.output_attentions = True
 48 |             self.llama_config.output_hidden_states = True
 49 |             try:
 50 |                 self.llm_model = LlamaModel.from_pretrained(
 51 |                     # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/",
 52 |                     'huggyllama/llama-7b',
 53 |                     trust_remote_code=True,
 54 |                     local_files_only=True,
 55 |                     config=self.llama_config,
 56 |                     # load_in_4bit=True
 57 |                 )
 58 |             except EnvironmentError:  # downloads model from HF is not already done
 59 |                 print("Local model files not found. Attempting to download...")
 60 |                 self.llm_model = LlamaModel.from_pretrained(
 61 |                     # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/",
 62 |                     'huggyllama/llama-7b',
 63 |                     trust_remote_code=True,
 64 |                     local_files_only=False,
 65 |                     config=self.llama_config,
 66 |                     # load_in_4bit=True
 67 |                 )
 68 |             try:
 69 |                 self.tokenizer = LlamaTokenizer.from_pretrained(
 70 |                     # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/tokenizer.model",
 71 |                     'huggyllama/llama-7b',
 72 |                     trust_remote_code=True,
 73 |                     local_files_only=True
 74 |                 )
 75 |             except EnvironmentError:  # downloads the tokenizer from HF if not already done
 76 |                 print("Local tokenizer files not found. Atempting to download them..")
 77 |                 self.tokenizer = LlamaTokenizer.from_pretrained(
 78 |                     # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/tokenizer.model",
 79 |                     'huggyllama/llama-7b',
 80 |                     trust_remote_code=True,
 81 |                     local_files_only=False
 82 |                 )
 83 |         elif configs.llm_model == 'GPT2':
 84 |             self.gpt2_config = GPT2Config.from_pretrained('openai-community/gpt2')
 85 | 
 86 |             self.gpt2_config.num_hidden_layers = configs.llm_layers
 87 |             self.gpt2_config.output_attentions = True
 88 |             self.gpt2_config.output_hidden_states = True
 89 |             try:
 90 |                 self.llm_model = GPT2Model.from_pretrained(
 91 |                     'openai-community/gpt2',
 92 |                     trust_remote_code=True,
 93 |                     local_files_only=True,
 94 |                     config=self.gpt2_config,
 95 |                 )
 96 |             except EnvironmentError:  # downloads model from HF is not already done
 97 |                 print("Local model files not found. Attempting to download...")
 98 |                 self.llm_model = GPT2Model.from_pretrained(
 99 |                     'openai-community/gpt2',
100 |                     trust_remote_code=True,
101 |                     local_files_only=False,
102 |                     config=self.gpt2_config,
103 |                 )
104 | 
105 |             try:
106 |                 self.tokenizer = GPT2Tokenizer.from_pretrained(
107 |                     'openai-community/gpt2',
108 |                     trust_remote_code=True,
109 |                     local_files_only=True
110 |                 )
111 |             except EnvironmentError:  # downloads the tokenizer from HF if not already done
112 |                 print("Local tokenizer files not found. Atempting to download them..")
113 |                 self.tokenizer = GPT2Tokenizer.from_pretrained(
114 |                     'openai-community/gpt2',
115 |                     trust_remote_code=True,
116 |                     local_files_only=False
117 |                 )
118 |         elif configs.llm_model == 'BERT':
119 |             self.bert_config = BertConfig.from_pretrained('google-bert/bert-base-uncased')
120 | 
121 |             self.bert_config.num_hidden_layers = configs.llm_layers
122 |             self.bert_config.output_attentions = True
123 |             self.bert_config.output_hidden_states = True
124 |             try:
125 |                 self.llm_model = BertModel.from_pretrained(
126 |                     'google-bert/bert-base-uncased',
127 |                     trust_remote_code=True,
128 |                     local_files_only=True,
129 |                     config=self.bert_config,
130 |                 )
131 |             except EnvironmentError:  # downloads model from HF is not already done
132 |                 print("Local model files not found. Attempting to download...")
133 |                 self.llm_model = BertModel.from_pretrained(
134 |                     'google-bert/bert-base-uncased',
135 |                     trust_remote_code=True,
136 |                     local_files_only=False,
137 |                     config=self.bert_config,
138 |                 )
139 | 
140 |             try:
141 |                 self.tokenizer = BertTokenizer.from_pretrained(
142 |                     'google-bert/bert-base-uncased',
143 |                     trust_remote_code=True,
144 |                     local_files_only=True
145 |                 )
146 |             except EnvironmentError:  # downloads the tokenizer from HF if not already done
147 |                 print("Local tokenizer files not found. Atempting to download them..")
148 |                 self.tokenizer = BertTokenizer.from_pretrained(
149 |                     'google-bert/bert-base-uncased',
150 |                     trust_remote_code=True,
151 |                     local_files_only=False
152 |                 )
153 |         else:
154 |             raise Exception('LLM model is not defined')
155 | 
156 |         if self.tokenizer.eos_token:
157 |             self.tokenizer.pad_token = self.tokenizer.eos_token
158 |         else:
159 |             pad_token = '[PAD]'
160 |             self.tokenizer.add_special_tokens({'pad_token': pad_token})
161 |             self.tokenizer.pad_token = pad_token
162 | 
163 |         for param in self.llm_model.parameters():
164 |             param.requires_grad = False
165 | 
166 |         if configs.prompt_domain:
167 |             self.description = configs.content
168 |         else:
169 |             self.description = 'The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment.'
170 | 
171 |         self.dropout = nn.Dropout(configs.dropout)
172 | 
173 |         self.patch_embedding = PatchEmbedding(
174 |             configs.d_model, self.patch_len, self.stride, configs.dropout)
175 | 
176 |         self.word_embeddings = self.llm_model.get_input_embeddings().weight
177 |         self.vocab_size = self.word_embeddings.shape[0]
178 |         self.num_tokens = 1000
179 |         self.mapping_layer = nn.Linear(self.vocab_size, self.num_tokens)
180 | 
181 |         self.reprogramming_layer = ReprogrammingLayer(configs.d_model, configs.n_heads, self.d_ff, self.d_llm)
182 | 
183 |         self.patch_nums = int((configs.seq_len - self.patch_len) / self.stride + 2)
184 |         self.head_nf = self.d_ff * self.patch_nums
185 | 
186 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
187 |             self.output_projection = FlattenHead(configs.enc_in, self.head_nf, self.pred_len,
188 |                                                  head_dropout=configs.dropout)
189 |         else:
190 |             raise NotImplementedError
191 | 
192 |         self.normalize_layers = Normalize(configs.enc_in, affine=False)
193 | 
194 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
195 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
196 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
197 |             return dec_out[:, -self.pred_len:, :]
198 |         return None
199 | 
200 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
201 | 
202 |         x_enc = self.normalize_layers(x_enc, 'norm')
203 | 
204 |         B, T, N = x_enc.size()
205 |         x_enc = x_enc.permute(0, 2, 1).contiguous().reshape(B * N, T, 1)
206 | 
207 |         min_values = torch.min(x_enc, dim=1)[0]
208 |         max_values = torch.max(x_enc, dim=1)[0]
209 |         medians = torch.median(x_enc, dim=1).values
210 |         lags = self.calcute_lags(x_enc)
211 |         trends = x_enc.diff(dim=1).sum(dim=1)
212 | 
213 |         prompt = []
214 |         for b in range(x_enc.shape[0]):
215 |             min_values_str = str(min_values[b].tolist()[0])
216 |             max_values_str = str(max_values[b].tolist()[0])
217 |             median_values_str = str(medians[b].tolist()[0])
218 |             lags_values_str = str(lags[b].tolist())
219 |             prompt_ = (
220 |                 f"<|start_prompt|>Dataset description: {self.description}"
221 |                 f"Task description: forecast the next {str(self.pred_len)} steps given the previous {str(self.seq_len)} steps information; "
222 |                 "Input statistics: "
223 |                 f"min value {min_values_str}, "
224 |                 f"max value {max_values_str}, "
225 |                 f"median value {median_values_str}, "
226 |                 f"the trend of input is {'upward' if trends[b] > 0 else 'downward'}, "
227 |                 f"top 5 lags are : {lags_values_str}<|<end_prompt>|>"
228 |             )
229 | 
230 |             prompt.append(prompt_)
231 | 
232 |         x_enc = x_enc.reshape(B, N, T).permute(0, 2, 1).contiguous()
233 | 
234 |         prompt = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048).input_ids
235 |         prompt_embeddings = self.llm_model.get_input_embeddings()(prompt.to(x_enc.device))  # (batch, prompt_token, dim)
236 | 
237 |         source_embeddings = self.mapping_layer(self.word_embeddings.permute(1, 0)).permute(1, 0)
238 | 
239 |         x_enc = x_enc.permute(0, 2, 1).contiguous()
240 |         enc_out, n_vars = self.patch_embedding(x_enc.to(torch.bfloat16))
241 |         enc_out = self.reprogramming_layer(enc_out, source_embeddings, source_embeddings)
242 |         llama_enc_out = torch.cat([prompt_embeddings, enc_out], dim=1)
243 |         dec_out = self.llm_model(inputs_embeds=llama_enc_out).last_hidden_state
244 |         dec_out = dec_out[:, :, :self.d_ff]
245 | 
246 |         dec_out = torch.reshape(
247 |             dec_out, (-1, n_vars, dec_out.shape[-2], dec_out.shape[-1]))
248 |         dec_out = dec_out.permute(0, 1, 3, 2).contiguous()
249 | 
250 |         dec_out = self.output_projection(dec_out[:, :, :, -self.patch_nums:])
251 |         dec_out = dec_out.permute(0, 2, 1).contiguous()
252 | 
253 |         dec_out = self.normalize_layers(dec_out, 'denorm')
254 | 
255 |         return dec_out
256 | 
257 |     def calcute_lags(self, x_enc):
258 |         q_fft = torch.fft.rfft(x_enc.permute(0, 2, 1).contiguous(), dim=-1)
259 |         k_fft = torch.fft.rfft(x_enc.permute(0, 2, 1).contiguous(), dim=-1)
260 |         res = q_fft * torch.conj(k_fft)
261 |         corr = torch.fft.irfft(res, dim=-1)
262 |         mean_value = torch.mean(corr, dim=1)
263 |         _, lags = torch.topk(mean_value, self.top_k, dim=-1)
264 |         return lags
265 | 
266 | 
267 | class ReprogrammingLayer(nn.Module):
268 |     def __init__(self, d_model, n_heads, d_keys=None, d_llm=None, attention_dropout=0.1):
269 |         super(ReprogrammingLayer, self).__init__()
270 | 
271 |         d_keys = d_keys or (d_model // n_heads)
272 | 
273 |         self.query_projection = nn.Linear(d_model, d_keys * n_heads)
274 |         self.key_projection = nn.Linear(d_llm, d_keys * n_heads)
275 |         self.value_projection = nn.Linear(d_llm, d_keys * n_heads)
276 |         self.out_projection = nn.Linear(d_keys * n_heads, d_llm)
277 |         self.n_heads = n_heads
278 |         self.dropout = nn.Dropout(attention_dropout)
279 | 
280 |     def forward(self, target_embedding, source_embedding, value_embedding):
281 |         B, L, _ = target_embedding.shape
282 |         S, _ = source_embedding.shape
283 |         H = self.n_heads
284 | 
285 |         target_embedding = self.query_projection(target_embedding).view(B, L, H, -1)
286 |         source_embedding = self.key_projection(source_embedding).view(S, H, -1)
287 |         value_embedding = self.value_projection(value_embedding).view(S, H, -1)
288 | 
289 |         out = self.reprogramming(target_embedding, source_embedding, value_embedding)
290 | 
291 |         out = out.reshape(B, L, -1)
292 | 
293 |         return self.out_projection(out)
294 | 
295 |     def reprogramming(self, target_embedding, source_embedding, value_embedding):
296 |         B, L, H, E = target_embedding.shape
297 | 
298 |         scale = 1. / sqrt(E)
299 | 
300 |         scores = torch.einsum("blhe,she->bhls", target_embedding, source_embedding)
301 | 
302 |         A = self.dropout(torch.softmax(scale * scores, dim=-1))
303 |         reprogramming_embedding = torch.einsum("bhls,she->blhe", A, value_embedding)
304 | 
305 |         return reprogramming_embedding
306 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/models/__init__.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch==2.2.2
 2 | accelerate==0.28.0
 3 | einops==0.7.0
 4 | matplotlib==3.7.0
 5 | numpy==1.23.5
 6 | pandas==1.5.3
 7 | scikit_learn==1.2.2
 8 | scipy==1.12.0
 9 | tqdm==4.65.0
10 | peft==0.4.0
11 | transformers==4.31.0
12 | deepspeed==0.14.0
13 | sentencepiece==0.2.0
14 | 


--------------------------------------------------------------------------------
/run_m4.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import torch
  3 | from accelerate import Accelerator, DeepSpeedPlugin
  4 | from accelerate import DistributedDataParallelKwargs
  5 | from torch import optim
  6 | from torch.optim import lr_scheduler
  7 | 
  8 | from data_provider.m4 import M4Meta
  9 | from models import Autoformer, DLinear, TimeLLM
 10 | 
 11 | from data_provider.data_factory import data_provider
 12 | import time
 13 | import random
 14 | import numpy as np
 15 | import pandas
 16 | 
 17 | from utils.losses import smape_loss
 18 | from utils.m4_summary import M4Summary
 19 | import os
 20 | 
 21 | os.environ['CURL_CA_BUNDLE'] = ''
 22 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
 23 | 
 24 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, load_content, test
 25 | 
 26 | parser = argparse.ArgumentParser(description='Time-LLM')
 27 | 
 28 | fix_seed = 2021
 29 | random.seed(fix_seed)
 30 | torch.manual_seed(fix_seed)
 31 | np.random.seed(fix_seed)
 32 | 
 33 | # basic config
 34 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
 35 |                     help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
 36 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
 37 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
 38 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results')
 39 | parser.add_argument('--model', type=str, required=True, default='Autoformer',
 40 |                     help='model name, options: [Autoformer, DLinear]')
 41 | parser.add_argument('--seed', type=int, default=0, help='random seed')
 42 | 
 43 | # data loader
 44 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
 45 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
 46 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
 47 | parser.add_argument('--features', type=str, default='M',
 48 |                     help='forecasting task, options:[M, S, MS]; '
 49 |                          'M:multivariate predict multivariate, S: univariate predict univariate, '
 50 |                          'MS:multivariate predict univariate')
 51 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
 52 | parser.add_argument('--loader', type=str, default='modal', help='dataset type')
 53 | parser.add_argument('--freq', type=str, default='h',
 54 |                     help='freq for time features encoding, '
 55 |                          'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
 56 |                          'you can also use more detailed freq like 15min or 3h')
 57 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
 58 | 
 59 | # forecasting task
 60 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
 61 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
 62 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
 63 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
 64 | 
 65 | # model define
 66 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
 67 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
 68 | parser.add_argument('--c_out', type=int, default=7, help='output size')
 69 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
 70 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
 71 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
 72 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
 73 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
 74 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
 75 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
 76 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
 77 | parser.add_argument('--embed', type=str, default='timeF',
 78 |                     help='time features encoding, options:[timeF, fixed, learned]')
 79 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
 80 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
 81 | parser.add_argument('--patch_len', type=int, default=16, help='patch length')
 82 | parser.add_argument('--stride', type=int, default=8, help='stride')
 83 | parser.add_argument('--prompt_domain', type=int, default=0, help='')
 84 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT
 85 | parser.add_argument('--llm_dim', type=int, default='4096', help='LLM model dimension')# LLama7b:4096; GPT2-small:768; BERT-base:768
 86 | 
 87 | # optimization
 88 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
 89 | parser.add_argument('--itr', type=int, default=1, help='experiments times')
 90 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
 91 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
 92 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
 93 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
 94 | parser.add_argument('--patience', type=int, default=20, help='early stopping patience')
 95 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
 96 | parser.add_argument('--des', type=str, default='test', help='exp description')
 97 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
 98 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
 99 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
100 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
101 | parser.add_argument('--llm_layers', type=int, default=6)
102 | parser.add_argument('--percent', type=int, default=100)
103 | 
104 | args = parser.parse_args()
105 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
106 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
107 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)
108 | 
109 | for ii in range(args.itr):
110 |     # setting record of experiments
111 |     setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
112 |         args.task_name,
113 |         args.model_id,
114 |         args.model,
115 |         args.data,
116 |         args.features,
117 |         args.seq_len,
118 |         args.label_len,
119 |         args.pred_len,
120 |         args.d_model,
121 |         args.n_heads,
122 |         args.e_layers,
123 |         args.d_layers,
124 |         args.d_ff,
125 |         args.factor,
126 |         args.embed,
127 |         args.des, ii)
128 | 
129 |     if args.data == 'm4':
130 |         args.pred_len = M4Meta.horizons_map[args.seasonal_patterns]  # Up to M4 config
131 |         args.seq_len = 2 * args.pred_len
132 |         args.label_len = args.pred_len
133 |         args.frequency_map = M4Meta.frequency_map[args.seasonal_patterns]
134 | 
135 |     train_data, train_loader = data_provider(args, 'train')
136 |     vali_data, vali_loader = data_provider(args, 'val')
137 |     test_data, test_loader = data_provider(args, 'test')
138 | 
139 |     if args.model == 'Autoformer':
140 |         model = Autoformer.Model(args).float()
141 |     elif args.model == 'DLinear':
142 |         model = DLinear.Model(args).float()
143 |     else:
144 |         model = TimeLLM.Model(args).float()
145 | 
146 |     path = os.path.join(args.checkpoints,
147 |                         setting + '-' + args.model_comment)  # unique checkpoint saving path
148 |     args.content = load_content(args)
149 |     if not os.path.exists(path) and accelerator.is_local_main_process:
150 |         os.makedirs(path)
151 | 
152 |     time_now = time.time()
153 | 
154 |     train_steps = len(train_loader)
155 |     early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience, verbose=True)
156 | 
157 |     model_optim = optim.Adam(model.parameters(), lr=args.learning_rate)
158 | 
159 |     if args.lradj == 'COS':
160 |         scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
161 |     else:
162 |         scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
163 |                                             steps_per_epoch=train_steps,
164 |                                             pct_start=args.pct_start,
165 |                                             epochs=args.train_epochs,
166 |                                             max_lr=args.learning_rate)
167 | 
168 |     criterion = smape_loss()
169 | 
170 |     train_loader, vali_loader, model, model_optim, scheduler = accelerator.prepare(
171 |         train_loader, vali_loader, model, model_optim, scheduler)
172 | 
173 |     for epoch in range(args.train_epochs):
174 |         iter_count = 0
175 |         train_loss = []
176 | 
177 |         model.train()
178 |         epoch_time = time.time()
179 | 
180 |         for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
181 |             iter_count += 1
182 |             model_optim.zero_grad()
183 |             batch_x = batch_x.float().to(accelerator.device)
184 | 
185 |             batch_y = batch_y.float().to(accelerator.device)
186 |             batch_y_mark = batch_y_mark.float().to(accelerator.device)
187 | 
188 |             # decoder input
189 |             dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(accelerator.device)
190 |             dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
191 |                 accelerator.device)
192 | 
193 |             outputs = model(batch_x, None, dec_inp, None)
194 | 
195 |             f_dim = -1 if args.features == 'MS' else 0
196 |             outputs = outputs[:, -args.pred_len:, f_dim:]
197 |             batch_y = batch_y[:, -args.pred_len:, f_dim:]
198 | 
199 |             batch_y_mark = batch_y_mark[:, -args.pred_len:, f_dim:]
200 |             loss = criterion(batch_x, args.frequency_map, outputs, batch_y, batch_y_mark)
201 | 
202 |             train_loss.append(loss.item())
203 | 
204 |             if (i + 1) % 100 == 0:
205 |                 accelerator.print(
206 |                     "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())
207 |                 )
208 |                 speed = (time.time() - time_now) / iter_count
209 |                 left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
210 |                 accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
211 |                 iter_count = 0
212 |                 time_now = time.time()
213 | 
214 |             accelerator.backward(loss)
215 |             model_optim.step()
216 | 
217 |             if args.lradj == 'TST':
218 |                 adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
219 |                 scheduler.step()
220 | 
221 |         accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
222 |         train_loss = np.average(train_loss)
223 |         vali_loss = test(args, accelerator, model, train_loader, vali_loader, criterion)
224 |         test_loss = vali_loss
225 |         accelerator.print(
226 |             "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
227 |                 epoch + 1, train_steps, train_loss, vali_loss, test_loss))
228 |         early_stopping(vali_loss, model, path)  # model saving
229 |         if early_stopping.early_stop:
230 |             accelerator.print("Early stopping")
231 |             break
232 | 
233 |         if args.lradj != 'TST':
234 |             adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
235 |         else:
236 |             accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
237 | 
238 |     best_model_path = path + '/' + 'checkpoint'
239 |     accelerator.wait_for_everyone()
240 |     unwrapped_model = accelerator.unwrap_model(model)
241 |     torch.cuda.synchronize()
242 |     torch.cuda.empty_cache()
243 |     unwrapped_model.load_state_dict(torch.load(best_model_path, map_location=lambda storage, loc: storage))
244 | 
245 |     x, _ = train_loader.dataset.last_insample_window()
246 |     y = test_loader.dataset.timeseries
247 |     x = torch.tensor(x, dtype=torch.float32).to(accelerator.device)
248 |     x = x.unsqueeze(-1)
249 | 
250 |     model.eval()
251 | 
252 |     with torch.no_grad():
253 |         B, _, C = x.shape
254 |         dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
255 |         dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1)
256 |         outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
257 |         id_list = np.arange(0, B, args.eval_batch_size)
258 |         id_list = np.append(id_list, B)
259 |         for i in range(len(id_list) - 1):
260 |             outputs[id_list[i]:id_list[i + 1], :, :] = model(
261 |                 x[id_list[i]:id_list[i + 1]],
262 |                 None,
263 |                 dec_inp[id_list[i]:id_list[i + 1]],
264 |                 None
265 |             )
266 |         accelerator.wait_for_everyone()
267 |         f_dim = -1 if args.features == 'MS' else 0
268 |         outputs = outputs[:, -args.pred_len:, f_dim:]
269 |         outputs = outputs.detach().cpu().numpy()
270 | 
271 |         preds = outputs
272 |         trues = y
273 |         x = x.detach().cpu().numpy()
274 | 
275 |     accelerator.print('test shape:', preds.shape)
276 | 
277 |     folder_path = './m4_results/' + args.model + '-' + args.model_comment + '/'
278 |     if not os.path.exists(folder_path) and accelerator.is_local_main_process:
279 |         os.makedirs(folder_path)
280 | 
281 |     if accelerator.is_local_main_process:
282 |         forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(args.pred_len)])
283 |         forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
284 |         forecasts_df.index.name = 'id'
285 |         forecasts_df.set_index(forecasts_df.columns[0], inplace=True)
286 |         forecasts_df.to_csv(folder_path + args.seasonal_patterns + '_forecast.csv')
287 | 
288 |         # calculate metrics
289 |         accelerator.print(args.model)
290 |         file_path = folder_path
291 |         if 'Weekly_forecast.csv' in os.listdir(file_path) \
292 |                 and 'Monthly_forecast.csv' in os.listdir(file_path) \
293 |                 and 'Yearly_forecast.csv' in os.listdir(file_path) \
294 |                 and 'Daily_forecast.csv' in os.listdir(file_path) \
295 |                 and 'Hourly_forecast.csv' in os.listdir(file_path) \
296 |                 and 'Quarterly_forecast.csv' in os.listdir(file_path):
297 |             m4_summary = M4Summary(file_path, args.root_path)
298 |             # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True)
299 |             smape_results, owa_results, mape, mase = m4_summary.evaluate()
300 |             accelerator.print('smape:', smape_results)
301 |             accelerator.print('mape:', mape)
302 |             accelerator.print('mase:', mase)
303 |             accelerator.print('owa:', owa_results)
304 |         else:
305 |             accelerator.print('After all 6 tasks are finished, you can calculate the averaged performance')
306 | 
307 | accelerator.wait_for_everyone()
308 | if accelerator.is_local_main_process:
309 |     path = './checkpoints'  # unique checkpoint saving path
310 |     del_files(path)  # delete checkpoint files
311 |     accelerator.print('success delete checkpoints')
312 | 


--------------------------------------------------------------------------------
/run_main.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import torch
  3 | from accelerate import Accelerator, DeepSpeedPlugin
  4 | from accelerate import DistributedDataParallelKwargs
  5 | from torch import nn, optim
  6 | from torch.optim import lr_scheduler
  7 | from tqdm import tqdm
  8 | 
  9 | from models import Autoformer, DLinear, TimeLLM
 10 | 
 11 | from data_provider.data_factory import data_provider
 12 | import time
 13 | import random
 14 | import numpy as np
 15 | import os
 16 | 
 17 | os.environ['CURL_CA_BUNDLE'] = ''
 18 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
 19 | 
 20 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, vali, load_content
 21 | 
 22 | parser = argparse.ArgumentParser(description='Time-LLM')
 23 | 
 24 | fix_seed = 2021
 25 | random.seed(fix_seed)
 26 | torch.manual_seed(fix_seed)
 27 | np.random.seed(fix_seed)
 28 | 
 29 | # basic config
 30 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
 31 |                     help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
 32 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
 33 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
 34 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results')
 35 | parser.add_argument('--model', type=str, required=True, default='Autoformer',
 36 |                     help='model name, options: [Autoformer, DLinear]')
 37 | parser.add_argument('--seed', type=int, default=2021, help='random seed')
 38 | 
 39 | # data loader
 40 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
 41 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
 42 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
 43 | parser.add_argument('--features', type=str, default='M',
 44 |                     help='forecasting task, options:[M, S, MS]; '
 45 |                          'M:multivariate predict multivariate, S: univariate predict univariate, '
 46 |                          'MS:multivariate predict univariate')
 47 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
 48 | parser.add_argument('--loader', type=str, default='modal', help='dataset type')
 49 | parser.add_argument('--freq', type=str, default='h',
 50 |                     help='freq for time features encoding, '
 51 |                          'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
 52 |                          'you can also use more detailed freq like 15min or 3h')
 53 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
 54 | 
 55 | # forecasting task
 56 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
 57 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
 58 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
 59 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
 60 | 
 61 | # model define
 62 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
 63 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
 64 | parser.add_argument('--c_out', type=int, default=7, help='output size')
 65 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
 66 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
 67 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
 68 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
 69 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
 70 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
 71 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
 72 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
 73 | parser.add_argument('--embed', type=str, default='timeF',
 74 |                     help='time features encoding, options:[timeF, fixed, learned]')
 75 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
 76 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
 77 | parser.add_argument('--patch_len', type=int, default=16, help='patch length')
 78 | parser.add_argument('--stride', type=int, default=8, help='stride')
 79 | parser.add_argument('--prompt_domain', type=int, default=0, help='')
 80 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT
 81 | parser.add_argument('--llm_dim', type=int, default='4096', help='LLM model dimension')# LLama7b:4096; GPT2-small:768; BERT-base:768
 82 | 
 83 | 
 84 | # optimization
 85 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
 86 | parser.add_argument('--itr', type=int, default=1, help='experiments times')
 87 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
 88 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
 89 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
 90 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
 91 | parser.add_argument('--patience', type=int, default=10, help='early stopping patience')
 92 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
 93 | parser.add_argument('--des', type=str, default='test', help='exp description')
 94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
 95 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
 96 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
 97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
 98 | parser.add_argument('--llm_layers', type=int, default=6)
 99 | parser.add_argument('--percent', type=int, default=100)
100 | 
101 | args = parser.parse_args()
102 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
103 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
104 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)
105 | 
106 | for ii in range(args.itr):
107 |     # setting record of experiments
108 |     setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
109 |         args.task_name,
110 |         args.model_id,
111 |         args.model,
112 |         args.data,
113 |         args.features,
114 |         args.seq_len,
115 |         args.label_len,
116 |         args.pred_len,
117 |         args.d_model,
118 |         args.n_heads,
119 |         args.e_layers,
120 |         args.d_layers,
121 |         args.d_ff,
122 |         args.factor,
123 |         args.embed,
124 |         args.des, ii)
125 | 
126 |     train_data, train_loader = data_provider(args, 'train')
127 |     vali_data, vali_loader = data_provider(args, 'val')
128 |     test_data, test_loader = data_provider(args, 'test')
129 | 
130 |     if args.model == 'Autoformer':
131 |         model = Autoformer.Model(args).float()
132 |     elif args.model == 'DLinear':
133 |         model = DLinear.Model(args).float()
134 |     else:
135 |         model = TimeLLM.Model(args).float()
136 | 
137 |     path = os.path.join(args.checkpoints,
138 |                         setting + '-' + args.model_comment)  # unique checkpoint saving path
139 |     args.content = load_content(args)
140 |     if not os.path.exists(path) and accelerator.is_local_main_process:
141 |         os.makedirs(path)
142 | 
143 |     time_now = time.time()
144 | 
145 |     train_steps = len(train_loader)
146 |     early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience)
147 | 
148 |     trained_parameters = []
149 |     for p in model.parameters():
150 |         if p.requires_grad is True:
151 |             trained_parameters.append(p)
152 | 
153 |     model_optim = optim.Adam(trained_parameters, lr=args.learning_rate)
154 | 
155 |     if args.lradj == 'COS':
156 |         scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
157 |     else:
158 |         scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
159 |                                             steps_per_epoch=train_steps,
160 |                                             pct_start=args.pct_start,
161 |                                             epochs=args.train_epochs,
162 |                                             max_lr=args.learning_rate)
163 | 
164 |     criterion = nn.MSELoss()
165 |     mae_metric = nn.L1Loss()
166 | 
167 |     train_loader, vali_loader, test_loader, model, model_optim, scheduler = accelerator.prepare(
168 |         train_loader, vali_loader, test_loader, model, model_optim, scheduler)
169 | 
170 |     if args.use_amp:
171 |         scaler = torch.cuda.amp.GradScaler()
172 | 
173 |     for epoch in range(args.train_epochs):
174 |         iter_count = 0
175 |         train_loss = []
176 | 
177 |         model.train()
178 |         epoch_time = time.time()
179 |         for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(train_loader)):
180 |             iter_count += 1
181 |             model_optim.zero_grad()
182 | 
183 |             batch_x = batch_x.float().to(accelerator.device)
184 |             batch_y = batch_y.float().to(accelerator.device)
185 |             batch_x_mark = batch_x_mark.float().to(accelerator.device)
186 |             batch_y_mark = batch_y_mark.float().to(accelerator.device)
187 | 
188 |             # decoder input
189 |             dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(
190 |                 accelerator.device)
191 |             dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
192 |                 accelerator.device)
193 | 
194 |             # encoder - decoder
195 |             if args.use_amp:
196 |                 with torch.cuda.amp.autocast():
197 |                     if args.output_attention:
198 |                         outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
199 |                     else:
200 |                         outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
201 | 
202 |                     f_dim = -1 if args.features == 'MS' else 0
203 |                     outputs = outputs[:, -args.pred_len:, f_dim:]
204 |                     batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
205 |                     loss = criterion(outputs, batch_y)
206 |                     train_loss.append(loss.item())
207 |             else:
208 |                 if args.output_attention:
209 |                     outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
210 |                 else:
211 |                     outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
212 | 
213 |                 f_dim = -1 if args.features == 'MS' else 0
214 |                 outputs = outputs[:, -args.pred_len:, f_dim:]
215 |                 batch_y = batch_y[:, -args.pred_len:, f_dim:]
216 |                 loss = criterion(outputs, batch_y)
217 |                 train_loss.append(loss.item())
218 | 
219 |             if (i + 1) % 100 == 0:
220 |                 accelerator.print(
221 |                     "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
222 |                 speed = (time.time() - time_now) / iter_count
223 |                 left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
224 |                 accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
225 |                 iter_count = 0
226 |                 time_now = time.time()
227 | 
228 |             if args.use_amp:
229 |                 scaler.scale(loss).backward()
230 |                 scaler.step(model_optim)
231 |                 scaler.update()
232 |             else:
233 |                 accelerator.backward(loss)
234 |                 model_optim.step()
235 | 
236 |             if args.lradj == 'TST':
237 |                 adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
238 |                 scheduler.step()
239 | 
240 |         accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
241 |         train_loss = np.average(train_loss)
242 |         vali_loss, vali_mae_loss = vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric)
243 |         test_loss, test_mae_loss = vali(args, accelerator, model, test_data, test_loader, criterion, mae_metric)
244 |         accelerator.print(
245 |             "Epoch: {0} | Train Loss: {1:.7f} Vali Loss: {2:.7f} Test Loss: {3:.7f} MAE Loss: {4:.7f}".format(
246 |                 epoch + 1, train_loss, vali_loss, test_loss, test_mae_loss))
247 | 
248 |         early_stopping(vali_loss, model, path)
249 |         if early_stopping.early_stop:
250 |             accelerator.print("Early stopping")
251 |             break
252 | 
253 |         if args.lradj != 'TST':
254 |             if args.lradj == 'COS':
255 |                 scheduler.step()
256 |                 accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
257 |             else:
258 |                 if epoch == 0:
259 |                     args.learning_rate = model_optim.param_groups[0]['lr']
260 |                     accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
261 |                 adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
262 | 
263 |         else:
264 |             accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
265 | 
266 | accelerator.wait_for_everyone()
267 | if accelerator.is_local_main_process:
268 |     path = './checkpoints'  # unique checkpoint saving path
269 |     del_files(path)  # delete checkpoint files
270 |     accelerator.print('success delete checkpoints')


--------------------------------------------------------------------------------
/run_pretrain.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import torch
  3 | from accelerate import Accelerator, DeepSpeedPlugin
  4 | from accelerate import DistributedDataParallelKwargs
  5 | from torch import nn, optim
  6 | from torch.optim import lr_scheduler
  7 | 
  8 | from data_provider_pretrain.data_factory import data_provider
  9 | from models import Autoformer, DLinear, TimeLLM
 10 | 
 11 | import time
 12 | import random
 13 | import numpy as np
 14 | import os
 15 | 
 16 | os.environ['CURL_CA_BUNDLE'] = ''
 17 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
 18 | 
 19 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, vali, load_content
 20 | 
 21 | parser = argparse.ArgumentParser(description='Time-LLM')
 22 | 
 23 | fix_seed = 2021
 24 | random.seed(fix_seed)
 25 | torch.manual_seed(fix_seed)
 26 | np.random.seed(fix_seed)
 27 | 
 28 | # basic config
 29 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
 30 |                     help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
 31 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
 32 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
 33 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results')
 34 | parser.add_argument('--model', type=str, required=True, default='Autoformer',
 35 |                     help='model name, options: [Autoformer, DLinear]')
 36 | parser.add_argument('--seed', type=int, default=2021, help='random seed')
 37 | 
 38 | # data loader
 39 | parser.add_argument('--data_pretrain', type=str, required=True, default='ETTm1', help='dataset type')
 40 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
 41 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
 42 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
 43 | parser.add_argument('--data_path_pretrain', type=str, default='ETTh1.csv', help='data file')
 44 | parser.add_argument('--features', type=str, default='M',
 45 |                     help='forecasting task, options:[M, S, MS]; '
 46 |                          'M:multivariate predict multivariate, S: univariate predict univariate, '
 47 |                          'MS:multivariate predict univariate')
 48 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
 49 | parser.add_argument('--loader', type=str, default='modal', help='dataset type')
 50 | parser.add_argument('--freq', type=str, default='h',
 51 |                     help='freq for time features encoding, '
 52 |                          'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
 53 |                          'you can also use more detailed freq like 15min or 3h')
 54 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
 55 | 
 56 | # forecasting task
 57 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
 58 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
 59 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
 60 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
 61 | 
 62 | # model define
 63 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
 64 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
 65 | parser.add_argument('--c_out', type=int, default=7, help='output size')
 66 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
 67 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
 68 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
 69 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
 70 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
 71 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
 72 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
 73 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
 74 | parser.add_argument('--embed', type=str, default='timeF',
 75 |                     help='time features encoding, options:[timeF, fixed, learned]')
 76 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
 77 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
 78 | parser.add_argument('--patch_len', type=int, default=16, help='patch length')
 79 | parser.add_argument('--stride', type=int, default=8, help='stride')
 80 | parser.add_argument('--prompt_domain', type=int, default=0, help='')
 81 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT
 82 | parser.add_argument('--llm_dim', type=int, default='4096', help='LLM model dimension')# LLama7b:4096; GPT2-small:768; BERT-base:768
 83 | 
 84 | # optimization
 85 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
 86 | parser.add_argument('--itr', type=int, default=1, help='experiments times')
 87 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
 88 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
 89 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
 90 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
 91 | parser.add_argument('--patience', type=int, default=5, help='early stopping patience')
 92 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
 93 | parser.add_argument('--des', type=str, default='test', help='exp description')
 94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
 95 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
 96 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
 97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
 98 | parser.add_argument('--llm_layers', type=int, default=6)
 99 | parser.add_argument('--percent', type=int, default=100)
100 | 
101 | args = parser.parse_args()
102 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
103 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
104 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)
105 | 
106 | for ii in range(args.itr):
107 |     # setting record of experiments
108 |     setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
109 |         args.task_name,
110 |         args.model_id,
111 |         args.model,
112 |         args.data,
113 |         args.features,
114 |         args.seq_len,
115 |         args.label_len,
116 |         args.pred_len,
117 |         args.d_model,
118 |         args.n_heads,
119 |         args.e_layers,
120 |         args.d_layers,
121 |         args.d_ff,
122 |         args.factor,
123 |         args.embed,
124 |         args.des, ii)
125 | 
126 |     train_data, train_loader = data_provider(args, args.data_pretrain, args.data_path_pretrain, True, 'train')
127 |     vali_data, vali_loader = data_provider(args, args.data_pretrain, args.data_path_pretrain, True, 'val')
128 |     test_data, test_loader = data_provider(args, args.data, args.data_path, False, 'test')
129 | 
130 |     if args.model == 'Autoformer':
131 |         model = Autoformer.Model(args).float()
132 |     elif args.model == 'DLinear':
133 |         model = DLinear.Model(args).float()
134 |     else:
135 |         model = TimeLLM.Model(args).float()
136 | 
137 |     path = os.path.join(args.checkpoints,
138 |                         setting + '-' + args.model_comment)  # unique checkpoint saving path
139 |     args.content = load_content(args)
140 |     if not os.path.exists(path) and accelerator.is_local_main_process:
141 |         os.makedirs(path)
142 | 
143 |     time_now = time.time()
144 | 
145 |     train_steps = len(train_loader)
146 |     early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience)
147 | 
148 |     trained_parameters = []
149 |     for p in model.parameters():
150 |         if p.requires_grad is True:
151 |             trained_parameters.append(p)
152 | 
153 |     model_optim = optim.Adam(trained_parameters, lr=args.learning_rate)
154 | 
155 |     if args.lradj == 'COS':
156 |         scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
157 |     else:
158 |         scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
159 |                                             steps_per_epoch=train_steps,
160 |                                             pct_start=args.pct_start,
161 |                                             epochs=args.train_epochs,
162 |                                             max_lr=args.learning_rate)
163 | 
164 |     criterion = nn.MSELoss()
165 |     mae_metric = nn.L1Loss()
166 | 
167 |     train_loader, vali_loader, test_loader, model, model_optim, scheduler = accelerator.prepare(
168 |         train_loader, vali_loader, test_loader, model, model_optim, scheduler)
169 | 
170 |     if args.use_amp:
171 |         scaler = torch.cuda.amp.GradScaler()
172 | 
173 |     for epoch in range(args.train_epochs):
174 |         iter_count = 0
175 |         train_loss = []
176 | 
177 |         model.train()
178 |         epoch_time = time.time()
179 |         for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
180 |             iter_count += 1
181 |             model_optim.zero_grad()
182 | 
183 |             batch_x = batch_x.float().to(accelerator.device)
184 |             batch_y = batch_y.float().to(accelerator.device)
185 |             batch_x_mark = batch_x_mark.float().to(accelerator.device)
186 |             batch_y_mark = batch_y_mark.float().to(accelerator.device)
187 | 
188 |             # decoder input
189 |             dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(
190 |                 accelerator.device)
191 |             dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
192 |                 accelerator.device)
193 | 
194 |             # encoder - decoder
195 |             if args.use_amp:
196 |                 with torch.cuda.amp.autocast():
197 |                     if args.output_attention:
198 |                         outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
199 |                     else:
200 |                         outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
201 | 
202 |                     f_dim = -1 if args.features == 'MS' else 0
203 |                     outputs = outputs[:, -args.pred_len:, f_dim:]
204 |                     batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
205 |                     loss = criterion(outputs, batch_y)
206 |                     train_loss.append(loss.item())
207 |             else:
208 |                 if args.output_attention:
209 |                     outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
210 |                 else:
211 |                     outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
212 | 
213 |                 f_dim = -1 if args.features == 'MS' else 0
214 |                 outputs = outputs[:, -args.pred_len:, f_dim:]
215 |                 batch_y = batch_y[:, -args.pred_len:, f_dim:]
216 |                 loss = criterion(outputs, batch_y)
217 |                 train_loss.append(loss.item())
218 | 
219 |             if (i + 1) % 100 == 0:
220 |                 accelerator.print(
221 |                     "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
222 |                 speed = (time.time() - time_now) / iter_count
223 |                 left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
224 |                 accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
225 |                 iter_count = 0
226 |                 time_now = time.time()
227 | 
228 |             if args.use_amp:
229 |                 scaler.scale(loss).backward()
230 |                 scaler.step(model_optim)
231 |                 scaler.update()
232 |             else:
233 |                 accelerator.backward(loss)
234 |                 model_optim.step()
235 | 
236 |             if args.lradj == 'TST':
237 |                 adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
238 |                 scheduler.step()
239 | 
240 |         accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
241 |         train_loss = np.average(train_loss)
242 |         vali_loss, vali_mae_loss = vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric)
243 |         test_loss, test_mae_loss = vali(args, accelerator, model, test_data, test_loader, criterion, mae_metric)
244 |         accelerator.print(
245 |             "Epoch: {0} | Train Loss: {1:.7f} Vali Loss: {2:.7f} Test Loss: {3:.7f} MAE Loss: {4:.7f}".format(
246 |                 epoch + 1, train_loss, vali_loss, test_loss, test_mae_loss))
247 | 
248 |         early_stopping(vali_loss, model, path)
249 |         if early_stopping.early_stop:
250 |             accelerator.print("Early stopping")
251 |             break
252 | 
253 |         if args.lradj != 'TST':
254 |             if args.lradj == 'COS':
255 |                 scheduler.step()
256 |                 accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
257 |             else:
258 |                 if epoch == 0:
259 |                     args.learning_rate = model_optim.param_groups[0]['lr']
260 |                     accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
261 |                 adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
262 | 
263 |         else:
264 |             accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
265 | 
266 | accelerator.wait_for_everyone()
267 | if accelerator.is_local_main_process:
268 |     path = './checkpoints'  # unique checkpoint saving path
269 |     del_files(path)  # delete checkpoint files
270 |     accelerator.print('success delete checkpoints')


--------------------------------------------------------------------------------
/scripts/TimeLLM_ECL.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=10
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00097
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=16
 10 | d_ff=32
 11 | 
 12 | comment='TimeLLM-ECL'
 13 | 
 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 15 |   --task_name long_term_forecast \
 16 |   --is_training 1 \
 17 |   --root_path ./dataset/electricity/ \
 18 |   --data_path electricity.csv \
 19 |   --model_id ECL_512_96 \
 20 |   --model $model_name \
 21 |   --data ECL \
 22 |   --features M \
 23 |   --seq_len 512 \
 24 |   --label_len 48 \
 25 |   --pred_len 96 \
 26 |   --e_layers 2 \
 27 |   --d_layers 1 \
 28 |   --factor 3 \
 29 |   --enc_in 321 \
 30 |   --dec_in 321 \
 31 |   --c_out 321 \
 32 |   --batch_size $batch_size \
 33 |   --learning_rate $learning_rate \
 34 |   --llm_layers $llama_layers \
 35 |   --train_epochs $train_epochs \
 36 |   --model_comment $comment
 37 | 
 38 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 39 |   --task_name long_term_forecast \
 40 |   --is_training 1 \
 41 |   --root_path ./dataset/electricity/ \
 42 |   --data_path electricity.csv \
 43 |   --model_id ECL_512_192 \
 44 |   --model $model_name \
 45 |   --data ECL \
 46 |   --features M \
 47 |   --seq_len 512 \
 48 |   --label_len 48 \
 49 |   --pred_len 192 \
 50 |   --e_layers 2 \
 51 |   --d_layers 1 \
 52 |   --factor 3 \
 53 |   --enc_in 321 \
 54 |   --dec_in 321 \
 55 |   --c_out 321 \
 56 |   --batch_size $batch_size \
 57 |   --learning_rate $learning_rate \
 58 |   --llm_layers $llama_layers \
 59 |   --train_epochs $train_epochs \
 60 |   --model_comment $comment
 61 | 
 62 |   accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 63 |   --task_name long_term_forecast \
 64 |   --is_training 1 \
 65 |   --root_path ./dataset/electricity/ \
 66 |   --data_path electricity.csv \
 67 |   --model_id ECL_512_336 \
 68 |   --model $model_name \
 69 |   --data ECL \
 70 |   --features M \
 71 |   --seq_len 512 \
 72 |   --label_len 48 \
 73 |   --pred_len 336 \
 74 |   --e_layers 2 \
 75 |   --d_layers 1 \
 76 |   --factor 3 \
 77 |   --enc_in 321 \
 78 |   --dec_in 321 \
 79 |   --c_out 321 \
 80 |   --batch_size $batch_size \
 81 |   --learning_rate $learning_rate \
 82 |   --llm_layers $llama_layers \
 83 |   --train_epochs $train_epochs \
 84 |   --model_comment $comment
 85 | 
 86 |   accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 87 |   --task_name long_term_forecast \
 88 |   --is_training 1 \
 89 |   --root_path ./dataset/electricity/ \
 90 |   --data_path electricity.csv \
 91 |   --model_id ECL_512_720 \
 92 |   --model $model_name \
 93 |   --data ECL \
 94 |   --features M \
 95 |   --seq_len 512 \
 96 |   --label_len 48 \
 97 |   --pred_len 720 \
 98 |   --e_layers 2 \
 99 |   --d_layers 1 \
100 |   --factor 3 \
101 |   --enc_in 321 \
102 |   --dec_in 321 \
103 |   --c_out 321 \
104 |   --batch_size $batch_size \
105 |   --learning_rate $learning_rate \
106 |   --llm_layers $llama_layers \
107 |   --train_epochs $train_epochs \
108 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTh1.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=100
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00097
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=32
 10 | d_ff=128
 11 | 
 12 | comment='TimeLLM-ETTh1'
 13 | 
 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 15 |   --task_name long_term_forecast \
 16 |   --is_training 1 \
 17 |   --root_path ./dataset/ETT-small/ \
 18 |   --data_path ETTh1.csv \
 19 |   --model_id ETTh1_512_96 \
 20 |   --model $model_name \
 21 |   --data ETTh1 \
 22 |   --features M \
 23 |   --seq_len 512 \
 24 |   --label_len 48 \
 25 |   --pred_len 96 \
 26 |   --factor 3 \
 27 |   --enc_in 7 \
 28 |   --dec_in 7 \
 29 |   --c_out 7 \
 30 |   --des 'Exp' \
 31 |   --itr 1 \
 32 |   --d_model $d_model \
 33 |   --d_ff $d_ff \
 34 |   --batch_size $batch_size \
 35 |   --learning_rate $learning_rate \
 36 |   --llm_layers $llama_layers \
 37 |   --train_epochs $train_epochs \
 38 |   --model_comment $comment
 39 | 
 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 41 |   --task_name long_term_forecast \
 42 |   --is_training 1 \
 43 |   --root_path ./dataset/ETT-small/ \
 44 |   --data_path ETTh1.csv \
 45 |   --model_id ETTh1_512_192 \
 46 |   --model $model_name \
 47 |   --data ETTh1 \
 48 |   --features M \
 49 |   --seq_len 512 \
 50 |   --label_len 48 \
 51 |   --pred_len 192 \
 52 |   --factor 3 \
 53 |   --enc_in 7 \
 54 |   --dec_in 7 \
 55 |   --c_out 7 \
 56 |   --des 'Exp' \
 57 |   --itr 1 \
 58 |   --d_model 32 \
 59 |   --d_ff 128 \
 60 |   --batch_size $batch_size \
 61 |   --learning_rate 0.02 \
 62 |   --llm_layers $llama_layers \
 63 |   --train_epochs $train_epochs \
 64 |   --model_comment $comment
 65 | 
 66 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 67 |   --task_name long_term_forecast \
 68 |   --is_training 1 \
 69 |   --root_path ./dataset/ETT-small/ \
 70 |   --data_path ETTh1.csv \
 71 |   --model_id ETTh1_512_336 \
 72 |   --model $model_name \
 73 |   --data ETTh1 \
 74 |   --features M \
 75 |   --seq_len 512 \
 76 |   --label_len 48 \
 77 |   --pred_len 336 \
 78 |   --factor 3 \
 79 |   --enc_in 7 \
 80 |   --dec_in 7 \
 81 |   --c_out 7 \
 82 |   --des 'Exp' \
 83 |   --itr 1 \
 84 |   --d_model $d_model \
 85 |   --d_ff $d_ff \
 86 |   --batch_size $batch_size \
 87 |   --lradj 'COS'\
 88 |   --learning_rate 0.001 \
 89 |   --llm_layers $llama_layers \
 90 |   --train_epochs $train_epochs \
 91 |   --model_comment $comment
 92 | 
 93 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 94 |   --task_name long_term_forecast \
 95 |   --is_training 1 \
 96 |   --root_path ./dataset/ETT-small/ \
 97 |   --data_path ETTh1.csv \
 98 |   --model_id ETTh1_512_720 \
 99 |   --model $model_name \
100 |   --data ETTh1 \
101 |   --features M \
102 |   --seq_len 512 \
103 |   --label_len 48 \
104 |   --pred_len 720 \
105 |   --factor 3 \
106 |   --enc_in 7 \
107 |   --dec_in 7 \
108 |   --c_out 7 \
109 |   --des 'Exp' \
110 |   --itr 1 \
111 |   --d_model $d_model \
112 |   --d_ff $d_ff \
113 |   --batch_size $batch_size \
114 |   --learning_rate $learning_rate \
115 |   --llm_layers $llama_layers \
116 |   --train_epochs $train_epochs \
117 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTh1_ETTh2.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | learning_rate=0.01
  3 | llama_layers=32
  4 | 
  5 | master_port=00097
  6 | num_process=8
  7 | batch_size=24
  8 | d_model=32
  9 | d_ff=128
 10 | 
 11 | comment='TimeLLM-ETTh1_ETTh2'
 12 | 
 13 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
 14 |   --task_name long_term_forecast \
 15 |   --is_training 1 \
 16 |   --root_path ./dataset/ETT-small/ \
 17 |   --data_path_pretrain ETTh1.csv \
 18 |   --data_path ETTh2.csv \
 19 |   --model_id ETTh1_ETTh2_512_96 \
 20 |   --model $model_name \
 21 |   --data_pretrain ETTh1 \
 22 |   --data ETTh2 \
 23 |   --features M \
 24 |   --seq_len 512 \
 25 |   --label_len 48 \
 26 |   --pred_len 96 \
 27 |   --factor 3 \
 28 |   --enc_in 7 \
 29 |   --dec_in 7 \
 30 |   --c_out 7 \
 31 |   --des 'Exp' \
 32 |   --itr 1 \
 33 |   --d_model $d_model \
 34 |   --d_ff $d_ff \
 35 |   --batch_size $batch_size \
 36 |   --learning_rate $learning_rate \
 37 |   --llm_layers $llama_layers \
 38 |   --train_epochs 5 \
 39 |   --model_comment $comment
 40 | 
 41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
 42 |   --task_name long_term_forecast \
 43 |   --is_training 1 \
 44 |   --root_path ./dataset/ETT-small/ \
 45 |   --data_path_pretrain ETTh1.csv \
 46 |   --data_path ETTh2.csv \
 47 |   --model_id ETTh1_ETTh2_512_192 \
 48 |   --model $model_name \
 49 |   --data_pretrain ETTh1 \
 50 |   --data ETTh2 \
 51 |   --features M \
 52 |   --seq_len 512 \
 53 |   --label_len 48 \
 54 |   --pred_len 192 \
 55 |   --factor 3 \
 56 |   --enc_in 7 \
 57 |   --dec_in 7 \
 58 |   --c_out 7 \
 59 |   --des 'Exp' \
 60 |   --itr 1 \
 61 |   --d_model 32 \
 62 |   --d_ff 128 \
 63 |   --batch_size $batch_size \
 64 |   --learning_rate 0.02 \
 65 |   --llm_layers $llama_layers \
 66 |   --train_epochs 5 \
 67 |   --model_comment $comment
 68 | 
 69 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
 70 |   --task_name long_term_forecast \
 71 |   --is_training 1 \
 72 |   --root_path ./dataset/ETT-small/ \
 73 |   --data_path_pretrain ETTh1.csv \
 74 |   --data_path ETTh2.csv \
 75 |   --model_id ETTh1_ETTh2_512_336 \
 76 |   --model $model_name \
 77 |   --data_pretrain ETTh1 \
 78 |   --data ETTh2 \
 79 |   --features M \
 80 |   --seq_len 512 \
 81 |   --label_len 48 \
 82 |   --pred_len 336 \
 83 |   --factor 3 \
 84 |   --enc_in 7 \
 85 |   --dec_in 7 \
 86 |   --c_out 7 \
 87 |   --des 'Exp' \
 88 |   --itr 1 \
 89 |   --d_model $d_model \
 90 |   --d_ff $d_ff \
 91 |   --batch_size $batch_size \
 92 |   --lradj 'COS'\
 93 |   --learning_rate 0.001 \
 94 |   --llm_layers $llama_layers \
 95 |   --train_epochs 5 \
 96 |   --model_comment $comment
 97 | 
 98 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
 99 |   --task_name long_term_forecast \
100 |   --is_training 1 \
101 |   --root_path ./dataset/ETT-small/ \
102 |   --data_path_pretrain ETTh1.csv \
103 |   --data_path ETTh2.csv \
104 |   --model_id ETTh1_ETTh2_512_720 \
105 |   --model $model_name \
106 |   --data_pretrain ETTh1 \
107 |   --data ETTh2 \
108 |   --features M \
109 |   --seq_len 512 \
110 |   --label_len 48 \
111 |   --pred_len 720 \
112 |   --factor 3 \
113 |   --enc_in 7 \
114 |   --dec_in 7 \
115 |   --c_out 7 \
116 |   --des 'Exp' \
117 |   --itr 1 \
118 |   --d_model $d_model \
119 |   --d_ff $d_ff \
120 |   --batch_size $batch_size \
121 |   --learning_rate $learning_rate \
122 |   --llm_layers $llama_layers \
123 |   --train_epochs 5 \
124 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTh2.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=10
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00098
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=32
 10 | d_ff=128
 11 | 
 12 | comment='TimeLLM-ETTh2'
 13 | 
 14 | 
 15 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 16 |   --task_name long_term_forecast \
 17 |   --is_training 1 \
 18 |   --root_path ./dataset/ETT-small/ \
 19 |   --data_path ETTh2.csv \
 20 |   --model_id ETTh2_512_96 \
 21 |   --model $model_name \
 22 |   --data ETTh2 \
 23 |   --features M \
 24 |   --seq_len 512 \
 25 |   --label_len 48 \
 26 |   --pred_len 96 \
 27 |   --factor 3 \
 28 |   --enc_in 7 \
 29 |   --dec_in 7 \
 30 |   --c_out 7 \
 31 |   --des 'Exp' \
 32 |   --itr 1 \
 33 |   --d_model $d_model \
 34 |   --d_ff $d_ff \
 35 |   --batch_size $batch_size \
 36 |   --learning_rate $learning_rate \
 37 |   --llm_layers $llama_layers \
 38 |   --train_epochs $train_epochs \
 39 |   --model_comment $comment
 40 | 
 41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 42 |   --task_name long_term_forecast \
 43 |   --is_training 1 \
 44 |   --root_path ./dataset/ETT-small/ \
 45 |   --data_path ETTh2.csv \
 46 |   --model_id ETTh2_512_192 \
 47 |   --model $model_name \
 48 |   --data ETTh2 \
 49 |   --features M \
 50 |   --seq_len 512 \
 51 |   --label_len 48 \
 52 |   --pred_len 192 \
 53 |   --factor 3 \
 54 |   --enc_in 7 \
 55 |   --dec_in 7 \
 56 |   --c_out 7 \
 57 |   --des 'Exp' \
 58 |   --itr 1 \
 59 |   --d_model $d_model \
 60 |   --d_ff $d_ff \
 61 |   --batch_size $batch_size \
 62 |   --lradj 'TST'\
 63 |   --learning_rate 0.002 \
 64 |   --llm_layers $llama_layers \
 65 |   --train_epochs $train_epochs \
 66 |   --model_comment $comment
 67 | 
 68 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 69 |   --task_name long_term_forecast \
 70 |   --is_training 1 \
 71 |   --root_path ./dataset/ETT-small/ \
 72 |   --data_path ETTh2.csv \
 73 |   --model_id ETTh2_512_336 \
 74 |   --model $model_name \
 75 |   --data ETTh2 \
 76 |   --features M \
 77 |   --seq_len 512 \
 78 |   --label_len 48 \
 79 |   --pred_len 336 \
 80 |   --factor 3 \
 81 |   --enc_in 7 \
 82 |   --dec_in 7 \
 83 |   --c_out 7 \
 84 |   --des 'Exp' \
 85 |   --itr 1 \
 86 |   --d_model $d_model \
 87 |   --d_ff $d_ff \
 88 |   --batch_size $batch_size \
 89 |   --lradj 'TST'\
 90 |   --learning_rate 0.005 \
 91 |   --llm_layers $llama_layers \
 92 |   --train_epochs $train_epochs \
 93 |   --model_comment $comment
 94 | 
 95 | 
 96 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 97 |   --task_name long_term_forecast \
 98 |   --is_training 1 \
 99 |   --root_path ./dataset/ETT-small/ \
100 |   --data_path ETTh2.csv \
101 |   --model_id ETTh2_512_720 \
102 |   --model $model_name \
103 |   --data ETTh2 \
104 |   --features M \
105 |   --seq_len 512 \
106 |   --label_len 48 \
107 |   --pred_len 720 \
108 |   --factor 3 \
109 |   --enc_in 7 \
110 |   --dec_in 7 \
111 |   --c_out 7 \
112 |   --des 'Exp' \
113 |   --itr 1 \
114 |   --d_model 16 \
115 |   --d_ff 128 \
116 |   --batch_size $batch_size \
117 |   --learning_rate 0.005 \
118 |   --lradj 'TST'\
119 |   --llm_layers $llama_layers \
120 |   --train_epochs 20 \
121 |   --patience 10 \
122 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTm1.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=100
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00097
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=32
 10 | d_ff=128
 11 | 
 12 | comment='TimeLLM-ETTm1'
 13 | 
 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 15 |   --task_name long_term_forecast \
 16 |   --is_training 1 \
 17 |   --root_path ./dataset/ETT-small/ \
 18 |   --data_path ETTm1.csv \
 19 |   --model_id ETTm1_512_96 \
 20 |   --model $model_name \
 21 |   --data ETTm1 \
 22 |   --features M \
 23 |   --seq_len 512 \
 24 |   --label_len 48 \
 25 |   --pred_len 96 \
 26 |   --factor 3 \
 27 |   --enc_in 7 \
 28 |   --dec_in 7 \
 29 |   --c_out 7 \
 30 |   --des 'Exp' \
 31 |   --itr 1 \
 32 |   --d_model $d_model \
 33 |   --d_ff $d_ff \
 34 |   --batch_size $batch_size \
 35 |   --lradj 'TST'\
 36 |   --learning_rate 0.001 \
 37 |   --llm_layers $llama_layers \
 38 |   --train_epochs $train_epochs \
 39 |   --model_comment $comment
 40 | 
 41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 42 |   --task_name long_term_forecast \
 43 |   --is_training 1 \
 44 |   --root_path ./dataset/ETT-small/ \
 45 |   --data_path ETTm1.csv \
 46 |   --model_id ETTm1_512_192 \
 47 |   --model $model_name \
 48 |   --data ETTm1 \
 49 |   --features M \
 50 |   --seq_len 512 \
 51 |   --label_len 48 \
 52 |   --pred_len 192 \
 53 |   --factor 3 \
 54 |   --enc_in 7 \
 55 |   --dec_in 7 \
 56 |   --c_out 7 \
 57 |   --des 'Exp' \
 58 |   --itr 1 \
 59 |   --d_model $d_model \
 60 |   --d_ff $d_ff \
 61 |   --batch_size $batch_size \
 62 |   --learning_rate $learning_rate \
 63 |   --lradj 'TST'\
 64 |   --learning_rate 0.001 \
 65 |   --llm_layers $llama_layers \
 66 |   --train_epochs $train_epochs \
 67 |   --patience 20 \
 68 |   --model_comment $comment
 69 | 
 70 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 71 |   --task_name long_term_forecast \
 72 |   --is_training 1 \
 73 |   --root_path ./dataset/ETT-small/ \
 74 |   --data_path ETTm1.csv \
 75 |   --model_id ETTm1_512_336 \
 76 |   --model $model_name \
 77 |   --data ETTm1 \
 78 |   --features M \
 79 |   --seq_len 512 \
 80 |   --label_len 48 \
 81 |   --pred_len 336 \
 82 |   --factor 3 \
 83 |   --enc_in 7 \
 84 |   --dec_in 7 \
 85 |   --c_out 7 \
 86 |   --des 'Exp' \
 87 |   --itr 1 \
 88 |   --d_model $d_model \
 89 |   --d_ff $d_ff \
 90 |   --batch_size $batch_size \
 91 |   --learning_rate $learning_rate \
 92 |   --lradj 'TST'\
 93 |   --learning_rate 0.001 \
 94 |   --llm_layers $llama_layers \
 95 |   --train_epochs $train_epochs \
 96 |   --patience 20 \
 97 |   --model_comment $comment
 98 | 
 99 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
100 |   --task_name long_term_forecast \
101 |   --is_training 1 \
102 |   --root_path ./dataset/ETT-small/ \
103 |   --data_path ETTm1.csv \
104 |   --model_id ETTm1_512_720 \
105 |   --model $model_name \
106 |   --data ETTm1 \
107 |   --features M \
108 |   --seq_len 512 \
109 |   --label_len 48 \
110 |   --pred_len 720 \
111 |   --factor 3 \
112 |   --enc_in 7 \
113 |   --dec_in 7 \
114 |   --c_out 7 \
115 |   --des 'Exp' \
116 |   --itr 1 \
117 |   --d_model $d_model \
118 |   --d_ff $d_ff \
119 |   --batch_size $batch_size \
120 |   --learning_rate $learning_rate \
121 |   --lradj 'TST'\
122 |   --learning_rate 0.001 \
123 |   --llm_layers $llama_layers \
124 |   --train_epochs $train_epochs \
125 |   --patience 20 \
126 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTm2.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=10
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00097
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=32
 10 | d_ff=128
 11 | 
 12 | comment='TimeLLM-ETTm2'
 13 | 
 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 15 |   --task_name long_term_forecast \
 16 |   --is_training 1 \
 17 |   --root_path ./dataset/ETT-small/ \
 18 |   --data_path ETTm2.csv \
 19 |   --model_id ETTm2_512_96 \
 20 |   --model $model_name \
 21 |   --data ETTm2 \
 22 |   --features M \
 23 |   --seq_len 512 \
 24 |   --label_len 48 \
 25 |   --pred_len 96 \
 26 |   --factor 3 \
 27 |   --enc_in 7 \
 28 |   --dec_in 7 \
 29 |   --c_out 7 \
 30 |   --des 'Exp' \
 31 |   --itr 1 \
 32 |   --d_model $d_model \
 33 |   --d_ff $d_ff \
 34 |   --batch_size 16 \
 35 |   --learning_rate $learning_rate \
 36 |   --llm_layers $llama_layers \
 37 |   --train_epochs $train_epochs \
 38 |   --model_comment $comment
 39 | 
 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 41 |   --task_name long_term_forecast \
 42 |   --is_training 1 \
 43 |   --root_path ./dataset/ETT-small/ \
 44 |   --data_path ETTm2.csv \
 45 |   --model_id ETTm2_512_192 \
 46 |   --model $model_name \
 47 |   --data ETTm2 \
 48 |   --features M \
 49 |   --seq_len 512 \
 50 |   --label_len 48 \
 51 |   --pred_len 192 \
 52 |   --factor 3 \
 53 |   --enc_in 7 \
 54 |   --dec_in 7 \
 55 |   --c_out 7 \
 56 |   --des 'Exp' \
 57 |   --itr 1 \
 58 |   --d_model $d_model \
 59 |   --d_ff $d_ff \
 60 |   --batch_size $batch_size \
 61 |   --learning_rate $learning_rate \
 62 |   --lradj 'TST'\
 63 |   --learning_rate 0.002 \
 64 |   --llm_layers $llama_layers \
 65 |   --train_epochs $train_epochs \
 66 |   --model_comment $comment
 67 | 
 68 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 69 |   --task_name long_term_forecast \
 70 |   --is_training 1 \
 71 |   --root_path ./dataset/ETT-small/ \
 72 |   --data_path ETTm2.csv \
 73 |   --model_id ETTm2_512_336 \
 74 |   --model $model_name \
 75 |   --data ETTm2 \
 76 |   --features M \
 77 |   --seq_len 512 \
 78 |   --label_len 48 \
 79 |   --pred_len 336 \
 80 |   --factor 3 \
 81 |   --enc_in 7 \
 82 |   --dec_in 7 \
 83 |   --c_out 7 \
 84 |   --des 'Exp' \
 85 |   --itr 1 \
 86 |   --d_model $d_model \
 87 |   --d_ff $d_ff \
 88 |   --batch_size $batch_size \
 89 |   --learning_rate $learning_rate \
 90 |   --lradj 'TST'\
 91 |   --learning_rate 0.002 \
 92 |   --llm_layers $llama_layers \
 93 |   --train_epochs $train_epochs \
 94 |   --model_comment $comment
 95 | 
 96 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 97 |   --task_name long_term_forecast \
 98 |   --is_training 1 \
 99 |   --root_path ./dataset/ETT-small/ \
100 |   --data_path ETTm2.csv \
101 |   --model_id ETTm2_512_720 \
102 |   --model $model_name \
103 |   --data ETTm2 \
104 |   --features M \
105 |   --seq_len 512 \
106 |   --label_len 48 \
107 |   --pred_len 720 \
108 |   --factor 3 \
109 |   --enc_in 7 \
110 |   --dec_in 7 \
111 |   --c_out 7 \
112 |   --des 'Exp' \
113 |   --itr 1 \
114 |   --d_model $d_model \
115 |   --d_ff $d_ff \
116 |   --batch_size $batch_size \
117 |   --learning_rate $learning_rate \
118 |   --lradj 'TST'\
119 |   --learning_rate 0.002 \
120 |   --llm_layers $llama_layers \
121 |   --train_epochs $train_epochs \
122 |   --model_comment $comment
123 | 
124 | 
125 | 


--------------------------------------------------------------------------------
/scripts/TimeLLM_M4.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | 
  3 | train_epochs=50
  4 | llama_layers=32
  5 | batch_size=24
  6 | learning_rate=0.001
  7 | d_model=8
  8 | d_ff=32
  9 | 
 10 | master_port=00097
 11 | num_process=8
 12 | 
 13 | comment='TimeLLM-M4'
 14 | 
 15 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
 16 |   --task_name short_term_forecast \
 17 |   --is_training 1 \
 18 |   --root_path ./dataset/m4 \
 19 |   --seasonal_patterns 'Monthly' \
 20 |   --model_id m4_Monthly \
 21 |   --model $model_name \
 22 |   --data m4 \
 23 |   --features M \
 24 |   --enc_in 1 \
 25 |   --dec_in 1 \
 26 |   --c_out 1 \
 27 |   --llm_layers $llama_layers \
 28 |   --d_model $d_model \
 29 |   --d_ff $d_ff \
 30 |   --patch_len 1 \
 31 |   --stride 1 \
 32 |   --batch_size $batch_size \
 33 |   --des 'Exp' \
 34 |   --itr 1 \
 35 |   --learning_rate $learning_rate \
 36 |   --loss 'SMAPE' \
 37 |   --train_epochs $train_epochs \
 38 |   --model_comment $comment
 39 | 
 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
 41 |   --task_name short_term_forecast \
 42 |   --is_training 1 \
 43 |   --root_path ./dataset/m4 \
 44 |   --seasonal_patterns 'Yearly' \
 45 |   --model_id m4_Yearly \
 46 |   --model $model_name \
 47 |   --data m4 \
 48 |   --features M \
 49 |   --enc_in 1 \
 50 |   --dec_in 1 \
 51 |   --c_out 1 \
 52 |   --llm_layers $llama_layers \
 53 |   --d_model $d_model \
 54 |   --d_ff $d_ff \
 55 |   --patch_len 1 \
 56 |   --stride 1 \
 57 |   --batch_size $batch_size \
 58 |   --des 'Exp' \
 59 |   --itr 1 \
 60 |   --learning_rate $learning_rate \
 61 |   --loss 'SMAPE' \
 62 |   --train_epochs $train_epochs \
 63 |   --model_comment $comment
 64 | 
 65 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
 66 |   --task_name short_term_forecast \
 67 |   --is_training 1 \
 68 |   --root_path ./dataset/m4 \
 69 |   --seasonal_patterns 'Weekly' \
 70 |   --model_id m4_Weekly \
 71 |   --model $model_name \
 72 |   --data m4 \
 73 |   --features M \
 74 |   --enc_in 1 \
 75 |   --dec_in 1 \
 76 |   --c_out 1 \
 77 |   --llm_layers $llama_layers \
 78 |   --d_model $d_model \
 79 |   --d_ff $d_ff \
 80 |   --patch_len 1 \
 81 |   --stride 1 \
 82 |   --batch_size $batch_size \
 83 |   --des 'Exp' \
 84 |   --itr 1 \
 85 |   --learning_rate $learning_rate \
 86 |   --loss 'SMAPE' \
 87 |   --train_epochs $train_epochs \
 88 |   --model_comment $comment
 89 | 
 90 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
 91 |   --task_name short_term_forecast \
 92 |   --is_training 1 \
 93 |   --root_path ./dataset/m4 \
 94 |   --seasonal_patterns 'Daily' \
 95 |   --model_id m4_Daily \
 96 |   --model $model_name \
 97 |   --data m4 \
 98 |   --features M \
 99 |   --enc_in 1 \
100 |   --dec_in 1 \
101 |   --c_out 1 \
102 |   --llm_layers $llama_layers \
103 |   --d_model $d_model \
104 |   --d_ff $d_ff \
105 |   --patch_len 1 \
106 |   --stride 1 \
107 |   --batch_size $batch_size \
108 |   --des 'Exp' \
109 |   --itr 1 \
110 |   --learning_rate $learning_rate \
111 |   --loss 'SMAPE' \
112 |   --train_epochs $train_epochs \
113 |   --model_comment $comment
114 | 
115 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
116 |   --task_name short_term_forecast \
117 |   --is_training 1 \
118 |   --root_path ./dataset/m4 \
119 |   --seasonal_patterns 'Quarterly' \
120 |   --model_id m4_Quarterly \
121 |   --model $model_name \
122 |   --data m4 \
123 |   --features M \
124 |   --enc_in 1 \
125 |   --dec_in 1 \
126 |   --c_out 1 \
127 |   --llm_layers $llama_layers \
128 |   --d_model $d_model \
129 |   --d_ff $d_ff \
130 |   --patch_len 1 \
131 |   --stride 1 \
132 |   --batch_size $batch_size \
133 |   --des 'Exp' \
134 |   --itr 1 \
135 |   --learning_rate $learning_rate \
136 |   --loss 'SMAPE' \
137 |   --train_epochs $train_epochs \
138 |   --model_comment $comment
139 | 
140 | 
141 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
142 |   --task_name short_term_forecast \
143 |   --is_training 1 \
144 |   --root_path ./dataset/m4 \
145 |   --seasonal_patterns 'Hourly' \
146 |   --model_id m4_Hourly \
147 |   --model $model_name \
148 |   --data m4 \
149 |   --features M \
150 |   --enc_in 1 \
151 |   --dec_in 1 \
152 |   --c_out 1 \
153 |   --llm_layers $llama_layers \
154 |   --d_model $d_model \
155 |   --d_ff $d_ff \
156 |   --patch_len 1 \
157 |   --stride 1 \
158 |   --batch_size $batch_size \
159 |   --des 'Exp' \
160 |   --itr 1 \
161 |   --learning_rate $learning_rate \
162 |   --loss 'SMAPE' \
163 |   --train_epochs $train_epochs \
164 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_Traffic.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=10
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00097
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=16
 10 | d_ff=32
 11 | 
 12 | comment='TimeLLM-Traffic'
 13 | 
 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 15 |   --task_name long_term_forecast \
 16 |   --is_training 1 \
 17 |   --root_path ./dataset/traffic/ \
 18 |   --data_path traffic.csv \
 19 |   --model_id traffic_512_96 \
 20 |   --model $model_name \
 21 |   --data Traffic \
 22 |   --features M \
 23 |   --seq_len 512 \
 24 |   --label_len 48 \
 25 |   --pred_len 96 \
 26 |   --e_layers 2 \
 27 |   --d_layers 1 \
 28 |   --factor 3 \
 29 |   --enc_in 862 \
 30 |   --dec_in 862 \
 31 |   --c_out 862 \
 32 |   --batch_size $batch_size \
 33 |   --learning_rate $learning_rate \
 34 |   --llm_layers $llama_layers \
 35 |   --train_epochs $train_epochs \
 36 |   --model_comment $comment
 37 | 
 38 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 39 |   --task_name long_term_forecast \
 40 |   --is_training 1 \
 41 |   --root_path ./dataset/traffic/ \
 42 |   --data_path traffic.csv \
 43 |   --model_id traffic_512_96 \
 44 |   --model $model_name \
 45 |   --data Traffic \
 46 |   --features M \
 47 |   --seq_len 512 \
 48 |   --label_len 48 \
 49 |   --pred_len 192 \
 50 |   --e_layers 2 \
 51 |   --d_layers 1 \
 52 |   --factor 3 \
 53 |   --enc_in 862 \
 54 |   --dec_in 862 \
 55 |   --c_out 862 \
 56 |   --batch_size $batch_size \
 57 |   --learning_rate $learning_rate \
 58 |   --llm_layers $llama_layers \
 59 |   --train_epochs $train_epochs \
 60 |   --model_comment $comment
 61 | 
 62 |   accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 63 |   --task_name long_term_forecast \
 64 |   --is_training 1 \
 65 |   --root_path ./dataset/traffic/ \
 66 |   --data_path traffic.csv \
 67 |   --model_id traffic_512_96 \
 68 |   --model $model_name \
 69 |   --data Traffic \
 70 |   --features M \
 71 |   --seq_len 512 \
 72 |   --label_len 48 \
 73 |   --pred_len 336 \
 74 |   --e_layers 2 \
 75 |   --d_layers 1 \
 76 |   --factor 3 \
 77 |   --enc_in 862 \
 78 |   --dec_in 862 \
 79 |   --c_out 862 \
 80 |   --batch_size 1 \
 81 |   --learning_rate $learning_rate \
 82 |   --llm_layers $llama_layers \
 83 |   --train_epochs $train_epochs \
 84 |   --model_comment $comment
 85 | 
 86 |   accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 87 |   --task_name long_term_forecast \
 88 |   --is_training 1 \
 89 |   --root_path ./dataset/traffic/ \
 90 |   --data_path traffic.csv \
 91 |   --model_id traffic_512_96 \
 92 |   --model $model_name \
 93 |   --data Traffic \
 94 |   --features M \
 95 |   --seq_len 512 \
 96 |   --label_len 720 \
 97 |   --pred_len 96 \
 98 |   --e_layers 2 \
 99 |   --d_layers 1 \
100 |   --factor 3 \
101 |   --enc_in 862 \
102 |   --dec_in 862 \
103 |   --c_out 862 \
104 |   --batch_size $batch_size \
105 |   --learning_rate $learning_rate \
106 |   --llm_layers $llama_layers \
107 |   --train_epochs $train_epochs \
108 |   --model_comment $comment


--------------------------------------------------------------------------------
/scripts/TimeLLM_Weather.sh:
--------------------------------------------------------------------------------
  1 | model_name=TimeLLM
  2 | train_epochs=10
  3 | learning_rate=0.01
  4 | llama_layers=32
  5 | 
  6 | master_port=00097
  7 | num_process=8
  8 | batch_size=24
  9 | d_model=16
 10 | d_ff=32
 11 | 
 12 | comment='TimeLLM-Weather'
 13 | 
 14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 15 |   --task_name long_term_forecast \
 16 |   --is_training 1 \
 17 |   --root_path ./dataset/weather/ \
 18 |   --data_path weather.csv \
 19 |   --model_id weather_512_96 \
 20 |   --model $model_name \
 21 |   --data Weather \
 22 |   --features M \
 23 |   --seq_len 512 \
 24 |   --label_len 48 \
 25 |   --pred_len 96 \
 26 |   --e_layers 2 \
 27 |   --d_layers 1 \
 28 |   --factor 3 \
 29 |   --enc_in 21 \
 30 |   --dec_in 21 \
 31 |   --c_out 21 \
 32 |   --d_model 32 \
 33 |   --d_ff 32 \
 34 |   --batch_size $batch_size \
 35 |   --learning_rate $learning_rate \
 36 |   --llm_layers $llama_layers \
 37 |   --train_epochs $train_epochs \
 38 |   --model_comment $comment
 39 | 
 40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 41 |   --task_name long_term_forecast \
 42 |   --is_training 1 \
 43 |   --root_path ./dataset/weather/ \
 44 |   --data_path weather.csv \
 45 |   --model_id weather_512_192 \
 46 |   --model $model_name \
 47 |   --data Weather \
 48 |   --features M \
 49 |   --seq_len 512 \
 50 |   --label_len 48 \
 51 |   --pred_len 192 \
 52 |   --e_layers 2 \
 53 |   --d_layers 1 \
 54 |   --factor 3 \
 55 |   --enc_in 21 \
 56 |   --dec_in 21 \
 57 |   --c_out 21 \
 58 |   --d_model 32 \
 59 |   --d_ff 32 \
 60 |   --batch_size $batch_size \
 61 |   --learning_rate $learning_rate \
 62 |   --llm_layers $llama_layers \
 63 |   --train_epochs $train_epochs \
 64 |   --model_comment $comment
 65 | 
 66 |   accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 67 |   --task_name long_term_forecast \
 68 |   --is_training 1 \
 69 |   --root_path ./dataset/weather/ \
 70 |   --data_path weather.csv \
 71 |   --model_id weather_512_336 \
 72 |   --model $model_name \
 73 |   --data Weather \
 74 |   --features M \
 75 |   --seq_len 512 \
 76 |   --label_len 48 \
 77 |   --pred_len 336 \
 78 |   --e_layers 2 \
 79 |   --d_layers 1 \
 80 |   --factor 3 \
 81 |   --enc_in 21 \
 82 |   --dec_in 21 \
 83 |   --c_out 21 \
 84 |   --d_model 32 \
 85 |   --d_ff 128 \
 86 |   --batch_size $batch_size \
 87 |   --learning_rate $learning_rate \
 88 |   --llm_layers $llama_layers \
 89 |   --train_epochs 10 \
 90 |   --model_comment $comment
 91 | 
 92 |   accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
 93 |   --task_name long_term_forecast \
 94 |   --is_training 1 \
 95 |   --root_path ./dataset/weather/ \
 96 |   --data_path weather.csv \
 97 |   --model_id weather_512_720 \
 98 |   --model $model_name \
 99 |   --data Weather \
100 |   --features M \
101 |   --seq_len 512 \
102 |   --label_len 48 \
103 |   --pred_len 720 \
104 |   --e_layers 2 \
105 |   --d_layers 1 \
106 |   --factor 3 \
107 |   --enc_in 21 \
108 |   --dec_in 21 \
109 |   --c_out 21 \
110 |   --d_model 32 \
111 |   --d_ff 128 \
112 |   --batch_size $batch_size \
113 |   --learning_rate $learning_rate \
114 |   --llm_layers $llama_layers \
115 |   --train_epochs 15 \
116 |   --model_comment $comment


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/utils/__init__.py


--------------------------------------------------------------------------------
/utils/losses.py:
--------------------------------------------------------------------------------
 1 | # This source code is provided for the purposes of scientific reproducibility
 2 | # under the following limited license from Element AI Inc. The code is an
 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
 4 | # expansion analysis for interpretable time series forecasting,
 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
 7 | # International license (CC BY-NC 4.0):
 8 | # https://creativecommons.org/licenses/by-nc/4.0/.  Any commercial use (whether
 9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
14 | 
15 | """
16 | Loss functions for PyTorch.
17 | """
18 | 
19 | import torch as t
20 | import torch.nn as nn
21 | import numpy as np
22 | import pdb
23 | 
24 | 
25 | def divide_no_nan(a, b):
26 |     """
27 |     a/b where the resulted NaN or Inf are replaced by 0.
28 |     """
29 |     result = a / b
30 |     result[result != result] = .0
31 |     result[result == np.inf] = .0
32 |     return result
33 | 
34 | 
35 | class mape_loss(nn.Module):
36 |     def __init__(self):
37 |         super(mape_loss, self).__init__()
38 | 
39 |     def forward(self, insample: t.Tensor, freq: int,
40 |                 forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
41 |         """
42 |         MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
43 | 
44 |         :param forecast: Forecast values. Shape: batch, time
45 |         :param target: Target values. Shape: batch, time
46 |         :param mask: 0/1 mask. Shape: batch, time
47 |         :return: Loss value
48 |         """
49 |         weights = divide_no_nan(mask, target)
50 |         return t.mean(t.abs((forecast - target) * weights))
51 | 
52 | 
53 | class smape_loss(nn.Module):
54 |     def __init__(self):
55 |         super(smape_loss, self).__init__()
56 | 
57 |     def forward(self, insample: t.Tensor, freq: int,
58 |                 forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
59 |         """
60 |         sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
61 | 
62 |         :param forecast: Forecast values. Shape: batch, time
63 |         :param target: Target values. Shape: batch, time
64 |         :param mask: 0/1 mask. Shape: batch, time
65 |         :return: Loss value
66 |         """
67 |         return 200 * t.mean(divide_no_nan(t.abs(forecast - target),
68 |                                           t.abs(forecast.data) + t.abs(target.data)) * mask)
69 | 
70 | 
71 | class mase_loss(nn.Module):
72 |     def __init__(self):
73 |         super(mase_loss, self).__init__()
74 | 
75 |     def forward(self, insample: t.Tensor, freq: int,
76 |                 forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
77 |         """
78 |         MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf
79 | 
80 |         :param insample: Insample values. Shape: batch, time_i
81 |         :param freq: Frequency value
82 |         :param forecast: Forecast values. Shape: batch, time_o
83 |         :param target: Target values. Shape: batch, time_o
84 |         :param mask: 0/1 mask. Shape: batch, time_o
85 |         :return: Loss value
86 |         """
87 |         masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
88 |         masked_masep_inv = divide_no_nan(mask, masep[:, None])
89 |         return t.mean(t.abs(target - forecast) * masked_masep_inv)
90 | 


--------------------------------------------------------------------------------
/utils/m4_summary.py:
--------------------------------------------------------------------------------
  1 | # This source code is provided for the purposes of scientific reproducibility
  2 | # under the following limited license from Element AI Inc. The code is an
  3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
  4 | # expansion analysis for interpretable time series forecasting,
  5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
  6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
  7 | # International license (CC BY-NC 4.0):
  8 | # https://creativecommons.org/licenses/by-nc/4.0/.  Any commercial use (whether
  9 | # for the benefit of third parties or internally in production) requires an
 10 | # explicit license. The subject-matter of the N-BEATS model and associated
 11 | # materials are the property of Element AI Inc. and may be subject to patent
 12 | # protection. No license to patents is granted hereunder (whether express or
 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved.
 14 | 
 15 | """
 16 | M4 Summary
 17 | """
 18 | from collections import OrderedDict
 19 | 
 20 | import numpy as np
 21 | import pandas as pd
 22 | 
 23 | from data_provider.m4 import M4Dataset
 24 | from data_provider.m4 import M4Meta
 25 | import os
 26 | 
 27 | 
 28 | def group_values(values, groups, group_name):
 29 |     return np.array([v[~np.isnan(v)] for v in values[groups == group_name]])
 30 | 
 31 | 
 32 | def mase(forecast, insample, outsample, frequency):
 33 |     return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
 34 | 
 35 | 
 36 | def smape_2(forecast, target):
 37 |     denom = np.abs(target) + np.abs(forecast)
 38 |     # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway.
 39 |     denom[denom == 0.0] = 1.0
 40 |     return 200 * np.abs(forecast - target) / denom
 41 | 
 42 | 
 43 | def mape(forecast, target):
 44 |     denom = np.abs(target)
 45 |     # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway.
 46 |     denom[denom == 0.0] = 1.0
 47 |     return 100 * np.abs(forecast - target) / denom
 48 | 
 49 | 
 50 | class M4Summary:
 51 |     def __init__(self, file_path, root_path):
 52 |         self.file_path = file_path
 53 |         self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
 54 |         self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
 55 |         self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')
 56 | 
 57 |     def evaluate(self):
 58 |         """
 59 |         Evaluate forecasts using M4 test dataset.
 60 | 
 61 |         :param forecast: Forecasts. Shape: timeseries, time.
 62 |         :return: sMAPE and OWA grouped by seasonal patterns.
 63 |         """
 64 |         grouped_owa = OrderedDict()
 65 | 
 66 |         naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
 67 |         naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts])
 68 | 
 69 |         model_mases = {}
 70 |         naive2_smapes = {}
 71 |         naive2_mases = {}
 72 |         grouped_smapes = {}
 73 |         grouped_mapes = {}
 74 |         for group_name in M4Meta.seasonal_patterns:
 75 |             file_name = self.file_path + group_name + "_forecast.csv"
 76 |             if os.path.exists(file_name):
 77 |                 model_forecast = pd.read_csv(file_name).values
 78 | 
 79 |             naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
 80 |             target = group_values(self.test_set.values, self.test_set.groups, group_name)
 81 |             # all timeseries within group have same frequency
 82 |             frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
 83 |             insample = group_values(self.training_set.values, self.test_set.groups, group_name)
 84 | 
 85 |             model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
 86 |                                                     insample=insample[i],
 87 |                                                     outsample=target[i],
 88 |                                                     frequency=frequency) for i in range(len(model_forecast))])
 89 |             naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
 90 |                                                      insample=insample[i],
 91 |                                                      outsample=target[i],
 92 |                                                      frequency=frequency) for i in range(len(model_forecast))])
 93 | 
 94 |             naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
 95 |             grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
 96 |             grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))
 97 | 
 98 |         grouped_smapes = self.summarize_groups(grouped_smapes)
 99 |         grouped_mapes = self.summarize_groups(grouped_mapes)
100 |         grouped_model_mases = self.summarize_groups(model_mases)
101 |         grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
102 |         grouped_naive2_mases = self.summarize_groups(naive2_mases)
103 |         for k in grouped_model_mases.keys():
104 |             grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
105 |                               grouped_smapes[k] / grouped_naive2_smapes[k]) / 2
106 | 
107 |         def round_all(d):
108 |             return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
109 | 
110 |         return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all(
111 |             grouped_model_mases)
112 | 
113 |     def summarize_groups(self, scores):
114 |         """
115 |         Re-group scores respecting M4 rules.
116 |         :param scores: Scores per group.
117 |         :return: Grouped scores.
118 |         """
119 |         scores_summary = OrderedDict()
120 | 
121 |         def group_count(group_name):
122 |             return len(np.where(self.test_set.groups == group_name)[0])
123 | 
124 |         weighted_score = {}
125 |         for g in ['Yearly', 'Quarterly', 'Monthly']:
126 |             weighted_score[g] = scores[g] * group_count(g)
127 |             scores_summary[g] = scores[g]
128 | 
129 |         others_score = 0
130 |         others_count = 0
131 |         for g in ['Weekly', 'Daily', 'Hourly']:
132 |             others_score += scores[g] * group_count(g)
133 |             others_count += group_count(g)
134 |         weighted_score['Others'] = others_score
135 |         scores_summary['Others'] = others_score / others_count
136 | 
137 |         average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
138 |         scores_summary['Average'] = average
139 | 
140 |         return scores_summary
141 | 


--------------------------------------------------------------------------------
/utils/masking.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class TriangularCausalMask():
 5 |     def __init__(self, B, L, device="cpu"):
 6 |         mask_shape = [B, 1, L, L]
 7 |         with torch.no_grad():
 8 |             self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
 9 | 
10 |     @property
11 |     def mask(self):
12 |         return self._mask
13 | 
14 | 
15 | class ProbMask():
16 |     def __init__(self, B, H, L, index, scores, device="cpu"):
17 |         _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
18 |         _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
19 |         indicator = _mask_ex[torch.arange(B)[:, None, None],
20 |                     torch.arange(H)[None, :, None],
21 |                     index, :].to(device)
22 |         self._mask = indicator.view(scores.shape).to(device)
23 | 
24 |     @property
25 |     def mask(self):
26 |         return self._mask


--------------------------------------------------------------------------------
/utils/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def RSE(pred, true):
 5 |     return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))
 6 | 
 7 | 
 8 | def CORR(pred, true):
 9 |     u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
10 |     d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
11 |     return (u / d).mean(-1)
12 | 
13 | 
14 | def MAE(pred, true):
15 |     return np.mean(np.abs(pred - true))
16 | 
17 | 
18 | def MSE(pred, true):
19 |     return np.mean((pred - true) ** 2)
20 | 
21 | 
22 | def RMSE(pred, true):
23 |     return np.sqrt(MSE(pred, true))
24 | 
25 | 
26 | def MAPE(pred, true):
27 |     return np.mean(np.abs((pred - true) / true))
28 | 
29 | 
30 | def MSPE(pred, true):
31 |     return np.mean(np.square((pred - true) / true))
32 | 
33 | 
34 | def metric(pred, true):
35 |     mae = MAE(pred, true)
36 |     mse = MSE(pred, true)
37 |     rmse = RMSE(pred, true)
38 |     mape = MAPE(pred, true)
39 |     mspe = MSPE(pred, true)
40 | 
41 |     return mae, mse, rmse, mape, mspe
42 | 


--------------------------------------------------------------------------------
/utils/timefeatures.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | from pandas.tseries import offsets
  6 | from pandas.tseries.frequencies import to_offset
  7 | 
  8 | 
  9 | class TimeFeature:
 10 |     def __init__(self):
 11 |         pass
 12 | 
 13 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 14 |         pass
 15 | 
 16 |     def __repr__(self):
 17 |         return self.__class__.__name__ + "()"
 18 | 
 19 | 
 20 | class SecondOfMinute(TimeFeature):
 21 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
 22 | 
 23 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 24 |         return index.second / 59.0 - 0.5
 25 | 
 26 | 
 27 | class MinuteOfHour(TimeFeature):
 28 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
 29 | 
 30 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 31 |         return index.minute / 59.0 - 0.5
 32 | 
 33 | 
 34 | class HourOfDay(TimeFeature):
 35 |     """Hour of day encoded as value between [-0.5, 0.5]"""
 36 | 
 37 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 38 |         return index.hour / 23.0 - 0.5
 39 | 
 40 | 
 41 | class DayOfWeek(TimeFeature):
 42 |     """Hour of day encoded as value between [-0.5, 0.5]"""
 43 | 
 44 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 45 |         return index.dayofweek / 6.0 - 0.5
 46 | 
 47 | 
 48 | class DayOfMonth(TimeFeature):
 49 |     """Day of month encoded as value between [-0.5, 0.5]"""
 50 | 
 51 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 52 |         return (index.day - 1) / 30.0 - 0.5
 53 | 
 54 | 
 55 | class DayOfYear(TimeFeature):
 56 |     """Day of year encoded as value between [-0.5, 0.5]"""
 57 | 
 58 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 59 |         return (index.dayofyear - 1) / 365.0 - 0.5
 60 | 
 61 | 
 62 | class MonthOfYear(TimeFeature):
 63 |     """Month of year encoded as value between [-0.5, 0.5]"""
 64 | 
 65 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 66 |         return (index.month - 1) / 11.0 - 0.5
 67 | 
 68 | 
 69 | class WeekOfYear(TimeFeature):
 70 |     """Week of year encoded as value between [-0.5, 0.5]"""
 71 | 
 72 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 73 |         return (index.isocalendar().week - 1) / 52.0 - 0.5
 74 | 
 75 | 
 76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
 77 |     """
 78 |     Returns a list of time features that will be appropriate for the given frequency string.
 79 |     Parameters
 80 |     ----------
 81 |     freq_str
 82 |         Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
 83 |     """
 84 | 
 85 |     features_by_offsets = {
 86 |         offsets.YearEnd: [],
 87 |         offsets.QuarterEnd: [MonthOfYear],
 88 |         offsets.MonthEnd: [MonthOfYear],
 89 |         offsets.Week: [DayOfMonth, WeekOfYear],
 90 |         offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
 91 |         offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
 92 |         offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
 93 |         offsets.Minute: [
 94 |             MinuteOfHour,
 95 |             HourOfDay,
 96 |             DayOfWeek,
 97 |             DayOfMonth,
 98 |             DayOfYear,
 99 |         ],
100 |         offsets.Second: [
101 |             SecondOfMinute,
102 |             MinuteOfHour,
103 |             HourOfDay,
104 |             DayOfWeek,
105 |             DayOfMonth,
106 |             DayOfYear,
107 |         ],
108 |     }
109 | 
110 |     offset = to_offset(freq_str)
111 | 
112 |     for offset_type, feature_classes in features_by_offsets.items():
113 |         if isinstance(offset, offset_type):
114 |             return [cls() for cls in feature_classes]
115 | 
116 |     supported_freq_msg = f"""
117 |     Unsupported frequency {freq_str}
118 |     The following frequencies are supported:
119 |         Y   - yearly
120 |             alias: A
121 |         M   - monthly
122 |         W   - weekly
123 |         D   - daily
124 |         B   - business days
125 |         H   - hourly
126 |         T   - minutely
127 |             alias: min
128 |         S   - secondly
129 |     """
130 |     raise RuntimeError(supported_freq_msg)
131 | 
132 | 
133 | def time_features(dates, freq='h'):
134 |     return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])


--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import matplotlib.pyplot as plt
  4 | import shutil
  5 | 
  6 | from tqdm import tqdm
  7 | 
  8 | plt.switch_backend('agg')
  9 | 
 10 | 
 11 | def adjust_learning_rate(accelerator, optimizer, scheduler, epoch, args, printout=True):
 12 |     if args.lradj == 'type1':
 13 |         lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
 14 |     elif args.lradj == 'type2':
 15 |         lr_adjust = {
 16 |             2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
 17 |             10: 5e-7, 15: 1e-7, 20: 5e-8
 18 |         }
 19 |     elif args.lradj == 'type3':
 20 |         lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
 21 |     elif args.lradj == 'PEMS':
 22 |         lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))}
 23 |     elif args.lradj == 'TST':
 24 |         lr_adjust = {epoch: scheduler.get_last_lr()[0]}
 25 |     elif args.lradj == 'constant':
 26 |         lr_adjust = {epoch: args.learning_rate}
 27 |     if epoch in lr_adjust.keys():
 28 |         lr = lr_adjust[epoch]
 29 |         for param_group in optimizer.param_groups:
 30 |             param_group['lr'] = lr
 31 |         if printout:
 32 |             if accelerator is not None:
 33 |                 accelerator.print('Updating learning rate to {}'.format(lr))
 34 |             else:
 35 |                 print('Updating learning rate to {}'.format(lr))
 36 | 
 37 | 
 38 | class EarlyStopping:
 39 |     def __init__(self, accelerator=None, patience=7, verbose=False, delta=0, save_mode=True):
 40 |         self.accelerator = accelerator
 41 |         self.patience = patience
 42 |         self.verbose = verbose
 43 |         self.counter = 0
 44 |         self.best_score = None
 45 |         self.early_stop = False
 46 |         self.val_loss_min = np.Inf
 47 |         self.delta = delta
 48 |         self.save_mode = save_mode
 49 | 
 50 |     def __call__(self, val_loss, model, path):
 51 |         score = -val_loss
 52 |         if self.best_score is None:
 53 |             self.best_score = score
 54 |             if self.save_mode:
 55 |                 self.save_checkpoint(val_loss, model, path)
 56 |         elif score < self.best_score + self.delta:
 57 |             self.counter += 1
 58 |             if self.accelerator is None:
 59 |                 print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
 60 |             else:
 61 |                 self.accelerator.print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
 62 |             if self.counter >= self.patience:
 63 |                 self.early_stop = True
 64 |         else:
 65 |             self.best_score = score
 66 |             if self.save_mode:
 67 |                 self.save_checkpoint(val_loss, model, path)
 68 |             self.counter = 0
 69 | 
 70 |     def save_checkpoint(self, val_loss, model, path):
 71 |         if self.verbose:
 72 |             if self.accelerator is not None:
 73 |                 self.accelerator.print(
 74 |                     f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
 75 |             else:
 76 |                 print(
 77 |                     f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
 78 | 
 79 |         if self.accelerator is not None:
 80 |             model = self.accelerator.unwrap_model(model)
 81 |             torch.save(model.state_dict(), path + '/' + 'checkpoint')
 82 |         else:
 83 |             torch.save(model.state_dict(), path + '/' + 'checkpoint')
 84 |         self.val_loss_min = val_loss
 85 | 
 86 | 
 87 | class dotdict(dict):
 88 |     """dot.notation access to dictionary attributes"""
 89 |     __getattr__ = dict.get
 90 |     __setattr__ = dict.__setitem__
 91 |     __delattr__ = dict.__delitem__
 92 | 
 93 | 
 94 | class StandardScaler():
 95 |     def __init__(self, mean, std):
 96 |         self.mean = mean
 97 |         self.std = std
 98 | 
 99 |     def transform(self, data):
100 |         return (data - self.mean) / self.std
101 | 
102 |     def inverse_transform(self, data):
103 |         return (data * self.std) + self.mean
104 | 
105 | def adjustment(gt, pred):
106 |     anomaly_state = False
107 |     for i in range(len(gt)):
108 |         if gt[i] == 1 and pred[i] == 1 and not anomaly_state:
109 |             anomaly_state = True
110 |             for j in range(i, 0, -1):
111 |                 if gt[j] == 0:
112 |                     break
113 |                 else:
114 |                     if pred[j] == 0:
115 |                         pred[j] = 1
116 |             for j in range(i, len(gt)):
117 |                 if gt[j] == 0:
118 |                     break
119 |                 else:
120 |                     if pred[j] == 0:
121 |                         pred[j] = 1
122 |         elif gt[i] == 0:
123 |             anomaly_state = False
124 |         if anomaly_state:
125 |             pred[i] = 1
126 |     return gt, pred
127 | 
128 | 
129 | def cal_accuracy(y_pred, y_true):
130 |     return np.mean(y_pred == y_true)
131 | 
132 | 
133 | def del_files(dir_path):
134 |     shutil.rmtree(dir_path)
135 | 
136 | 
137 | def vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric):
138 |     total_loss = []
139 |     total_mae_loss = []
140 |     model.eval()
141 |     with torch.no_grad():
142 |         for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)):
143 |             batch_x = batch_x.float().to(accelerator.device)
144 |             batch_y = batch_y.float()
145 | 
146 |             batch_x_mark = batch_x_mark.float().to(accelerator.device)
147 |             batch_y_mark = batch_y_mark.float().to(accelerator.device)
148 | 
149 |             # decoder input
150 |             dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
151 |             dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
152 |                 accelerator.device)
153 |             # encoder - decoder
154 |             if args.use_amp:
155 |                 with torch.cuda.amp.autocast():
156 |                     if args.output_attention:
157 |                         outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
158 |                     else:
159 |                         outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
160 |             else:
161 |                 if args.output_attention:
162 |                     outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
163 |                 else:
164 |                     outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
165 | 
166 |             outputs, batch_y = accelerator.gather_for_metrics((outputs, batch_y))
167 | 
168 |             f_dim = -1 if args.features == 'MS' else 0
169 |             outputs = outputs[:, -args.pred_len:, f_dim:]
170 |             batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
171 | 
172 |             pred = outputs.detach()
173 |             true = batch_y.detach()
174 | 
175 |             loss = criterion(pred, true)
176 | 
177 |             mae_loss = mae_metric(pred, true)
178 | 
179 |             total_loss.append(loss.item())
180 |             total_mae_loss.append(mae_loss.item())
181 | 
182 |     total_loss = np.average(total_loss)
183 |     total_mae_loss = np.average(total_mae_loss)
184 | 
185 |     model.train()
186 |     return total_loss, total_mae_loss
187 | 
188 | 
189 | def test(args, accelerator, model, train_loader, vali_loader, criterion):
190 |     x, _ = train_loader.dataset.last_insample_window()
191 |     y = vali_loader.dataset.timeseries
192 |     x = torch.tensor(x, dtype=torch.float32).to(accelerator.device)
193 |     x = x.unsqueeze(-1)
194 | 
195 |     model.eval()
196 |     with torch.no_grad():
197 |         B, _, C = x.shape
198 |         dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
199 |         dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1)
200 |         outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
201 |         id_list = np.arange(0, B, args.eval_batch_size)
202 |         id_list = np.append(id_list, B)
203 |         for i in range(len(id_list) - 1):
204 |             outputs[id_list[i]:id_list[i + 1], :, :] = model(
205 |                 x[id_list[i]:id_list[i + 1]],
206 |                 None,
207 |                 dec_inp[id_list[i]:id_list[i + 1]],
208 |                 None
209 |             )
210 |         accelerator.wait_for_everyone()
211 |         outputs = accelerator.gather_for_metrics(outputs)
212 |         f_dim = -1 if args.features == 'MS' else 0
213 |         outputs = outputs[:, -args.pred_len:, f_dim:]
214 |         pred = outputs
215 |         true = torch.from_numpy(np.array(y)).to(accelerator.device)
216 |         batch_y_mark = torch.ones(true.shape).to(accelerator.device)
217 |         true = accelerator.gather_for_metrics(true)
218 |         batch_y_mark = accelerator.gather_for_metrics(batch_y_mark)
219 | 
220 |         loss = criterion(x[:, :, 0], args.frequency_map, pred[:, :, 0], true, batch_y_mark)
221 | 
222 |     model.train()
223 |     return loss
224 | 
225 | 
226 | def load_content(args):
227 |     if 'ETT' in args.data:
228 |         file = 'ETT'
229 |     else:
230 |         file = args.data
231 |     with open('./dataset/prompt_bank/{0}.txt'.format(file), 'r') as f:
232 |         content = f.read()
233 |     return content


--------------------------------------------------------------------------------