├── LEGAL.md
├── LICENSE
├── README.md
├── data_provider
│   ├── __init__.py
│   ├── data_factory.py
│   ├── data_loader.py
│   └── m4.py
├── data_provider_pretrain
│   ├── __init__.py
│   ├── data_factory.py
│   └── data_loader.py
├── dataset
│   └── prompt_bank
│       ├── ECL.txt
│       ├── ETT.txt
│       ├── Traffic.txt
│       ├── Weather.txt
│       └── m4.txt
├── ds_config_zero2.json
├── figures
│   ├── framework.png
│   ├── logo.png
│   └── method-detailed-illustration.png
├── layers
│   ├── AutoCorrelation.py
│   ├── Autoformer_EncDec.py
│   ├── Conv_Blocks.py
│   ├── Embed.py
│   ├── SelfAttention_Family.py
│   ├── StandardNorm.py
│   ├── Transformer_EncDec.py
│   └── __init__.py
├── models
│   ├── Autoformer.py
│   ├── DLinear.py
│   ├── TimeLLM.py
│   └── __init__.py
├── requirements.txt
├── run_m4.py
├── run_main.py
├── run_pretrain.py
├── scripts
│   ├── TimeLLM_ECL.sh
│   ├── TimeLLM_ETTh1.sh
│   ├── TimeLLM_ETTh1_ETTh2.sh
│   ├── TimeLLM_ETTh2.sh
│   ├── TimeLLM_ETTm1.sh
│   ├── TimeLLM_ETTm2.sh
│   ├── TimeLLM_M4.sh
│   ├── TimeLLM_Traffic.sh
│   └── TimeLLM_Weather.sh
└── utils
    ├── __init__.py
    ├── losses.py
    ├── m4_summary.py
    ├── masking.py
    ├── metrics.py
    ├── timefeatures.py
    └── tools.py
/LEGAL.md:
--------------------------------------------------------------------------------
1 | Legal Disclaimer
2 |
3 | Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4 |
5 | 法律免责声明
6 |
7 | 关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # (ICLR'24) Time-LLM: Time Series Forecasting by Reprogramming Large Language Models
2 |
38 | ---
39 | >
40 | > 🙋 Please let us know if you find a mistake or have any suggestions!
41 | >
42 | > 🌟 If you find this resource helpful, please consider starring this repository and citing our research:
43 |
44 | ```
45 | @inproceedings{jin2023time,
46 | title={{Time-LLM}: Time series forecasting by reprogramming large language models},
47 | author={Jin, Ming and Wang, Shiyu and Ma, Lintao and Chu, Zhixuan and Zhang, James Y and Shi, Xiaoming and Chen, Pin-Yu and Liang, Yuxuan and Li, Yuan-Fang and Pan, Shirui and Wen, Qingsong},
48 | booktitle={International Conference on Learning Representations (ICLR)},
49 | year={2024}
50 | }
51 | ```
52 |
53 | ## Updates/News:
54 |
55 | 🚩 **News** (Aug. 2024): Time-LLM has been adopted by XiMou Optimization Technology Co., Ltd. (XMO) for Solar, Wind, and Weather Forecasting.
56 |
57 | 🚩 **News** (May 2024): Time-LLM has been included in [NeuralForecast](https://github.com/Nixtla/neuralforecast). Special thanks to the contributors @[JQGoh](https://github.com/JQGoh) and @[marcopeix](https://github.com/marcopeix)!
58 |
59 | 🚩 **News** (March 2024): Time-LLM has been upgraded to serve as a general framework for repurposing a wide range of language models for time series forecasting. It now defaults to supporting Llama-7B and includes compatibility with two additional smaller PLMs (GPT-2 and BERT). Simply adjust `--llm_model` and `--llm_dim` to switch backbones.
60 |
61 | ## Introduction
62 | Time-LLM is a reprogramming framework to repurpose LLMs for general time series forecasting with the backbone language models kept intact.
63 | Notably, we show that time series analysis (e.g., forecasting) can be cast as yet another "language task" that can be effectively tackled by an off-the-shelf LLM.
64 |
65 |
66 |
67 |
68 |
69 | - Time-LLM comprises two key components: (1) reprogramming the input time series into text prototype representations that are more natural for the LLM, and (2) augmenting the input context with declarative prompts (e.g., domain expert knowledge and task instructions) to guide LLM reasoning.
70 |
71 |
72 |
73 |
74 |
75 | ## Requirements
76 | Use Python 3.11 from Miniconda.
77 |
78 | - torch==2.2.2
79 | - accelerate==0.28.0
80 | - einops==0.7.0
81 | - matplotlib==3.7.0
82 | - numpy==1.23.5
83 | - pandas==1.5.3
84 | - scikit_learn==1.2.2
85 | - scipy==1.12.0
86 | - tqdm==4.65.0
87 | - peft==0.4.0
88 | - transformers==4.31.0
89 | - deepspeed==0.14.0
90 | - sentencepiece==0.2.0
91 |
92 | To install all dependencies:
93 | ```
94 | pip install -r requirements.txt
95 | ```
96 |
97 | ## Datasets
98 | You can access the well-preprocessed datasets from [[Google Drive]](https://drive.google.com/file/d/1NF7VEefXCmXuWNbnNe858WvQAkJ_7wuP/view?usp=sharing), then place the downloaded contents under `./dataset`.
99 |
100 | ## Quick Demos
101 | 1. Download datasets and place them under `./dataset`
102 | 2. Tune the model. We provide experiment scripts for demonstration purposes under the folder `./scripts`. For example, you can evaluate on the ETT datasets by running:
103 |
104 | ```bash
105 | bash ./scripts/TimeLLM_ETTh1.sh
106 | ```
107 | ```bash
108 | bash ./scripts/TimeLLM_ETTh2.sh
109 | ```
110 | ```bash
111 | bash ./scripts/TimeLLM_ETTm1.sh
112 | ```
113 | ```bash
114 | bash ./scripts/TimeLLM_ETTm2.sh
115 | ```
116 |
117 | ## Detailed usage
118 |
119 | Please refer to `run_main.py`, `run_m4.py`, and `run_pretrain.py` for a detailed description of each hyperparameter.
120 |
121 |
122 | ## Further Reading
123 | 1, [**TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis**](https://arxiv.org/abs/2410.16032), in *arXiv* 2024.
124 | [\[GitHub Repo\]](https://github.com/kwuking/TimeMixer/blob/main/README.md)
125 |
126 | **Authors**: Shiyu Wang, Jiawei Li, Xiaoming Shi, Zhou Ye, Baichuan Mo, Wenze Lin, Shengtong Ju, Zhixuan Chu, Ming Jin
127 |
128 | ```bibtex
129 | @article{wang2024timemixer++,
130 | title={TimeMixer++: A General Time Series Pattern Machine for Universal Predictive Analysis},
131 | author={Wang, Shiyu and Li, Jiawei and Shi, Xiaoming and Ye, Zhou and Mo, Baichuan and Lin, Wenze and Ju, Shengtong and Chu, Zhixuan and Jin, Ming},
132 | journal={arXiv preprint arXiv:2410.16032},
133 | year={2024}
134 | }
135 | ```
136 |
137 | 2, [**Foundation Models for Time Series Analysis: A Tutorial and Survey**](https://arxiv.org/pdf/2403.14735), in *KDD* 2024.
138 |
139 | **Authors**: Yuxuan Liang, Haomin Wen, Yuqi Nie, Yushan Jiang, Ming Jin, Dongjin Song, Shirui Pan, Qingsong Wen*
140 |
141 | ```bibtex
142 | @inproceedings{liang2024foundation,
143 | title={Foundation models for time series analysis: A tutorial and survey},
144 | author={Liang, Yuxuan and Wen, Haomin and Nie, Yuqi and Jiang, Yushan and Jin, Ming and Song, Dongjin and Pan, Shirui and Wen, Qingsong},
145 | booktitle={ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2024)},
146 | year={2024}
147 | }
148 | ```
149 |
150 | 3, [**Position Paper: What Can Large Language Models Tell Us about Time Series Analysis**](https://arxiv.org/abs/2402.02713), in *ICML* 2024.
151 |
152 | **Authors**: Ming Jin, Yifan Zhang, Wei Chen, Kexin Zhang, Yuxuan Liang*, Bin Yang, Jindong Wang, Shirui Pan, Qingsong Wen*
153 |
154 | ```bibtex
155 | @inproceedings{jin2024position,
156 | title={Position Paper: What Can Large Language Models Tell Us about Time Series Analysis},
157 | author={Ming Jin and Yifan Zhang and Wei Chen and Kexin Zhang and Yuxuan Liang and Bin Yang and Jindong Wang and Shirui Pan and Qingsong Wen},
158 | booktitle={International Conference on Machine Learning (ICML 2024)},
159 | year={2024}
160 | }
161 | ```
162 |
163 | 4, [**Large Models for Time Series and Spatio-Temporal Data: A Survey and Outlook**](https://arxiv.org/abs/2310.10196), in *arXiv* 2023.
164 | [\[GitHub Repo\]](https://github.com/qingsongedu/Awesome-TimeSeries-SpatioTemporal-LM-LLM)
165 |
166 | **Authors**: Ming Jin, Qingsong Wen*, Yuxuan Liang, Chaoli Zhang, Siqiao Xue, Xue Wang, James Zhang, Yi Wang, Haifeng Chen, Xiaoli Li (IEEE Fellow), Shirui Pan*, Vincent S. Tseng (IEEE Fellow), Yu Zheng (IEEE Fellow), Lei Chen (IEEE Fellow), Hui Xiong (IEEE Fellow)
167 |
168 | ```bibtex
169 | @article{jin2023lm4ts,
170 | title={Large Models for Time Series and Spatio-Temporal Data: A Survey and Outlook},
171 | author={Ming Jin and Qingsong Wen and Yuxuan Liang and Chaoli Zhang and Siqiao Xue and Xue Wang and James Zhang and Yi Wang and Haifeng Chen and Xiaoli Li and Shirui Pan and Vincent S. Tseng and Yu Zheng and Lei Chen and Hui Xiong},
172 | journal={arXiv preprint arXiv:2310.10196},
173 | year={2023}
174 | }
175 | ```
176 |
177 |
178 | 5, [**Transformers in Time Series: A Survey**](https://arxiv.org/abs/2202.07125), in IJCAI 2023.
179 | [\[GitHub Repo\]](https://github.com/qingsongedu/time-series-transformers-review)
180 |
181 | **Authors**: Qingsong Wen, Tian Zhou, Chaoli Zhang, Weiqi Chen, Ziqing Ma, Junchi Yan, Liang Sun
182 |
183 | ```bibtex
184 | @inproceedings{wen2023transformers,
185 | title={Transformers in time series: A survey},
186 | author={Wen, Qingsong and Zhou, Tian and Zhang, Chaoli and Chen, Weiqi and Ma, Ziqing and Yan, Junchi and Sun, Liang},
187 | booktitle={International Joint Conference on Artificial Intelligence (IJCAI)},
188 | year={2023}
189 | }
190 | ```
191 |
192 | 6, [**TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting**](https://openreview.net/pdf?id=7oLshfEIC2), in ICLR 2024.
193 | [\[GitHub Repo\]](https://github.com/kwuking/TimeMixer)
194 |
195 | **Authors**: Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou
196 |
197 | ```bibtex
198 | @inproceedings{wang2023timemixer,
199 | title={TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting},
200 | author={Wang, Shiyu and Wu, Haixu and Shi, Xiaoming and Hu, Tengge and Luo, Huakun and Ma, Lintao and Zhang, James Y and Zhou, Jun},
201 | booktitle={International Conference on Learning Representations (ICLR)},
202 | year={2024}
203 | }
204 | ```
205 |
206 | ## Acknowledgement
207 | Our implementation adapts [Time-Series-Library](https://github.com/thuml/Time-Series-Library) and [OFA (GPT4TS)](https://github.com/DAMO-DI-ML/NeurIPS2023-One-Fits-All) as the code base, which we have extensively modified for our purposes. We thank the authors for sharing their implementations and related resources.
208 |
--------------------------------------------------------------------------------
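The README's Introduction describes reprogramming only in prose. Below is a minimal, self-contained conceptual sketch of that idea (time-series patch embeddings cross-attending over a small bank of text prototypes so they land in the LLM's embedding space); it is not the repository's `models/TimeLLM.py`, and every name and dimension here is illustrative only.

```python
import torch
import torch.nn as nn


class ReprogrammingSketch(nn.Module):
    """Toy stand-in for Time-LLM's reprogramming step; the backbone LLM itself stays frozen."""

    def __init__(self, d_patch: int, d_llm: int, num_prototypes: int = 1000):
        super().__init__()
        # Stand-in for a reduced set of word embeddings ("text prototypes").
        self.prototypes = nn.Parameter(torch.randn(num_prototypes, d_llm))
        self.query_proj = nn.Linear(d_patch, d_llm)
        self.attn = nn.MultiheadAttention(embed_dim=d_llm, num_heads=8, batch_first=True)

    def forward(self, patches: torch.Tensor) -> torch.Tensor:
        # patches: [batch, num_patches, d_patch] -> [batch, num_patches, d_llm]
        q = self.query_proj(patches)
        proto = self.prototypes.unsqueeze(0).expand(q.size(0), -1, -1)
        out, _ = self.attn(q, proto, proto)   # patches query the text prototypes
        return out                            # prepended with the prompt and fed to the frozen LLM


x = torch.randn(2, 64, 16)                    # 2 series, 64 patches, 16-dim patch embeddings
print(ReprogrammingSketch(d_patch=16, d_llm=768)(x).shape)   # torch.Size([2, 64, 768])
```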
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/data_provider/data_factory.py:
--------------------------------------------------------------------------------
1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4
2 | from torch.utils.data import DataLoader
3 |
4 | data_dict = {
5 | 'ETTh1': Dataset_ETT_hour,
6 | 'ETTh2': Dataset_ETT_hour,
7 | 'ETTm1': Dataset_ETT_minute,
8 | 'ETTm2': Dataset_ETT_minute,
9 | 'ECL': Dataset_Custom,
10 | 'Traffic': Dataset_Custom,
11 | 'Weather': Dataset_Custom,
12 | 'm4': Dataset_M4,
13 | }
14 |
15 |
16 | def data_provider(args, flag):
17 | Data = data_dict[args.data]
18 | timeenc = 0 if args.embed != 'timeF' else 1
19 | percent = args.percent
20 |
21 | if flag == 'test':
22 | shuffle_flag = False
23 | drop_last = True
24 | batch_size = args.batch_size
25 | freq = args.freq
26 | else:
27 | shuffle_flag = True
28 | drop_last = True
29 | batch_size = args.batch_size
30 | freq = args.freq
31 |
32 | if args.data == 'm4':
33 | drop_last = False
34 | data_set = Data(
35 | root_path=args.root_path,
36 | data_path=args.data_path,
37 | flag=flag,
38 | size=[args.seq_len, args.label_len, args.pred_len],
39 | features=args.features,
40 | target=args.target,
41 | timeenc=timeenc,
42 | freq=freq,
43 | seasonal_patterns=args.seasonal_patterns
44 | )
45 | else:
46 | data_set = Data(
47 | root_path=args.root_path,
48 | data_path=args.data_path,
49 | flag=flag,
50 | size=[args.seq_len, args.label_len, args.pred_len],
51 | features=args.features,
52 | target=args.target,
53 | timeenc=timeenc,
54 | freq=freq,
55 | percent=percent,
56 | seasonal_patterns=args.seasonal_patterns
57 | )
58 | data_loader = DataLoader(
59 | data_set,
60 | batch_size=batch_size,
61 | shuffle=shuffle_flag,
62 | num_workers=args.num_workers,
63 | drop_last=drop_last)
64 | return data_set, data_loader
65 |
--------------------------------------------------------------------------------
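A usage sketch for `data_provider` above. The argument values are hypothetical and simply mirror the fields the function reads from `args`; the paths assume the pre-processed ETTh1 CSV from the README's dataset archive sits under `./dataset/ETT-small/`.

```python
from types import SimpleNamespace

from data_provider.data_factory import data_provider  # assumes the repo root is on PYTHONPATH

# Hypothetical argument namespace: only the fields data_provider actually reads are set.
args = SimpleNamespace(
    data='ETTh1', root_path='./dataset/ETT-small/', data_path='ETTh1.csv',
    features='M', target='OT', embed='timeF', freq='h', percent=100,
    seq_len=512, label_len=48, pred_len=96, seasonal_patterns='Monthly',
    batch_size=8, num_workers=0,
)

train_set, train_loader = data_provider(args, flag='train')
batch_x, batch_y, batch_x_mark, batch_y_mark = next(iter(train_loader))
print(batch_x.shape)  # e.g. [8, 512, 1] if Dataset_ETT_hour indexes channels independently,
                      # like the pretrain loaders further below
```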
/data_provider/m4.py:
--------------------------------------------------------------------------------
1 | # This source code is provided for the purposes of scientific reproducibility
2 | # under the following limited license from Element AI Inc. The code is an
3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
4 | # expansion analysis for interpretable time series forecasting,
5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
7 | # International license (CC BY-NC 4.0):
8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
14 |
15 | """
16 | M4 Dataset
17 | """
18 | from dataclasses import dataclass
19 |
20 | import numpy as np
21 | import pandas as pd
22 | import logging
23 | import os
24 | import pathlib
25 | import sys
26 | from urllib import request
27 |
28 |
29 | def url_file_name(url: str) -> str:
30 | """
31 | Extract file name from url.
32 |
33 | :param url: URL to extract file name from.
34 | :return: File name.
35 | """
36 | return url.split('/')[-1] if len(url) > 0 else ''
37 |
38 |
39 | def download(url: str, file_path: str) -> None:
40 | """
41 | Download a file to the given path.
42 |
43 | :param url: URL to download
44 | :param file_path: Where to download the content.
45 | """
46 |
47 | def progress(count, block_size, total_size):
48 | progress_pct = float(count * block_size) / float(total_size) * 100.0
49 | sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct))
50 | sys.stdout.flush()
51 |
52 | if not os.path.isfile(file_path):
53 | opener = request.build_opener()
54 | opener.addheaders = [('User-agent', 'Mozilla/5.0')]
55 | request.install_opener(opener)
56 | pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
57 | f, _ = request.urlretrieve(url, file_path, progress)
58 | sys.stdout.write('\n')
59 | sys.stdout.flush()
60 | file_info = os.stat(f)
61 | logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
62 | else:
63 | file_info = os.stat(file_path)
64 | logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')
65 |
66 |
67 | @dataclass()
68 | class M4Dataset:
69 | ids: np.ndarray
70 | groups: np.ndarray
71 | frequencies: np.ndarray
72 | horizons: np.ndarray
73 | values: np.ndarray
74 |
75 | @staticmethod
76 | def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset':
77 | """
78 | Load cached dataset.
79 |
80 | :param training: Load training part if training is True, test part otherwise.
81 | """
82 | info_file = os.path.join(dataset_file, 'M4-info.csv')
83 | train_cache_file = os.path.join(dataset_file, 'training.npz')
84 | test_cache_file = os.path.join(dataset_file, 'test.npz')
85 | m4_info = pd.read_csv(info_file)
86 | return M4Dataset(ids=m4_info.M4id.values,
87 | groups=m4_info.SP.values,
88 | frequencies=m4_info.Frequency.values,
89 | horizons=m4_info.Horizon.values,
90 | values=np.load(
91 | train_cache_file if training else test_cache_file,
92 | allow_pickle=True))
93 |
94 |
95 | @dataclass()
96 | class M4Meta:
97 | seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']
98 | horizons = [6, 8, 18, 13, 14, 48]
99 | frequencies = [1, 4, 12, 1, 1, 24]
100 | horizons_map = {
101 | 'Yearly': 6,
102 | 'Quarterly': 8,
103 | 'Monthly': 18,
104 | 'Weekly': 13,
105 | 'Daily': 14,
106 | 'Hourly': 48
107 | } # different predict length
108 | frequency_map = {
109 | 'Yearly': 1,
110 | 'Quarterly': 4,
111 | 'Monthly': 12,
112 | 'Weekly': 1,
113 | 'Daily': 1,
114 | 'Hourly': 24
115 | }
116 | history_size = {
117 | 'Yearly': 1.5,
118 | 'Quarterly': 1.5,
119 | 'Monthly': 1.5,
120 | 'Weekly': 10,
121 | 'Daily': 10,
122 | 'Hourly': 10
123 | } # from interpretable.gin
124 |
125 |
126 | # Default path to the M4 info file; mirrors the dataset_file default used by M4Dataset.load.
127 | INFO_FILE_PATH = os.path.join('../dataset/m4', 'M4-info.csv')
128 |
129 |
130 | def load_m4_info() -> pd.DataFrame:
131 |     """
132 |     Load the M4-info file.
133 |
134 |     :return: Pandas DataFrame of M4Info.
135 |     """
136 |     return pd.read_csv(INFO_FILE_PATH)
137 |
--------------------------------------------------------------------------------
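A short usage sketch for the helpers above (assuming the repo root is on PYTHONPATH). `M4Meta` needs no files; `M4Dataset.load` expects the pre-processed `M4-info.csv`, `training.npz`, and `test.npz` from the README's dataset archive, and the path below is an assumption.

```python
from data_provider.m4 import M4Dataset, M4Meta

# Horizon / seasonal-period lookups used when configuring M4 experiments (no files required).
print(M4Meta.horizons_map['Monthly'])    # 18 forecast steps
print(M4Meta.frequency_map['Hourly'])    # seasonal period of 24

# Loading the cached arrays requires the pre-processed M4 files on disk.
train = M4Dataset.load(training=True, dataset_file='./dataset/m4')
print(train.ids[:3], train.groups[:3])   # series identifiers and their seasonal patterns
```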
/data_provider_pretrain/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/data_provider_pretrain/__init__.py
--------------------------------------------------------------------------------
/data_provider_pretrain/data_factory.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import DataLoader
2 |
3 | from data_provider_pretrain.data_loader import Dataset_ETT_hour, Dataset_ETT_minute
4 |
5 | data_dict = {
6 | 'ETTh1': Dataset_ETT_hour,
7 | 'ETTh2': Dataset_ETT_hour,
8 | 'ETTm1': Dataset_ETT_minute,
9 | 'ETTm2': Dataset_ETT_minute,
10 | }
11 |
12 |
13 | def data_provider(args, data, data_path, pretrain=True, flag='train'):
14 | Data = data_dict[data]
15 | timeenc = 0 if args.embed != 'timeF' else 1
16 | percent = args.percent
17 |
18 | if flag == 'test':
19 | shuffle_flag = False
20 | drop_last = True
21 | batch_size = args.batch_size
22 | freq = args.freq
23 | else:
24 | shuffle_flag = True
25 | drop_last = True
26 | batch_size = args.batch_size
27 | freq = args.freq
28 |
29 | data_set = Data(
30 | root_path=args.root_path,
31 | data_path=data_path,
32 | flag=flag,
33 | size=[args.seq_len, args.label_len, args.pred_len],
34 | features=args.features,
35 | target=args.target,
36 | timeenc=timeenc,
37 | freq=freq,
38 | percent=percent,
39 | seasonal_patterns=args.seasonal_patterns,
40 | pretrain=pretrain
41 | )
42 | data_loader = DataLoader(
43 | data_set,
44 | batch_size=batch_size,
45 | shuffle=shuffle_flag,
46 | num_workers=args.num_workers,
47 | drop_last=drop_last)
48 | return data_set, data_loader
49 |
--------------------------------------------------------------------------------
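A sketch analogous to the one after `data_provider/data_factory.py`, adapted to this module's signature, where the dataset name, CSV file, and `pretrain` flag are passed explicitly; all values are illustrative.

```python
from types import SimpleNamespace

from data_provider_pretrain.data_factory import data_provider

# Hypothetical args. With pretrain=True the ETT loaders below train on the combined
# train+val span (see the border1s/border2s settings in data_loader.py).
args = SimpleNamespace(
    root_path='./dataset/ETT-small/', features='M', target='OT', embed='timeF',
    freq='h', percent=100, seq_len=512, label_len=48, pred_len=96,
    seasonal_patterns=None, batch_size=8, num_workers=0,
)

train_set, train_loader = data_provider(args, data='ETTh1', data_path='ETTh1.csv',
                                        pretrain=True, flag='train')
print(len(train_set))  # sliding windows per variate x number of variates
```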
/data_provider_pretrain/data_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | from torch.utils.data import Dataset
4 | from sklearn.preprocessing import StandardScaler
5 | from utils.timefeatures import time_features
6 | import warnings
7 |
8 | warnings.filterwarnings('ignore')
9 |
10 |
11 | class Dataset_ETT_hour(Dataset):
12 | def __init__(self, root_path, flag='train', size=None,
13 | features='S', data_path='ETTh1.csv',
14 | target='OT', scale=True, timeenc=0, freq='h', percent=100,
15 | seasonal_patterns=None, pretrain=True):
16 | if size == None:
17 | self.seq_len = 24 * 4 * 4
18 | self.label_len = 24 * 4
19 | self.pred_len = 24 * 4
20 | else:
21 | self.seq_len = size[0]
22 | self.label_len = size[1]
23 | self.pred_len = size[2]
24 | # init
25 | assert flag in ['train', 'test', 'val']
26 | type_map = {'train': 0, 'val': 1, 'test': 2}
27 | self.set_type = type_map[flag]
28 |
29 | self.percent = percent
30 | self.pretrain = pretrain
31 | self.features = features
32 | self.target = target
33 | self.scale = scale
34 | self.timeenc = timeenc
35 | self.freq = freq
36 |
37 | # self.percent = percent
38 | self.root_path = root_path
39 | self.data_path = data_path
40 | self.__read_data__()
41 |
42 | self.enc_in = self.data_x.shape[-1]
43 | self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1
44 |
45 | def __read_data__(self):
46 | self.scaler = StandardScaler()
47 | df_raw = pd.read_csv(os.path.join(self.root_path,
48 | self.data_path))
49 |
50 | if self.pretrain:
51 | # border1s = [0, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
52 | # border2s = [12 * 30 * 24 + 8 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
53 | border1s = [0, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
54 | border2s = [12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
55 | else:
56 | border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
57 | border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
58 |
59 | border1 = border1s[self.set_type]
60 | border2 = border2s[self.set_type]
61 |
62 | if self.set_type == 0:
63 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len
64 |
65 | if self.features == 'M' or self.features == 'MS':
66 | cols_data = df_raw.columns[1:]
67 | df_data = df_raw[cols_data]
68 | elif self.features == 'S':
69 | df_data = df_raw[[self.target]]
70 |
71 | if self.scale:
72 | train_data = df_data[border1s[0]:border2s[0]]
73 | self.scaler.fit(train_data.values)
74 | data = self.scaler.transform(df_data.values)
75 | else:
76 | data = df_data.values
77 |
78 | df_stamp = df_raw[['date']][border1:border2]
79 | df_stamp['date'] = pd.to_datetime(df_stamp.date)
80 | if self.timeenc == 0:
81 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)
82 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)
83 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)
84 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)
85 | data_stamp = df_stamp.drop(['date'], 1).values
86 | elif self.timeenc == 1:
87 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
88 | data_stamp = data_stamp.transpose(1, 0)
89 |
90 | self.data_x = data[border1:border2]
91 | self.data_y = data[border1:border2]
92 | self.data_stamp = data_stamp
93 |
94 | def __getitem__(self, index):
95 | feat_id = index // self.tot_len
96 | s_begin = index % self.tot_len
97 |
98 | s_end = s_begin + self.seq_len
99 | r_begin = s_end - self.label_len
100 | r_end = r_begin + self.label_len + self.pred_len
101 | seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
102 | seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
103 | seq_x_mark = self.data_stamp[s_begin:s_end]
104 | seq_y_mark = self.data_stamp[r_begin:r_end]
105 |
106 | return seq_x, seq_y, seq_x_mark, seq_y_mark
107 |
108 | def __len__(self):
109 | return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in
110 |
111 | def inverse_transform(self, data):
112 | return self.scaler.inverse_transform(data)
113 |
114 |
115 | class Dataset_ETT_minute(Dataset):
116 | def __init__(self, root_path, flag='train', size=None,
117 | features='S', data_path='ETTm1.csv',
118 | target='OT', scale=True, timeenc=0, freq='t', percent=100,
119 | seasonal_patterns=None, pretrain=True):
120 | if size == None:
121 | self.seq_len = 24 * 4 * 4
122 | self.label_len = 24 * 4
123 | self.pred_len = 24 * 4
124 | else:
125 | self.seq_len = size[0]
126 | self.label_len = size[1]
127 | self.pred_len = size[2]
128 | # init
129 | assert flag in ['train', 'test', 'val']
130 | type_map = {'train': 0, 'val': 1, 'test': 2}
131 | self.set_type = type_map[flag]
132 |
133 | self.percent = percent
134 | self.pretrain = pretrain
135 | self.features = features
136 | self.target = target
137 | self.scale = scale
138 | self.timeenc = timeenc
139 | self.freq = freq
140 |
141 | self.root_path = root_path
142 | self.data_path = data_path
143 | self.__read_data__()
144 |
145 | self.enc_in = self.data_x.shape[-1]
146 | self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1
147 |
148 | def __read_data__(self):
149 | self.scaler = StandardScaler()
150 | df_raw = pd.read_csv(os.path.join(self.root_path,
151 | self.data_path))
152 |
153 | if self.pretrain:
154 | # border1s = [0, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len,
155 | # 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
156 | # border2s = [12 * 30 * 24 * 4 + 8 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4,
157 | # 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
158 | border1s = [0, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len,
159 | 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
160 | border2s = [12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4,
161 | 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
162 | else:
163 | border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
164 | border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
165 |
166 | border1 = border1s[self.set_type]
167 | border2 = border2s[self.set_type]
168 |
169 | if self.set_type == 0:
170 | border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len
171 |
172 | if self.features == 'M' or self.features == 'MS':
173 | cols_data = df_raw.columns[1:]
174 | df_data = df_raw[cols_data]
175 | elif self.features == 'S':
176 | df_data = df_raw[[self.target]]
177 |
178 | if self.scale:
179 | train_data = df_data[border1s[0]:border2s[0]]
180 | self.scaler.fit(train_data.values)
181 | data = self.scaler.transform(df_data.values)
182 | else:
183 | data = df_data.values
184 |
185 | df_stamp = df_raw[['date']][border1:border2]
186 | df_stamp['date'] = pd.to_datetime(df_stamp.date)
187 | if self.timeenc == 0:
188 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)
189 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)
190 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)
191 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)
192 | df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1)
193 | df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
194 | data_stamp = df_stamp.drop(['date'], 1).values
195 | elif self.timeenc == 1:
196 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
197 | data_stamp = data_stamp.transpose(1, 0)
198 |
199 | self.data_x = data[border1:border2]
200 | self.data_y = data[border1:border2]
201 | self.data_stamp = data_stamp
202 |
203 | def __getitem__(self, index):
204 | feat_id = index // self.tot_len
205 | s_begin = index % self.tot_len
206 |
207 | s_end = s_begin + self.seq_len
208 | r_begin = s_end - self.label_len
209 | r_end = r_begin + self.label_len + self.pred_len
210 | seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
211 | seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
212 | seq_x_mark = self.data_stamp[s_begin:s_end]
213 | seq_y_mark = self.data_stamp[r_begin:r_end]
214 |
215 | return seq_x, seq_y, seq_x_mark, seq_y_mark
216 |
217 | def __len__(self):
218 | return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in
219 |
220 | def inverse_transform(self, data):
221 | return self.scaler.inverse_transform(data)
222 |
--------------------------------------------------------------------------------
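The `__getitem__` / `__len__` pair above treats each variate as its own univariate series (channel independence). A tiny numeric illustration, with made-up sizes, of how a flat dataset index splits into a (feature id, window start) pair:

```python
# Illustrative numbers only: tot_len = len(data_x) - seq_len - pred_len + 1
tot_len, enc_in = 100, 7          # windows per variate, number of variates (e.g. an ETT file)

index = 3 * tot_len + 42          # the 43rd window of the 4th variate
feat_id, s_begin = index // tot_len, index % tot_len
print(feat_id, s_begin)           # 3 42

# __len__ therefore reports tot_len * enc_in samples, one per (variate, window) pair.
print(tot_len * enc_in)           # 700
```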
/dataset/prompt_bank/ECL.txt:
--------------------------------------------------------------------------------
1 | Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available. This archive contains 2,075,259 measurements gathered in a house located in Sceaux (7 km from Paris, France) between December 2006 and November 2010 (47 months).
2 |
--------------------------------------------------------------------------------
/dataset/prompt_bank/ETT.txt:
--------------------------------------------------------------------------------
1 | The Electricity Transformer Temperature (ETT) is a crucial indicator in long-term electric power deployment. This dataset consists of 2 years of data from two separate counties in China. To explore different granularities of the long sequence time-series forecasting (LSTF) problem, different subsets are created: {ETTh1, ETTh2} at the 1-hour level and ETTm1 at the 15-minute level. Each data point consists of the target value "oil temperature" and 6 power load features. The train/val/test split is 12/4/4 months.
2 |
3 |
--------------------------------------------------------------------------------
/dataset/prompt_bank/Traffic.txt:
--------------------------------------------------------------------------------
1 | Traffic is a collection of hourly data from the California Department of Transportation, describing the road occupancy rates measured by different sensors on San Francisco Bay Area freeways.
--------------------------------------------------------------------------------
/dataset/prompt_bank/Weather.txt:
--------------------------------------------------------------------------------
1 | Weather is recorded every 10 minutes for the whole of 2020 and contains 21 meteorological indicators, such as air temperature and humidity.
2 |
--------------------------------------------------------------------------------
/dataset/prompt_bank/m4.txt:
--------------------------------------------------------------------------------
1 | The M4 dataset is a collection of 100,000 time series used for the fourth edition of the Makridakis Forecasting Competition. The M4 dataset consists of time series of yearly, quarterly, monthly and other (weekly, daily and hourly) data, which are divided into training and test sets. The minimum numbers of observations in the training sets are 13 for yearly, 16 for quarterly, 42 for monthly, 80 for weekly, 93 for daily and 700 for hourly series. The participants were asked to produce the following numbers of forecasts beyond the available data that they had been given: six for yearly, eight for quarterly, 18 for monthly series, 13 for weekly series and 14 and 48 forecasts respectively for the daily and hourly ones.
2 |
3 |
--------------------------------------------------------------------------------
/ds_config_zero2.json:
--------------------------------------------------------------------------------
1 | {
2 | "bf16": {
3 | "enabled": true,
4 | "auto_cast": true
5 | },
6 | "zero_optimization": {
7 | "stage": 2,
8 | "allgather_partitions": true,
9 | "allgather_bucket_size": 2e8,
10 | "overlap_comm": true,
11 | "reduce_scatter": true,
12 | "reduce_bucket_size": 2e8,
13 | "contiguous_gradients": true,
14 | "sub_group_size": 1e9
15 | },
16 | "gradient_accumulation_steps": "auto",
17 | "train_batch_size": "auto",
18 | "train_micro_batch_size_per_gpu": "auto",
19 | "steps_per_print": 10,
20 | "wall_clock_breakdown": false
21 | }
--------------------------------------------------------------------------------
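A sketch of how `ds_config_zero2.json` can be handed to Hugging Face Accelerate (the `accelerate` and `deepspeed` versions pinned in the README support this path). Whether the training scripts wire it up exactly this way is an assumption; treat the snippet as illustrative.

```python
from accelerate import Accelerator
from accelerate.utils import DeepSpeedPlugin

# Point Accelerate at the ZeRO stage-2 config; the "auto" entries (batch sizes, gradient
# accumulation) are left for the launcher to resolve, so the JSON stays generic across runs.
deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
accelerator = Accelerator(deepspeed_plugin=deepspeed_plugin)

# Model, optimizer, and dataloaders would then be wrapped in the usual way, e.g.:
# model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader)
```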
/figures/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/framework.png
--------------------------------------------------------------------------------
/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/logo.png
--------------------------------------------------------------------------------
/figures/method-detailed-illustration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/figures/method-detailed-illustration.png
--------------------------------------------------------------------------------
/layers/AutoCorrelation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import math
7 | from math import sqrt
8 | import os
9 |
10 |
11 | class AutoCorrelation(nn.Module):
12 | """
13 | AutoCorrelation Mechanism with the following two phases:
14 | (1) period-based dependencies discovery
15 | (2) time delay aggregation
16 | This block can replace the self-attention family mechanism seamlessly.
17 | """
18 |
19 | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
20 | super(AutoCorrelation, self).__init__()
21 | self.factor = factor
22 | self.scale = scale
23 | self.mask_flag = mask_flag
24 | self.output_attention = output_attention
25 | self.dropout = nn.Dropout(attention_dropout)
26 |
27 | def time_delay_agg_training(self, values, corr):
28 | """
29 | SpeedUp version of Autocorrelation (a batch-normalization style design)
30 | This is for the training phase.
31 | """
32 | head = values.shape[1]
33 | channel = values.shape[2]
34 | length = values.shape[3]
35 | # find top k
36 | top_k = int(self.factor * math.log(length))
37 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
38 | index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
39 | weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
40 | # update corr
41 | tmp_corr = torch.softmax(weights, dim=-1)
42 | # aggregation
43 | tmp_values = values
44 | delays_agg = torch.zeros_like(values).float()
45 | for i in range(top_k):
46 | pattern = torch.roll(tmp_values, -int(index[i]), -1)
47 | delays_agg = delays_agg + pattern * \
48 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
49 | return delays_agg
50 |
51 | def time_delay_agg_inference(self, values, corr):
52 | """
53 | SpeedUp version of Autocorrelation (a batch-normalization style design)
54 | This is for the inference phase.
55 | """
56 | batch = values.shape[0]
57 | head = values.shape[1]
58 | channel = values.shape[2]
59 | length = values.shape[3]
60 | # index init
61 | init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
62 | # find top k
63 | top_k = int(self.factor * math.log(length))
64 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
65 | weights, delay = torch.topk(mean_value, top_k, dim=-1)
66 | # update corr
67 | tmp_corr = torch.softmax(weights, dim=-1)
68 | # aggregation
69 | tmp_values = values.repeat(1, 1, 1, 2)
70 | delays_agg = torch.zeros_like(values).float()
71 | for i in range(top_k):
72 | tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
73 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
74 | delays_agg = delays_agg + pattern * \
75 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
76 | return delays_agg
77 |
78 | def time_delay_agg_full(self, values, corr):
79 | """
80 | Standard version of Autocorrelation
81 | """
82 | batch = values.shape[0]
83 | head = values.shape[1]
84 | channel = values.shape[2]
85 | length = values.shape[3]
86 | # index init
87 | init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
88 | # find top k
89 | top_k = int(self.factor * math.log(length))
90 | weights, delay = torch.topk(corr, top_k, dim=-1)
91 | # update corr
92 | tmp_corr = torch.softmax(weights, dim=-1)
93 | # aggregation
94 | tmp_values = values.repeat(1, 1, 1, 2)
95 | delays_agg = torch.zeros_like(values).float()
96 | for i in range(top_k):
97 | tmp_delay = init_index + delay[..., i].unsqueeze(-1)
98 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
99 | delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
100 | return delays_agg
101 |
102 | def forward(self, queries, keys, values, attn_mask):
103 | B, L, H, E = queries.shape
104 | _, S, _, D = values.shape
105 | if L > S:
106 | zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
107 | values = torch.cat([values, zeros], dim=1)
108 | keys = torch.cat([keys, zeros], dim=1)
109 | else:
110 | values = values[:, :L, :, :]
111 | keys = keys[:, :L, :, :]
112 |
113 | # period-based dependencies
114 | q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
115 | k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
116 | res = q_fft * torch.conj(k_fft)
117 | corr = torch.fft.irfft(res, dim=-1)
118 |
119 | # time delay agg
120 | if self.training:
121 | V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
122 | else:
123 | V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
124 |
125 | if self.output_attention:
126 | return (V.contiguous(), corr.permute(0, 3, 1, 2))
127 | else:
128 | return (V.contiguous(), None)
129 |
130 |
131 | class AutoCorrelationLayer(nn.Module):
132 | def __init__(self, correlation, d_model, n_heads, d_keys=None,
133 | d_values=None):
134 | super(AutoCorrelationLayer, self).__init__()
135 |
136 | d_keys = d_keys or (d_model // n_heads)
137 | d_values = d_values or (d_model // n_heads)
138 |
139 | self.inner_correlation = correlation
140 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
141 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
142 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
143 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
144 | self.n_heads = n_heads
145 |
146 | def forward(self, queries, keys, values, attn_mask):
147 | B, L, _ = queries.shape
148 | _, S, _ = keys.shape
149 | H = self.n_heads
150 |
151 | queries = self.query_projection(queries).view(B, L, H, -1)
152 | keys = self.key_projection(keys).view(B, S, H, -1)
153 | values = self.value_projection(values).view(B, S, H, -1)
154 |
155 | out, attn = self.inner_correlation(
156 | queries,
157 | keys,
158 | values,
159 | attn_mask
160 | )
161 | out = out.view(B, L, -1)
162 |
163 | return self.out_projection(out), attn
164 |
--------------------------------------------------------------------------------
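A self-contained toy illustration (not part of the repository) of the FFT trick in `AutoCorrelation.forward`: the autocorrelation is the inverse FFT of `FFT(q) * conj(FFT(k))`, and the top-k lags of that curve drive the time-delay aggregation.

```python
import math

import torch

torch.manual_seed(0)
length = 96
t = torch.arange(length, dtype=torch.float32)
x = torch.sin(2 * torch.pi * t / 24) + 0.1 * torch.randn(length)   # dominant period of 24 steps

# Wiener-Khinchin: autocorrelation computed in the frequency domain, as in forward().
q_fft = torch.fft.rfft(x, dim=-1)
k_fft = torch.fft.rfft(x, dim=-1)
corr = torch.fft.irfft(q_fft * torch.conj(k_fft), n=length, dim=-1)

top_k = int(1 * math.log(length))                # factor * log(length), as in the layer
lags = torch.topk(corr[1:], top_k).indices + 1   # skip lag 0, which is trivially the maximum
print(lags)                                      # the strongest lags sit at or near multiples of 24
```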
/layers/Autoformer_EncDec.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class my_Layernorm(nn.Module):
7 | """
8 | Special designed layernorm for the seasonal part
9 | """
10 |
11 | def __init__(self, channels):
12 | super(my_Layernorm, self).__init__()
13 | self.layernorm = nn.LayerNorm(channels)
14 |
15 | def forward(self, x):
16 | x_hat = self.layernorm(x)
17 | bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
18 | return x_hat - bias
19 |
20 |
21 | class moving_avg(nn.Module):
22 | """
23 | Moving average block to highlight the trend of time series
24 | """
25 |
26 | def __init__(self, kernel_size, stride):
27 | super(moving_avg, self).__init__()
28 | self.kernel_size = kernel_size
29 | self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
30 |
31 | def forward(self, x):
32 | # padding on the both ends of time series
33 | front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
34 | end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
35 | x = torch.cat([front, x, end], dim=1)
36 | x = self.avg(x.permute(0, 2, 1))
37 | x = x.permute(0, 2, 1)
38 | return x
39 |
40 |
41 | class series_decomp(nn.Module):
42 | """
43 | Series decomposition block
44 | """
45 |
46 | def __init__(self, kernel_size):
47 | super(series_decomp, self).__init__()
48 | self.moving_avg = moving_avg(kernel_size, stride=1)
49 |
50 | def forward(self, x):
51 | moving_mean = self.moving_avg(x)
52 | res = x - moving_mean
53 | return res, moving_mean
54 |
55 |
56 | class series_decomp_multi(nn.Module):
57 | """
58 | Multiple Series decomposition block from FEDformer
59 | """
60 |
61 | def __init__(self, kernel_size):
62 | super(series_decomp_multi, self).__init__()
63 | self.kernel_size = kernel_size
64 | self.series_decomp = [series_decomp(kernel) for kernel in kernel_size]
65 |
66 | def forward(self, x):
67 | moving_mean = []
68 | res = []
69 | for func in self.series_decomp:
70 | sea, moving_avg = func(x)
71 | moving_mean.append(moving_avg)
72 | res.append(sea)
73 |
74 | sea = sum(res) / len(res)
75 | moving_mean = sum(moving_mean) / len(moving_mean)
76 | return sea, moving_mean
77 |
78 |
79 | class EncoderLayer(nn.Module):
80 | """
81 | Autoformer encoder layer with the progressive decomposition architecture
82 | """
83 |
84 | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
85 | super(EncoderLayer, self).__init__()
86 | d_ff = d_ff or 4 * d_model
87 | self.attention = attention
88 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
89 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
90 | self.decomp1 = series_decomp(moving_avg)
91 | self.decomp2 = series_decomp(moving_avg)
92 | self.dropout = nn.Dropout(dropout)
93 | self.activation = F.relu if activation == "relu" else F.gelu
94 |
95 | def forward(self, x, attn_mask=None):
96 | new_x, attn = self.attention(
97 | x, x, x,
98 | attn_mask=attn_mask
99 | )
100 | x = x + self.dropout(new_x)
101 | x, _ = self.decomp1(x)
102 | y = x
103 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
104 | y = self.dropout(self.conv2(y).transpose(-1, 1))
105 | res, _ = self.decomp2(x + y)
106 | return res, attn
107 |
108 |
109 | class Encoder(nn.Module):
110 | """
111 | Autoformer encoder
112 | """
113 |
114 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
115 | super(Encoder, self).__init__()
116 | self.attn_layers = nn.ModuleList(attn_layers)
117 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
118 | self.norm = norm_layer
119 |
120 | def forward(self, x, attn_mask=None):
121 | attns = []
122 | if self.conv_layers is not None:
123 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
124 | x, attn = attn_layer(x, attn_mask=attn_mask)
125 | x = conv_layer(x)
126 | attns.append(attn)
127 | x, attn = self.attn_layers[-1](x)
128 | attns.append(attn)
129 | else:
130 | for attn_layer in self.attn_layers:
131 | x, attn = attn_layer(x, attn_mask=attn_mask)
132 | attns.append(attn)
133 |
134 | if self.norm is not None:
135 | x = self.norm(x)
136 |
137 | return x, attns
138 |
139 |
140 | class DecoderLayer(nn.Module):
141 | """
142 | Autoformer decoder layer with the progressive decomposition architecture
143 | """
144 |
145 | def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
146 | moving_avg=25, dropout=0.1, activation="relu"):
147 | super(DecoderLayer, self).__init__()
148 | d_ff = d_ff or 4 * d_model
149 | self.self_attention = self_attention
150 | self.cross_attention = cross_attention
151 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
152 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
153 | self.decomp1 = series_decomp(moving_avg)
154 | self.decomp2 = series_decomp(moving_avg)
155 | self.decomp3 = series_decomp(moving_avg)
156 | self.dropout = nn.Dropout(dropout)
157 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
158 | padding_mode='circular', bias=False)
159 | self.activation = F.relu if activation == "relu" else F.gelu
160 |
161 | def forward(self, x, cross, x_mask=None, cross_mask=None):
162 | x = x + self.dropout(self.self_attention(
163 | x, x, x,
164 | attn_mask=x_mask
165 | )[0])
166 | x, trend1 = self.decomp1(x)
167 | x = x + self.dropout(self.cross_attention(
168 | x, cross, cross,
169 | attn_mask=cross_mask
170 | )[0])
171 | x, trend2 = self.decomp2(x)
172 | y = x
173 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
174 | y = self.dropout(self.conv2(y).transpose(-1, 1))
175 | x, trend3 = self.decomp3(x + y)
176 |
177 | residual_trend = trend1 + trend2 + trend3
178 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
179 | return x, residual_trend
180 |
181 |
182 | class Decoder(nn.Module):
183 | """
184 | Autoformer encoder
185 | """
186 |
187 | def __init__(self, layers, norm_layer=None, projection=None):
188 | super(Decoder, self).__init__()
189 | self.layers = nn.ModuleList(layers)
190 | self.norm = norm_layer
191 | self.projection = projection
192 |
193 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
194 | for layer in self.layers:
195 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
196 | trend = trend + residual_trend
197 |
198 | if self.norm is not None:
199 | x = self.norm(x)
200 |
201 | if self.projection is not None:
202 | x = self.projection(x)
203 | return x, trend
204 |
--------------------------------------------------------------------------------
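A short sketch (assuming the repo root is on PYTHONPATH) of `series_decomp`, the moving-average split that `EncoderLayer` and `DecoderLayer` apply internally: the input is separated into a seasonal residual and a smooth trend of the same shape.

```python
import torch

from layers.Autoformer_EncDec import series_decomp

# A toy batch: linear trend plus a daily-looking sinusoid, shaped [batch, length, channels].
B, L, C = 2, 96, 7
t = torch.arange(L, dtype=torch.float32)
x = (0.05 * t + torch.sin(2 * torch.pi * t / 24)).expand(B, C, L).permute(0, 2, 1)

decomp = series_decomp(kernel_size=25)   # odd moving-average window, so the length is preserved
seasonal, trend = decomp(x)
print(seasonal.shape, trend.shape)       # torch.Size([2, 96, 7]) torch.Size([2, 96, 7])
```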
/layers/Conv_Blocks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class Inception_Block_V1(nn.Module):
6 | def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
7 | super(Inception_Block_V1, self).__init__()
8 | self.in_channels = in_channels
9 | self.out_channels = out_channels
10 | self.num_kernels = num_kernels
11 | kernels = []
12 | for i in range(self.num_kernels):
13 | kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i))
14 | self.kernels = nn.ModuleList(kernels)
15 | if init_weight:
16 | self._initialize_weights()
17 |
18 | def _initialize_weights(self):
19 | for m in self.modules():
20 | if isinstance(m, nn.Conv2d):
21 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
22 | if m.bias is not None:
23 | nn.init.constant_(m.bias, 0)
24 |
25 | def forward(self, x):
26 | res_list = []
27 | for i in range(self.num_kernels):
28 | res_list.append(self.kernels[i](x))
29 | res = torch.stack(res_list, dim=-1).mean(-1)
30 | return res
31 |
32 |
33 | class Inception_Block_V2(nn.Module):
34 | def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
35 | super(Inception_Block_V2, self).__init__()
36 | self.in_channels = in_channels
37 | self.out_channels = out_channels
38 | self.num_kernels = num_kernels
39 | kernels = []
40 | for i in range(self.num_kernels // 2):
41 | kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[1, 2 * i + 3], padding=[0, i + 1]))
42 | kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[2 * i + 3, 1], padding=[i + 1, 0]))
43 | kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
44 | self.kernels = nn.ModuleList(kernels)
45 | if init_weight:
46 | self._initialize_weights()
47 |
48 | def _initialize_weights(self):
49 | for m in self.modules():
50 | if isinstance(m, nn.Conv2d):
51 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
52 | if m.bias is not None:
53 | nn.init.constant_(m.bias, 0)
54 |
55 | def forward(self, x):
56 | res_list = []
57 | for i in range(self.num_kernels + 1):
58 | res_list.append(self.kernels[i](x))
59 | res = torch.stack(res_list, dim=-1).mean(-1)
60 | return res
61 |
--------------------------------------------------------------------------------
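A standalone shape check for Inception_Block_V1, which averages parallel Conv2d branches with kernel sizes 1, 3, ..., 2*num_kernels-1; all sizes below are illustrative.

import torch
from layers.Conv_Blocks import Inception_Block_V1

block = Inception_Block_V1(in_channels=16, out_channels=32, num_kernels=6)
x = torch.randn(2, 16, 24, 4)      # (batch, channels, height, width)
y = block(x)                       # each branch preserves the spatial size via padding=i

print(y.shape)                     # torch.Size([2, 32, 24, 4])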
/layers/Embed.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch import Tensor
5 | from torch.nn.utils import weight_norm
6 | import math
7 |
8 |
9 | class PositionalEmbedding(nn.Module):
10 | def __init__(self, d_model, max_len=5000):
11 | super(PositionalEmbedding, self).__init__()
12 | # Compute the positional encodings once in log space.
13 | pe = torch.zeros(max_len, d_model).float()
14 |         pe.requires_grad = False
15 |
16 | position = torch.arange(0, max_len).float().unsqueeze(1)
17 | div_term = (torch.arange(0, d_model, 2).float()
18 | * -(math.log(10000.0) / d_model)).exp()
19 |
20 | pe[:, 0::2] = torch.sin(position * div_term)
21 | pe[:, 1::2] = torch.cos(position * div_term)
22 |
23 | pe = pe.unsqueeze(0)
24 | self.register_buffer('pe', pe)
25 |
26 | def forward(self, x):
27 | return self.pe[:, :x.size(1)]
28 |
29 |
30 | class TokenEmbedding(nn.Module):
31 | def __init__(self, c_in, d_model):
32 | super(TokenEmbedding, self).__init__()
33 | padding = 1 if torch.__version__ >= '1.5.0' else 2
34 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
35 | kernel_size=3, padding=padding, padding_mode='circular', bias=False)
36 | for m in self.modules():
37 | if isinstance(m, nn.Conv1d):
38 | nn.init.kaiming_normal_(
39 | m.weight, mode='fan_in', nonlinearity='leaky_relu')
40 |
41 | def forward(self, x):
42 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
43 | return x
44 |
45 |
46 | class FixedEmbedding(nn.Module):
47 | def __init__(self, c_in, d_model):
48 | super(FixedEmbedding, self).__init__()
49 |
50 | w = torch.zeros(c_in, d_model).float()
51 |         w.requires_grad = False
52 |
53 | position = torch.arange(0, c_in).float().unsqueeze(1)
54 | div_term = (torch.arange(0, d_model, 2).float()
55 | * -(math.log(10000.0) / d_model)).exp()
56 |
57 | w[:, 0::2] = torch.sin(position * div_term)
58 | w[:, 1::2] = torch.cos(position * div_term)
59 |
60 | self.emb = nn.Embedding(c_in, d_model)
61 | self.emb.weight = nn.Parameter(w, requires_grad=False)
62 |
63 | def forward(self, x):
64 | return self.emb(x).detach()
65 |
66 |
67 | class TemporalEmbedding(nn.Module):
68 | def __init__(self, d_model, embed_type='fixed', freq='h'):
69 | super(TemporalEmbedding, self).__init__()
70 |
71 | minute_size = 4
72 | hour_size = 24
73 | weekday_size = 7
74 | day_size = 32
75 | month_size = 13
76 |
77 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
78 | if freq == 't':
79 | self.minute_embed = Embed(minute_size, d_model)
80 | self.hour_embed = Embed(hour_size, d_model)
81 | self.weekday_embed = Embed(weekday_size, d_model)
82 | self.day_embed = Embed(day_size, d_model)
83 | self.month_embed = Embed(month_size, d_model)
84 |
85 | def forward(self, x):
86 | x = x.long()
87 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
88 | self, 'minute_embed') else 0.
89 | hour_x = self.hour_embed(x[:, :, 3])
90 | weekday_x = self.weekday_embed(x[:, :, 2])
91 | day_x = self.day_embed(x[:, :, 1])
92 | month_x = self.month_embed(x[:, :, 0])
93 |
94 | return hour_x + weekday_x + day_x + month_x + minute_x
95 |
96 |
97 | class TimeFeatureEmbedding(nn.Module):
98 | def __init__(self, d_model, embed_type='timeF', freq='h'):
99 | super(TimeFeatureEmbedding, self).__init__()
100 |
101 | freq_map = {'h': 4, 't': 5, 's': 6,
102 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
103 | d_inp = freq_map[freq]
104 | self.embed = nn.Linear(d_inp, d_model, bias=False)
105 |
106 | def forward(self, x):
107 | return self.embed(x)
108 |
109 |
110 | class DataEmbedding(nn.Module):
111 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
112 | super(DataEmbedding, self).__init__()
113 |
114 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
115 | self.position_embedding = PositionalEmbedding(d_model=d_model)
116 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
117 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
118 | d_model=d_model, embed_type=embed_type, freq=freq)
119 | self.dropout = nn.Dropout(p=dropout)
120 |
121 | def forward(self, x, x_mark):
122 | if x_mark is None:
123 | x = self.value_embedding(x) + self.position_embedding(x).to(x.device)
124 | else:
125 | x = self.value_embedding(
126 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
127 | return self.dropout(x)
128 |
129 |
130 | class DataEmbedding_wo_pos(nn.Module):
131 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
132 | super(DataEmbedding_wo_pos, self).__init__()
133 |
134 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
135 | self.position_embedding = PositionalEmbedding(d_model=d_model)
136 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
137 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
138 | d_model=d_model, embed_type=embed_type, freq=freq)
139 | self.dropout = nn.Dropout(p=dropout)
140 |
141 | def forward(self, x, x_mark):
142 | if x_mark is None:
143 | x = self.value_embedding(x)
144 | else:
145 | x = self.value_embedding(x) + self.temporal_embedding(x_mark)
146 | return self.dropout(x)
147 |
148 |
149 | class ReplicationPad1d(nn.Module):
150 | def __init__(self, padding) -> None:
151 | super(ReplicationPad1d, self).__init__()
152 | self.padding = padding
153 |
154 | def forward(self, input: Tensor) -> Tensor:
155 | replicate_padding = input[:, :, -1].unsqueeze(-1).repeat(1, 1, self.padding[-1])
156 | output = torch.cat([input, replicate_padding], dim=-1)
157 | return output
158 |
159 |
160 | class PatchEmbedding(nn.Module):
161 | def __init__(self, d_model, patch_len, stride, dropout):
162 | super(PatchEmbedding, self).__init__()
163 | # Patching
164 | self.patch_len = patch_len
165 | self.stride = stride
166 | self.padding_patch_layer = ReplicationPad1d((0, stride))
167 |
168 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space
169 | self.value_embedding = TokenEmbedding(patch_len, d_model)
170 |
171 | # Positional embedding
172 | # self.position_embedding = PositionalEmbedding(d_model)
173 |
174 | # Residual dropout
175 | self.dropout = nn.Dropout(dropout)
176 |
177 | def forward(self, x):
178 | # do patching
179 | n_vars = x.shape[1]
180 | x = self.padding_patch_layer(x)
181 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
182 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
183 | # Input encoding
184 | x = self.value_embedding(x)
185 | return self.dropout(x), n_vars
186 |
187 |
188 | class DataEmbedding_wo_time(nn.Module):
189 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
190 | super(DataEmbedding_wo_time, self).__init__()
191 |
192 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
193 | self.position_embedding = PositionalEmbedding(d_model=d_model)
194 | self.dropout = nn.Dropout(p=dropout)
195 |
196 | def forward(self, x):
197 | x = self.value_embedding(x) + self.position_embedding(x)
198 | return self.dropout(x)
199 |
--------------------------------------------------------------------------------
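A minimal sketch of PatchEmbedding, mirroring how TimeLLM.forecast feeds it a channels-first series; the batch size, channel count, and hyper-parameters are illustrative.

import torch
from layers.Embed import PatchEmbedding

B, n_vars, seq_len = 2, 7, 96
patch_len, stride, d_model = 16, 8, 32

emb = PatchEmbedding(d_model, patch_len, stride, dropout=0.1)
x = torch.randn(B, n_vars, seq_len)           # (batch, channels, time)
out, returned_vars = emb(x)

# replication padding adds `stride` steps, so each variable yields
# (seq_len - patch_len) // stride + 2 patches
print(out.shape)                              # torch.Size([14, 12, 32]) = (B * n_vars, patch_nums, d_model)
print(returned_vars)                          # 7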
/layers/SelfAttention_Family.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from math import sqrt
5 | from utils.masking import TriangularCausalMask, ProbMask
6 | from reformer_pytorch import LSHSelfAttention
7 |
8 |
9 | class DSAttention(nn.Module):
10 | '''De-stationary Attention'''
11 |
12 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
13 | super(DSAttention, self).__init__()
14 | self.scale = scale
15 | self.mask_flag = mask_flag
16 | self.output_attention = output_attention
17 | self.dropout = nn.Dropout(attention_dropout)
18 |
19 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
20 | B, L, H, E = queries.shape
21 | _, S, _, D = values.shape
22 | scale = self.scale or 1. / sqrt(E)
23 |
24 | tau = 1.0 if tau is None else tau.unsqueeze(
25 | 1).unsqueeze(1) # B x 1 x 1 x 1
26 | delta = 0.0 if delta is None else delta.unsqueeze(
27 | 1).unsqueeze(1) # B x 1 x 1 x S
28 |
29 | # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors
30 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta
31 |
32 | if self.mask_flag:
33 | if attn_mask is None:
34 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
35 |
36 | scores.masked_fill_(attn_mask.mask, -np.inf)
37 |
38 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
39 | V = torch.einsum("bhls,bshd->blhd", A, values)
40 |
41 | if self.output_attention:
42 | return (V.contiguous(), A)
43 | else:
44 | return (V.contiguous(), None)
45 |
46 |
47 | class FullAttention(nn.Module):
48 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
49 | super(FullAttention, self).__init__()
50 | self.scale = scale
51 | self.mask_flag = mask_flag
52 | self.output_attention = output_attention
53 | self.dropout = nn.Dropout(attention_dropout)
54 |
55 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
56 | B, L, H, E = queries.shape
57 | _, S, _, D = values.shape
58 | scale = self.scale or 1. / sqrt(E)
59 |
60 | scores = torch.einsum("blhe,bshe->bhls", queries, keys)
61 |
62 | if self.mask_flag:
63 | if attn_mask is None:
64 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
65 |
66 | scores.masked_fill_(attn_mask.mask, -np.inf)
67 |
68 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
69 | V = torch.einsum("bhls,bshd->blhd", A, values)
70 |
71 | if self.output_attention:
72 | return (V.contiguous(), A)
73 | else:
74 | return (V.contiguous(), None)
75 |
76 |
77 | class ProbAttention(nn.Module):
78 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
79 | super(ProbAttention, self).__init__()
80 | self.factor = factor
81 | self.scale = scale
82 | self.mask_flag = mask_flag
83 | self.output_attention = output_attention
84 | self.dropout = nn.Dropout(attention_dropout)
85 |
86 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
87 | # Q [B, H, L, D]
88 | B, H, L_K, E = K.shape
89 | _, _, L_Q, _ = Q.shape
90 |
91 | # calculate the sampled Q_K
92 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
93 | # real U = U_part(factor*ln(L_k))*L_q
94 | index_sample = torch.randint(L_K, (L_Q, sample_k))
95 | K_sample = K_expand[:, :, torch.arange(
96 | L_Q).unsqueeze(1), index_sample, :]
97 | Q_K_sample = torch.matmul(
98 | Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
99 |
100 |         # find the Top_k query with sparsity measurement
101 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
102 | M_top = M.topk(n_top, sorted=False)[1]
103 |
104 | # use the reduced Q to calculate Q_K
105 | Q_reduce = Q[torch.arange(B)[:, None, None],
106 | torch.arange(H)[None, :, None],
107 | M_top, :] # factor*ln(L_q)
108 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
109 |
110 | return Q_K, M_top
111 |
112 | def _get_initial_context(self, V, L_Q):
113 | B, H, L_V, D = V.shape
114 | if not self.mask_flag:
115 | # V_sum = V.sum(dim=-2)
116 | V_sum = V.mean(dim=-2)
117 | contex = V_sum.unsqueeze(-2).expand(B, H,
118 | L_Q, V_sum.shape[-1]).clone()
119 | else: # use mask
120 | # requires that L_Q == L_V, i.e. for self-attention only
121 | assert (L_Q == L_V)
122 | contex = V.cumsum(dim=-2)
123 | return contex
124 |
125 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
126 | B, H, L_V, D = V.shape
127 |
128 | if self.mask_flag:
129 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
130 | scores.masked_fill_(attn_mask.mask, -np.inf)
131 |
132 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
133 |
134 | context_in[torch.arange(B)[:, None, None],
135 | torch.arange(H)[None, :, None],
136 | index, :] = torch.matmul(attn, V).type_as(context_in)
137 | if self.output_attention:
138 | attns = (torch.ones([B, H, L_V, L_V]) /
139 | L_V).type_as(attn).to(attn.device)
140 | attns[torch.arange(B)[:, None, None], torch.arange(H)[
141 | None, :, None], index, :] = attn
142 | return (context_in, attns)
143 | else:
144 | return (context_in, None)
145 |
146 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
147 | B, L_Q, H, D = queries.shape
148 | _, L_K, _, _ = keys.shape
149 |
150 | queries = queries.transpose(2, 1)
151 | keys = keys.transpose(2, 1)
152 | values = values.transpose(2, 1)
153 |
154 | U_part = self.factor * \
155 | np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
156 | u = self.factor * \
157 | np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
158 |
159 | U_part = U_part if U_part < L_K else L_K
160 | u = u if u < L_Q else L_Q
161 |
162 | scores_top, index = self._prob_QK(
163 | queries, keys, sample_k=U_part, n_top=u)
164 |
165 | # add scale factor
166 | scale = self.scale or 1. / sqrt(D)
167 | if scale is not None:
168 | scores_top = scores_top * scale
169 | # get the context
170 | context = self._get_initial_context(values, L_Q)
171 | # update the context with selected top_k queries
172 | context, attn = self._update_context(
173 | context, values, scores_top, index, L_Q, attn_mask)
174 |
175 | return context.contiguous(), attn
176 |
177 |
178 | class AttentionLayer(nn.Module):
179 | def __init__(self, attention, d_model, n_heads, d_keys=None,
180 | d_values=None):
181 | super(AttentionLayer, self).__init__()
182 |
183 | d_keys = d_keys or (d_model // n_heads)
184 | d_values = d_values or (d_model // n_heads)
185 |
186 | self.inner_attention = attention
187 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
188 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
189 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
190 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
191 | self.n_heads = n_heads
192 |
193 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
194 | B, L, _ = queries.shape
195 | _, S, _ = keys.shape
196 | H = self.n_heads
197 |
198 | queries = self.query_projection(queries).view(B, L, H, -1)
199 | keys = self.key_projection(keys).view(B, S, H, -1)
200 | values = self.value_projection(values).view(B, S, H, -1)
201 |
202 | out, attn = self.inner_attention(
203 | queries,
204 | keys,
205 | values,
206 | attn_mask,
207 | tau=tau,
208 | delta=delta
209 | )
210 | out = out.view(B, L, -1)
211 |
212 | return self.out_projection(out), attn
213 |
214 |
215 | class ReformerLayer(nn.Module):
216 | def __init__(self, attention, d_model, n_heads, d_keys=None,
217 | d_values=None, causal=False, bucket_size=4, n_hashes=4):
218 | super().__init__()
219 | self.bucket_size = bucket_size
220 | self.attn = LSHSelfAttention(
221 | dim=d_model,
222 | heads=n_heads,
223 | bucket_size=bucket_size,
224 | n_hashes=n_hashes,
225 | causal=causal
226 | )
227 |
228 | def fit_length(self, queries):
229 | # inside reformer: assert N % (bucket_size * 2) == 0
230 | B, N, C = queries.shape
231 | if N % (self.bucket_size * 2) == 0:
232 | return queries
233 | else:
234 | # fill the time series
235 | fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
236 | return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)
237 |
238 | def forward(self, queries, keys, values, attn_mask, tau, delta):
239 |         # in Reformer: default queries=keys
240 | B, N, C = queries.shape
241 | queries = self.attn(self.fit_length(queries))[:, :N, :]
242 | return queries, None
243 |
--------------------------------------------------------------------------------
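A small sketch of AttentionLayer wrapping FullAttention as self-attention; note that importing this module also pulls in reformer_pytorch (used by ReformerLayer), which is not pinned in requirements.txt and must be installed separately. All dimensions are illustrative.

import torch
from layers.SelfAttention_Family import FullAttention, AttentionLayer

d_model, n_heads, B, L = 64, 4, 2, 24
layer = AttentionLayer(
    FullAttention(mask_flag=False, attention_dropout=0.1, output_attention=True),
    d_model, n_heads)

x = torch.randn(B, L, d_model)
out, attn = layer(x, x, x, attn_mask=None)    # self-attention: queries = keys = values
print(out.shape)                              # torch.Size([2, 24, 64])
print(attn.shape)                             # torch.Size([2, 4, 24, 24]) = (B, heads, L, L)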
/layers/StandardNorm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class Normalize(nn.Module):
6 | def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
7 | """
8 | :param num_features: the number of features or channels
9 | :param eps: a value added for numerical stability
10 | :param affine: if True, RevIN has learnable affine parameters
11 | """
12 | super(Normalize, self).__init__()
13 | self.num_features = num_features
14 | self.eps = eps
15 | self.affine = affine
16 | self.subtract_last = subtract_last
17 | self.non_norm = non_norm
18 | if self.affine:
19 | self._init_params()
20 |
21 | def forward(self, x, mode: str):
22 | if mode == 'norm':
23 | self._get_statistics(x)
24 | x = self._normalize(x)
25 | elif mode == 'denorm':
26 | x = self._denormalize(x)
27 | else:
28 | raise NotImplementedError
29 | return x
30 |
31 | def _init_params(self):
32 | # initialize RevIN params: (C,)
33 | self.affine_weight = nn.Parameter(torch.ones(self.num_features))
34 | self.affine_bias = nn.Parameter(torch.zeros(self.num_features))
35 |
36 | def _get_statistics(self, x):
37 | dim2reduce = tuple(range(1, x.ndim - 1))
38 | if self.subtract_last:
39 | self.last = x[:, -1, :].unsqueeze(1)
40 | else:
41 | self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
42 | self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()
43 |
44 | def _normalize(self, x):
45 | if self.non_norm:
46 | return x
47 | if self.subtract_last:
48 | x = x - self.last
49 | else:
50 | x = x - self.mean
51 | x = x / self.stdev
52 | if self.affine:
53 | x = x * self.affine_weight
54 | x = x + self.affine_bias
55 | return x
56 |
57 | def _denormalize(self, x):
58 | if self.non_norm:
59 | return x
60 | if self.affine:
61 | x = x - self.affine_bias
62 | x = x / (self.affine_weight + self.eps * self.eps)
63 | x = x * self.stdev
64 | if self.subtract_last:
65 | x = x + self.last
66 | else:
67 | x = x + self.mean
68 | return x
69 |
--------------------------------------------------------------------------------
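A minimal round-trip sketch of the RevIN-style Normalize module, used by TimeLLM to normalize inputs and denormalize forecasts; shapes are illustrative.

import torch
from layers.StandardNorm import Normalize

norm = Normalize(num_features=7, affine=False)
x = torch.randn(4, 96, 7)                     # (batch, time, channels)

x_norm = norm(x, 'norm')                      # per-series statistics are computed and cached
x_back = norm(x_norm, 'denorm')               # the cached statistics are re-applied

print(torch.allclose(x, x_back, atol=1e-5))   # True (up to numerical error)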
/layers/Transformer_EncDec.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class ConvLayer(nn.Module):
7 | def __init__(self, c_in):
8 | super(ConvLayer, self).__init__()
9 | self.downConv = nn.Conv1d(in_channels=c_in,
10 | out_channels=c_in,
11 | kernel_size=3,
12 | padding=2,
13 | padding_mode='circular')
14 | self.norm = nn.BatchNorm1d(c_in)
15 | self.activation = nn.ELU()
16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
17 |
18 | def forward(self, x):
19 | x = self.downConv(x.permute(0, 2, 1))
20 | x = self.norm(x)
21 | x = self.activation(x)
22 | x = self.maxPool(x)
23 | x = x.transpose(1, 2)
24 | return x
25 |
26 |
27 | class EncoderLayer(nn.Module):
28 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
29 | super(EncoderLayer, self).__init__()
30 | d_ff = d_ff or 4 * d_model
31 | self.attention = attention
32 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
33 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
34 | self.norm1 = nn.LayerNorm(d_model)
35 | self.norm2 = nn.LayerNorm(d_model)
36 | self.dropout = nn.Dropout(dropout)
37 | self.activation = F.relu if activation == "relu" else F.gelu
38 |
39 | def forward(self, x, attn_mask=None, tau=None, delta=None):
40 | new_x, attn = self.attention(
41 | x, x, x,
42 | attn_mask=attn_mask,
43 | tau=tau, delta=delta
44 | )
45 | x = x + self.dropout(new_x)
46 |
47 | y = x = self.norm1(x)
48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
49 | y = self.dropout(self.conv2(y).transpose(-1, 1))
50 |
51 | return self.norm2(x + y), attn
52 |
53 |
54 | class Encoder(nn.Module):
55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
56 | super(Encoder, self).__init__()
57 | self.attn_layers = nn.ModuleList(attn_layers)
58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
59 | self.norm = norm_layer
60 |
61 | def forward(self, x, attn_mask=None, tau=None, delta=None):
62 | # x [B, L, D]
63 | attns = []
64 | if self.conv_layers is not None:
65 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
66 | delta = delta if i == 0 else None
67 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
68 | x = conv_layer(x)
69 | attns.append(attn)
70 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
71 | attns.append(attn)
72 | else:
73 | for attn_layer in self.attn_layers:
74 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
75 | attns.append(attn)
76 |
77 | if self.norm is not None:
78 | x = self.norm(x)
79 |
80 | return x, attns
81 |
82 |
83 | class DecoderLayer(nn.Module):
84 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
85 | dropout=0.1, activation="relu"):
86 | super(DecoderLayer, self).__init__()
87 | d_ff = d_ff or 4 * d_model
88 | self.self_attention = self_attention
89 | self.cross_attention = cross_attention
90 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
91 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
92 | self.norm1 = nn.LayerNorm(d_model)
93 | self.norm2 = nn.LayerNorm(d_model)
94 | self.norm3 = nn.LayerNorm(d_model)
95 | self.dropout = nn.Dropout(dropout)
96 | self.activation = F.relu if activation == "relu" else F.gelu
97 |
98 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
99 | x = x + self.dropout(self.self_attention(
100 | x, x, x,
101 | attn_mask=x_mask,
102 | tau=tau, delta=None
103 | )[0])
104 | x = self.norm1(x)
105 |
106 | x = x + self.dropout(self.cross_attention(
107 | x, cross, cross,
108 | attn_mask=cross_mask,
109 | tau=tau, delta=delta
110 | )[0])
111 |
112 | y = x = self.norm2(x)
113 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
114 | y = self.dropout(self.conv2(y).transpose(-1, 1))
115 |
116 | return self.norm3(x + y)
117 |
118 |
119 | class Decoder(nn.Module):
120 | def __init__(self, layers, norm_layer=None, projection=None):
121 | super(Decoder, self).__init__()
122 | self.layers = nn.ModuleList(layers)
123 | self.norm = norm_layer
124 | self.projection = projection
125 |
126 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
127 | for layer in self.layers:
128 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)
129 |
130 | if self.norm is not None:
131 | x = self.norm(x)
132 |
133 | if self.projection is not None:
134 | x = self.projection(x)
135 | return x
136 |
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/layers/__init__.py
--------------------------------------------------------------------------------
/models/Autoformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
5 | from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
6 | from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
7 | import math
8 | import numpy as np
9 |
10 |
11 | class Model(nn.Module):
12 | """
13 |     Autoformer is the first method to achieve series-wise connection,
14 |     with inherent O(L log L) complexity
15 | Paper link: https://openreview.net/pdf?id=I55UqU-M11y
16 | """
17 |
18 | def __init__(self, configs):
19 | super(Model, self).__init__()
20 | self.task_name = configs.task_name
21 | self.seq_len = configs.seq_len
22 | self.label_len = configs.label_len
23 | self.pred_len = configs.pred_len
24 | self.output_attention = configs.output_attention
25 |
26 | # Decomp
27 | kernel_size = configs.moving_avg
28 | self.decomp = series_decomp(kernel_size)
29 |
30 | # Embedding
31 | self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
32 | configs.dropout)
33 | # Encoder
34 | self.encoder = Encoder(
35 | [
36 | EncoderLayer(
37 | AutoCorrelationLayer(
38 | AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
39 | output_attention=configs.output_attention),
40 | configs.d_model, configs.n_heads),
41 | configs.d_model,
42 | configs.d_ff,
43 | moving_avg=configs.moving_avg,
44 | dropout=configs.dropout,
45 | activation=configs.activation
46 | ) for l in range(configs.e_layers)
47 | ],
48 | norm_layer=my_Layernorm(configs.d_model)
49 | )
50 | # Decoder
51 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
52 | self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
53 | configs.dropout)
54 | self.decoder = Decoder(
55 | [
56 | DecoderLayer(
57 | AutoCorrelationLayer(
58 | AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout,
59 | output_attention=False),
60 | configs.d_model, configs.n_heads),
61 | AutoCorrelationLayer(
62 | AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
63 | output_attention=False),
64 | configs.d_model, configs.n_heads),
65 | configs.d_model,
66 | configs.c_out,
67 | configs.d_ff,
68 | moving_avg=configs.moving_avg,
69 | dropout=configs.dropout,
70 | activation=configs.activation,
71 | )
72 | for l in range(configs.d_layers)
73 | ],
74 | norm_layer=my_Layernorm(configs.d_model),
75 | projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
76 | )
77 | if self.task_name == 'imputation':
78 | self.projection = nn.Linear(
79 | configs.d_model, configs.c_out, bias=True)
80 | if self.task_name == 'anomaly_detection':
81 | self.projection = nn.Linear(
82 | configs.d_model, configs.c_out, bias=True)
83 | if self.task_name == 'classification':
84 | self.act = F.gelu
85 | self.dropout = nn.Dropout(configs.dropout)
86 | self.projection = nn.Linear(
87 | configs.d_model * configs.seq_len, configs.num_class)
88 |
89 | def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
90 | # decomp init
91 | mean = torch.mean(x_enc, dim=1).unsqueeze(
92 | 1).repeat(1, self.pred_len, 1)
93 | zeros = torch.zeros([x_dec.shape[0], self.pred_len,
94 | x_dec.shape[2]], device=x_enc.device)
95 | seasonal_init, trend_init = self.decomp(x_enc)
96 | # decoder input
97 | trend_init = torch.cat(
98 | [trend_init[:, -self.label_len:, :], mean], dim=1)
99 | seasonal_init = torch.cat(
100 | [seasonal_init[:, -self.label_len:, :], zeros], dim=1)
101 | # enc
102 | enc_out = self.enc_embedding(x_enc, x_mark_enc)
103 | enc_out, attns = self.encoder(enc_out, attn_mask=None)
104 | # dec
105 | dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
106 | seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None,
107 | trend=trend_init)
108 | # final
109 | dec_out = trend_part + seasonal_part
110 | return dec_out
111 |
112 | def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
113 | # enc
114 | enc_out = self.enc_embedding(x_enc, x_mark_enc)
115 | enc_out, attns = self.encoder(enc_out, attn_mask=None)
116 | # final
117 | dec_out = self.projection(enc_out)
118 | return dec_out
119 |
120 | def anomaly_detection(self, x_enc):
121 | # enc
122 | enc_out = self.enc_embedding(x_enc, None)
123 | enc_out, attns = self.encoder(enc_out, attn_mask=None)
124 | # final
125 | dec_out = self.projection(enc_out)
126 | return dec_out
127 |
128 | def classification(self, x_enc, x_mark_enc):
129 | # enc
130 | enc_out = self.enc_embedding(x_enc, None)
131 | enc_out, attns = self.encoder(enc_out, attn_mask=None)
132 |
133 | # Output
134 | # the output transformer encoder/decoder embeddings don't include non-linearity
135 | output = self.act(enc_out)
136 | output = self.dropout(output)
137 | # zero-out padding embeddings
138 | output = output * x_mark_enc.unsqueeze(-1)
139 | # (batch_size, seq_length * d_model)
140 | output = output.reshape(output.shape[0], -1)
141 | output = self.projection(output) # (batch_size, num_classes)
142 | return output
143 |
144 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
145 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
146 | dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
147 | return dec_out[:, -self.pred_len:, :] # [B, L, D]
148 | if self.task_name == 'imputation':
149 | dec_out = self.imputation(
150 | x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
151 | return dec_out # [B, L, D]
152 | if self.task_name == 'anomaly_detection':
153 | dec_out = self.anomaly_detection(x_enc)
154 | return dec_out # [B, L, D]
155 | if self.task_name == 'classification':
156 | dec_out = self.classification(x_enc, x_mark_enc)
157 | return dec_out # [B, N]
158 | return None
159 |
--------------------------------------------------------------------------------
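A minimal sketch of instantiating the Autoformer baseline with a dummy config; every field in the Namespace is illustrative (only the attributes the forecasting path reads are set), and the time marks are passed as None since DataEmbedding_wo_pos falls back to the value embedding in that case.

import torch
from types import SimpleNamespace
from models import Autoformer

configs = SimpleNamespace(
    task_name='long_term_forecast', seq_len=96, label_len=48, pred_len=24,
    output_attention=False, moving_avg=25, enc_in=7, dec_in=7, c_out=7,
    d_model=16, n_heads=4, e_layers=2, d_layers=1, d_ff=32,
    embed='timeF', freq='h', dropout=0.1, factor=1, activation='gelu')
model = Autoformer.Model(configs)

x_enc = torch.randn(2, configs.seq_len, configs.enc_in)
x_dec = torch.zeros(2, configs.label_len + configs.pred_len, configs.dec_in)
out = model(x_enc, None, x_dec, None)
print(out.shape)                              # torch.Size([2, 24, 7])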
/models/DLinear.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from layers.Autoformer_EncDec import series_decomp
5 |
6 |
7 | class Model(nn.Module):
8 | """
9 | Paper link: https://arxiv.org/pdf/2205.13504.pdf
10 | """
11 |
12 | def __init__(self, configs, individual=False):
13 | """
14 |         individual: Bool, whether to use an individual linear layer for each variate (channel).
15 | """
16 | super(Model, self).__init__()
17 | self.task_name = configs.task_name
18 | self.seq_len = configs.seq_len
19 | if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
20 | self.pred_len = configs.seq_len
21 | else:
22 | self.pred_len = configs.pred_len
23 |
24 | self.decompsition = series_decomp(configs.moving_avg)
25 | self.individual = individual
26 | self.channels = configs.enc_in
27 |
28 | if self.individual:
29 | self.Linear_Seasonal = nn.ModuleList()
30 | self.Linear_Trend = nn.ModuleList()
31 |
32 | for i in range(self.channels):
33 | self.Linear_Seasonal.append(
34 | nn.Linear(self.seq_len, self.pred_len))
35 | self.Linear_Trend.append(
36 | nn.Linear(self.seq_len, self.pred_len))
37 |
38 | self.Linear_Seasonal[i].weight = nn.Parameter(
39 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
40 | self.Linear_Trend[i].weight = nn.Parameter(
41 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
42 | else:
43 | self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
44 | self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
45 |
46 | self.Linear_Seasonal.weight = nn.Parameter(
47 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
48 | self.Linear_Trend.weight = nn.Parameter(
49 | (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
50 |
51 | if self.task_name == 'classification':
52 | self.act = F.gelu
53 | self.dropout = nn.Dropout(configs.dropout)
54 | self.projection = nn.Linear(
55 | configs.enc_in * configs.seq_len, configs.num_class)
56 |
57 | def encoder(self, x):
58 | seasonal_init, trend_init = self.decompsition(x)
59 | seasonal_init, trend_init = seasonal_init.permute(
60 | 0, 2, 1), trend_init.permute(0, 2, 1)
61 | if self.individual:
62 | seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
63 | dtype=seasonal_init.dtype).to(seasonal_init.device)
64 | trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len],
65 | dtype=trend_init.dtype).to(trend_init.device)
66 | for i in range(self.channels):
67 | seasonal_output[:, i, :] = self.Linear_Seasonal[i](
68 | seasonal_init[:, i, :])
69 | trend_output[:, i, :] = self.Linear_Trend[i](
70 | trend_init[:, i, :])
71 | else:
72 | seasonal_output = self.Linear_Seasonal(seasonal_init)
73 | trend_output = self.Linear_Trend(trend_init)
74 | x = seasonal_output + trend_output
75 | return x.permute(0, 2, 1)
76 |
77 | def forecast(self, x_enc):
78 | return self.encoder(x_enc)
79 |
80 | def imputation(self, x_enc):
81 | return self.encoder(x_enc)
82 |
83 | def anomaly_detection(self, x_enc):
84 | return self.encoder(x_enc)
85 |
86 | def classification(self, x_enc):
87 | enc_out = self.encoder(x_enc)
88 | # Output
89 | # (batch_size, seq_length * d_model)
90 | output = enc_out.reshape(enc_out.shape[0], -1)
91 | output = self.projection(output) # (batch_size, num_classes)
92 | return output
93 |
94 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
95 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
96 | dec_out = self.forecast(x_enc)
97 | return dec_out[:, -self.pred_len:, :] # [B, L, D]
98 | if self.task_name == 'imputation':
99 | dec_out = self.imputation(x_enc)
100 | return dec_out # [B, L, D]
101 | if self.task_name == 'anomaly_detection':
102 | dec_out = self.anomaly_detection(x_enc)
103 | return dec_out # [B, L, D]
104 | if self.task_name == 'classification':
105 | dec_out = self.classification(x_enc)
106 | return dec_out # [B, N]
107 | return None
108 |
--------------------------------------------------------------------------------
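A minimal sketch of running DLinear with a dummy config; the Namespace fields are illustrative and limited to what the forecasting path reads.

import torch
from types import SimpleNamespace
from models import DLinear

configs = SimpleNamespace(task_name='long_term_forecast', seq_len=96, pred_len=24,
                          moving_avg=25, enc_in=7)
model = DLinear.Model(configs)                # individual=False: one linear pair shared across channels

x_enc = torch.randn(4, configs.seq_len, configs.enc_in)
out = model(x_enc, None, None, None)          # marks and decoder inputs are unused here
print(out.shape)                              # torch.Size([4, 24, 7])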
/models/TimeLLM.py:
--------------------------------------------------------------------------------
1 | from math import sqrt
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from transformers import LlamaConfig, LlamaModel, LlamaTokenizer, GPT2Config, GPT2Model, GPT2Tokenizer, BertConfig, \
7 | BertModel, BertTokenizer
8 | from layers.Embed import PatchEmbedding
9 | import transformers
10 | from layers.StandardNorm import Normalize
11 |
12 | transformers.logging.set_verbosity_error()
13 |
14 |
15 | class FlattenHead(nn.Module):
16 | def __init__(self, n_vars, nf, target_window, head_dropout=0):
17 | super().__init__()
18 | self.n_vars = n_vars
19 | self.flatten = nn.Flatten(start_dim=-2)
20 | self.linear = nn.Linear(nf, target_window)
21 | self.dropout = nn.Dropout(head_dropout)
22 |
23 | def forward(self, x):
24 | x = self.flatten(x)
25 | x = self.linear(x)
26 | x = self.dropout(x)
27 | return x
28 |
29 |
30 | class Model(nn.Module):
31 |
32 | def __init__(self, configs, patch_len=16, stride=8):
33 | super(Model, self).__init__()
34 | self.task_name = configs.task_name
35 | self.pred_len = configs.pred_len
36 | self.seq_len = configs.seq_len
37 | self.d_ff = configs.d_ff
38 | self.top_k = 5
39 | self.d_llm = configs.llm_dim
40 | self.patch_len = configs.patch_len
41 | self.stride = configs.stride
42 |
43 | if configs.llm_model == 'LLAMA':
44 | # self.llama_config = LlamaConfig.from_pretrained('/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/')
45 | self.llama_config = LlamaConfig.from_pretrained('huggyllama/llama-7b')
46 | self.llama_config.num_hidden_layers = configs.llm_layers
47 | self.llama_config.output_attentions = True
48 | self.llama_config.output_hidden_states = True
49 | try:
50 | self.llm_model = LlamaModel.from_pretrained(
51 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/",
52 | 'huggyllama/llama-7b',
53 | trust_remote_code=True,
54 | local_files_only=True,
55 | config=self.llama_config,
56 | # load_in_4bit=True
57 | )
58 |             except EnvironmentError:  # downloads the model from HF if not already done
59 | print("Local model files not found. Attempting to download...")
60 | self.llm_model = LlamaModel.from_pretrained(
61 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/",
62 | 'huggyllama/llama-7b',
63 | trust_remote_code=True,
64 | local_files_only=False,
65 | config=self.llama_config,
66 | # load_in_4bit=True
67 | )
68 | try:
69 | self.tokenizer = LlamaTokenizer.from_pretrained(
70 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/tokenizer.model",
71 | 'huggyllama/llama-7b',
72 | trust_remote_code=True,
73 | local_files_only=True
74 | )
75 | except EnvironmentError: # downloads the tokenizer from HF if not already done
76 |                 print("Local tokenizer files not found. Attempting to download them...")
77 | self.tokenizer = LlamaTokenizer.from_pretrained(
78 | # "/mnt/alps/modelhub/pretrained_model/LLaMA/7B_hf/tokenizer.model",
79 | 'huggyllama/llama-7b',
80 | trust_remote_code=True,
81 | local_files_only=False
82 | )
83 | elif configs.llm_model == 'GPT2':
84 | self.gpt2_config = GPT2Config.from_pretrained('openai-community/gpt2')
85 |
86 | self.gpt2_config.num_hidden_layers = configs.llm_layers
87 | self.gpt2_config.output_attentions = True
88 | self.gpt2_config.output_hidden_states = True
89 | try:
90 | self.llm_model = GPT2Model.from_pretrained(
91 | 'openai-community/gpt2',
92 | trust_remote_code=True,
93 | local_files_only=True,
94 | config=self.gpt2_config,
95 | )
96 |             except EnvironmentError:  # downloads the model from HF if not already done
97 | print("Local model files not found. Attempting to download...")
98 | self.llm_model = GPT2Model.from_pretrained(
99 | 'openai-community/gpt2',
100 | trust_remote_code=True,
101 | local_files_only=False,
102 | config=self.gpt2_config,
103 | )
104 |
105 | try:
106 | self.tokenizer = GPT2Tokenizer.from_pretrained(
107 | 'openai-community/gpt2',
108 | trust_remote_code=True,
109 | local_files_only=True
110 | )
111 | except EnvironmentError: # downloads the tokenizer from HF if not already done
112 |                 print("Local tokenizer files not found. Attempting to download them...")
113 | self.tokenizer = GPT2Tokenizer.from_pretrained(
114 | 'openai-community/gpt2',
115 | trust_remote_code=True,
116 | local_files_only=False
117 | )
118 | elif configs.llm_model == 'BERT':
119 | self.bert_config = BertConfig.from_pretrained('google-bert/bert-base-uncased')
120 |
121 | self.bert_config.num_hidden_layers = configs.llm_layers
122 | self.bert_config.output_attentions = True
123 | self.bert_config.output_hidden_states = True
124 | try:
125 | self.llm_model = BertModel.from_pretrained(
126 | 'google-bert/bert-base-uncased',
127 | trust_remote_code=True,
128 | local_files_only=True,
129 | config=self.bert_config,
130 | )
131 |             except EnvironmentError:  # downloads the model from HF if not already done
132 | print("Local model files not found. Attempting to download...")
133 | self.llm_model = BertModel.from_pretrained(
134 | 'google-bert/bert-base-uncased',
135 | trust_remote_code=True,
136 | local_files_only=False,
137 | config=self.bert_config,
138 | )
139 |
140 | try:
141 | self.tokenizer = BertTokenizer.from_pretrained(
142 | 'google-bert/bert-base-uncased',
143 | trust_remote_code=True,
144 | local_files_only=True
145 | )
146 | except EnvironmentError: # downloads the tokenizer from HF if not already done
147 |                 print("Local tokenizer files not found. Attempting to download them...")
148 | self.tokenizer = BertTokenizer.from_pretrained(
149 | 'google-bert/bert-base-uncased',
150 | trust_remote_code=True,
151 | local_files_only=False
152 | )
153 | else:
154 | raise Exception('LLM model is not defined')
155 |
156 | if self.tokenizer.eos_token:
157 | self.tokenizer.pad_token = self.tokenizer.eos_token
158 | else:
159 | pad_token = '[PAD]'
160 | self.tokenizer.add_special_tokens({'pad_token': pad_token})
161 | self.tokenizer.pad_token = pad_token
162 |
163 | for param in self.llm_model.parameters():
164 | param.requires_grad = False
165 |
166 | if configs.prompt_domain:
167 | self.description = configs.content
168 | else:
169 | self.description = 'The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment.'
170 |
171 | self.dropout = nn.Dropout(configs.dropout)
172 |
173 | self.patch_embedding = PatchEmbedding(
174 | configs.d_model, self.patch_len, self.stride, configs.dropout)
175 |
176 | self.word_embeddings = self.llm_model.get_input_embeddings().weight
177 | self.vocab_size = self.word_embeddings.shape[0]
178 | self.num_tokens = 1000
179 | self.mapping_layer = nn.Linear(self.vocab_size, self.num_tokens)
180 |
181 | self.reprogramming_layer = ReprogrammingLayer(configs.d_model, configs.n_heads, self.d_ff, self.d_llm)
182 |
183 | self.patch_nums = int((configs.seq_len - self.patch_len) / self.stride + 2)
184 | self.head_nf = self.d_ff * self.patch_nums
185 |
186 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
187 | self.output_projection = FlattenHead(configs.enc_in, self.head_nf, self.pred_len,
188 | head_dropout=configs.dropout)
189 | else:
190 | raise NotImplementedError
191 |
192 | self.normalize_layers = Normalize(configs.enc_in, affine=False)
193 |
194 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
195 | if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
196 | dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
197 | return dec_out[:, -self.pred_len:, :]
198 | return None
199 |
200 | def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
201 |
202 | x_enc = self.normalize_layers(x_enc, 'norm')
203 |
204 | B, T, N = x_enc.size()
205 | x_enc = x_enc.permute(0, 2, 1).contiguous().reshape(B * N, T, 1)
206 |
207 | min_values = torch.min(x_enc, dim=1)[0]
208 | max_values = torch.max(x_enc, dim=1)[0]
209 | medians = torch.median(x_enc, dim=1).values
210 | lags = self.calcute_lags(x_enc)
211 | trends = x_enc.diff(dim=1).sum(dim=1)
212 |
213 | prompt = []
214 | for b in range(x_enc.shape[0]):
215 | min_values_str = str(min_values[b].tolist()[0])
216 | max_values_str = str(max_values[b].tolist()[0])
217 | median_values_str = str(medians[b].tolist()[0])
218 | lags_values_str = str(lags[b].tolist())
219 | prompt_ = (
220 | f"<|start_prompt|>Dataset description: {self.description}"
221 | f"Task description: forecast the next {str(self.pred_len)} steps given the previous {str(self.seq_len)} steps information; "
222 | "Input statistics: "
223 | f"min value {min_values_str}, "
224 | f"max value {max_values_str}, "
225 | f"median value {median_values_str}, "
226 | f"the trend of input is {'upward' if trends[b] > 0 else 'downward'}, "
227 | f"top 5 lags are : {lags_values_str}<||>"
228 | )
229 |
230 | prompt.append(prompt_)
231 |
232 | x_enc = x_enc.reshape(B, N, T).permute(0, 2, 1).contiguous()
233 |
234 | prompt = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048).input_ids
235 | prompt_embeddings = self.llm_model.get_input_embeddings()(prompt.to(x_enc.device)) # (batch, prompt_token, dim)
236 |
237 | source_embeddings = self.mapping_layer(self.word_embeddings.permute(1, 0)).permute(1, 0)
238 |
239 | x_enc = x_enc.permute(0, 2, 1).contiguous()
240 | enc_out, n_vars = self.patch_embedding(x_enc.to(torch.bfloat16))
241 | enc_out = self.reprogramming_layer(enc_out, source_embeddings, source_embeddings)
242 | llama_enc_out = torch.cat([prompt_embeddings, enc_out], dim=1)
243 | dec_out = self.llm_model(inputs_embeds=llama_enc_out).last_hidden_state
244 | dec_out = dec_out[:, :, :self.d_ff]
245 |
246 | dec_out = torch.reshape(
247 | dec_out, (-1, n_vars, dec_out.shape[-2], dec_out.shape[-1]))
248 | dec_out = dec_out.permute(0, 1, 3, 2).contiguous()
249 |
250 | dec_out = self.output_projection(dec_out[:, :, :, -self.patch_nums:])
251 | dec_out = dec_out.permute(0, 2, 1).contiguous()
252 |
253 | dec_out = self.normalize_layers(dec_out, 'denorm')
254 |
255 | return dec_out
256 |
257 | def calcute_lags(self, x_enc):
258 | q_fft = torch.fft.rfft(x_enc.permute(0, 2, 1).contiguous(), dim=-1)
259 | k_fft = torch.fft.rfft(x_enc.permute(0, 2, 1).contiguous(), dim=-1)
260 | res = q_fft * torch.conj(k_fft)
261 | corr = torch.fft.irfft(res, dim=-1)
262 | mean_value = torch.mean(corr, dim=1)
263 | _, lags = torch.topk(mean_value, self.top_k, dim=-1)
264 | return lags
265 |
266 |
267 | class ReprogrammingLayer(nn.Module):
268 | def __init__(self, d_model, n_heads, d_keys=None, d_llm=None, attention_dropout=0.1):
269 | super(ReprogrammingLayer, self).__init__()
270 |
271 | d_keys = d_keys or (d_model // n_heads)
272 |
273 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
274 | self.key_projection = nn.Linear(d_llm, d_keys * n_heads)
275 | self.value_projection = nn.Linear(d_llm, d_keys * n_heads)
276 | self.out_projection = nn.Linear(d_keys * n_heads, d_llm)
277 | self.n_heads = n_heads
278 | self.dropout = nn.Dropout(attention_dropout)
279 |
280 | def forward(self, target_embedding, source_embedding, value_embedding):
281 | B, L, _ = target_embedding.shape
282 | S, _ = source_embedding.shape
283 | H = self.n_heads
284 |
285 | target_embedding = self.query_projection(target_embedding).view(B, L, H, -1)
286 | source_embedding = self.key_projection(source_embedding).view(S, H, -1)
287 | value_embedding = self.value_projection(value_embedding).view(S, H, -1)
288 |
289 | out = self.reprogramming(target_embedding, source_embedding, value_embedding)
290 |
291 | out = out.reshape(B, L, -1)
292 |
293 | return self.out_projection(out)
294 |
295 | def reprogramming(self, target_embedding, source_embedding, value_embedding):
296 | B, L, H, E = target_embedding.shape
297 |
298 | scale = 1. / sqrt(E)
299 |
300 | scores = torch.einsum("blhe,she->bhls", target_embedding, source_embedding)
301 |
302 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
303 | reprogramming_embedding = torch.einsum("bhls,she->blhe", A, value_embedding)
304 |
305 | return reprogramming_embedding
306 |
--------------------------------------------------------------------------------
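A self-contained sketch of the ReprogrammingLayer cross-attention at the heart of TimeLLM: patch tokens (queries) attend over a compressed set of word-embedding prototypes (keys/values) and are projected up to the LLM width. Importing models.TimeLLM only needs the transformers package; no model weights are downloaded for this snippet. All sizes are illustrative; in Model the layer is built as ReprogrammingLayer(d_model, n_heads, d_ff, llm_dim) and the prototypes come from mapping_layer applied to the frozen word embeddings.

import torch
from models.TimeLLM import ReprogrammingLayer

d_model, n_heads, d_ff, d_llm = 32, 8, 128, 768
layer = ReprogrammingLayer(d_model, n_heads, d_ff, d_llm)   # d_keys = d_ff here, as in Model

patch_tokens = torch.randn(14, 12, d_model)   # (B * n_vars, patch_nums, d_model), e.g. from PatchEmbedding
prototypes = torch.randn(1000, d_llm)         # num_tokens text prototypes

out = layer(patch_tokens, prototypes, prototypes)
print(out.shape)                              # torch.Size([14, 12, 768]) -> ready to concatenate with prompt embeddings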
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/models/__init__.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==2.2.2
2 | accelerate==0.28.0
3 | einops==0.7.0
4 | matplotlib==3.7.0
5 | numpy==1.23.5
6 | pandas==1.5.3
7 | scikit_learn==1.2.2
8 | scipy==1.12.0
9 | tqdm==4.65.0
10 | peft==0.4.0
11 | transformers==4.31.0
12 | deepspeed==0.14.0
13 | sentencepiece==0.2.0
14 |
--------------------------------------------------------------------------------
/run_m4.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 | from accelerate import Accelerator, DeepSpeedPlugin
4 | from accelerate import DistributedDataParallelKwargs
5 | from torch import optim
6 | from torch.optim import lr_scheduler
7 |
8 | from data_provider.m4 import M4Meta
9 | from models import Autoformer, DLinear, TimeLLM
10 |
11 | from data_provider.data_factory import data_provider
12 | import time
13 | import random
14 | import numpy as np
15 | import pandas
16 |
17 | from utils.losses import smape_loss
18 | from utils.m4_summary import M4Summary
19 | import os
20 |
21 | os.environ['CURL_CA_BUNDLE'] = ''
22 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
23 |
24 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, load_content, test
25 |
26 | parser = argparse.ArgumentParser(description='Time-LLM')
27 |
28 | fix_seed = 2021
29 | random.seed(fix_seed)
30 | torch.manual_seed(fix_seed)
31 | np.random.seed(fix_seed)
32 |
33 | # basic config
34 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
35 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
36 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
37 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
38 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='prefix when saving test results')
39 | parser.add_argument('--model', type=str, required=True, default='Autoformer',
40 | help='model name, options: [Autoformer, DLinear]')
41 | parser.add_argument('--seed', type=int, default=0, help='random seed')
42 |
43 | # data loader
44 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
45 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
46 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
47 | parser.add_argument('--features', type=str, default='M',
48 | help='forecasting task, options:[M, S, MS]; '
49 | 'M:multivariate predict multivariate, S: univariate predict univariate, '
50 | 'MS:multivariate predict univariate')
51 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
52 | parser.add_argument('--loader', type=str, default='modal', help='dataset type')
53 | parser.add_argument('--freq', type=str, default='h',
54 | help='freq for time features encoding, '
55 | 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
56 | 'you can also use more detailed freq like 15min or 3h')
57 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
58 |
59 | # forecasting task
60 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
61 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
62 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
63 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
64 |
65 | # model define
66 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
67 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
68 | parser.add_argument('--c_out', type=int, default=7, help='output size')
69 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
70 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
71 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
72 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
73 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
74 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
75 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
76 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
77 | parser.add_argument('--embed', type=str, default='timeF',
78 | help='time features encoding, options:[timeF, fixed, learned]')
79 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
80 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
81 | parser.add_argument('--patch_len', type=int, default=16, help='patch length')
82 | parser.add_argument('--stride', type=int, default=8, help='stride')
83 | parser.add_argument('--prompt_domain', type=int, default=0, help='whether to use the dataset-specific prompt description (args.content) instead of the default')
84 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT
85 | parser.add_argument('--llm_dim', type=int, default=4096, help='LLM model dimension')  # LLaMA-7B: 4096; GPT2-small: 768; BERT-base: 768
86 |
87 | # optimization
88 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
89 | parser.add_argument('--itr', type=int, default=1, help='experiments times')
90 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
91 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
92 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
93 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
94 | parser.add_argument('--patience', type=int, default=20, help='early stopping patience')
95 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
96 | parser.add_argument('--des', type=str, default='test', help='exp description')
97 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
98 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
99 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
100 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
101 | parser.add_argument('--llm_layers', type=int, default=6)
102 | parser.add_argument('--percent', type=int, default=100)
103 |
104 | args = parser.parse_args()
105 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
106 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
107 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)
108 |
109 | for ii in range(args.itr):
110 | # setting record of experiments
111 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
112 | args.task_name,
113 | args.model_id,
114 | args.model,
115 | args.data,
116 | args.features,
117 | args.seq_len,
118 | args.label_len,
119 | args.pred_len,
120 | args.d_model,
121 | args.n_heads,
122 | args.e_layers,
123 | args.d_layers,
124 | args.d_ff,
125 | args.factor,
126 | args.embed,
127 | args.des, ii)
128 |
129 | if args.data == 'm4':
130 | args.pred_len = M4Meta.horizons_map[args.seasonal_patterns] # Up to M4 config
131 | args.seq_len = 2 * args.pred_len
132 | args.label_len = args.pred_len
133 | args.frequency_map = M4Meta.frequency_map[args.seasonal_patterns]
134 |
135 | train_data, train_loader = data_provider(args, 'train')
136 | vali_data, vali_loader = data_provider(args, 'val')
137 | test_data, test_loader = data_provider(args, 'test')
138 |
139 | if args.model == 'Autoformer':
140 | model = Autoformer.Model(args).float()
141 | elif args.model == 'DLinear':
142 | model = DLinear.Model(args).float()
143 | else:
144 | model = TimeLLM.Model(args).float()
145 |
146 | path = os.path.join(args.checkpoints,
147 | setting + '-' + args.model_comment) # unique checkpoint saving path
148 | args.content = load_content(args)
149 | if not os.path.exists(path) and accelerator.is_local_main_process:
150 | os.makedirs(path)
151 |
152 | time_now = time.time()
153 |
154 | train_steps = len(train_loader)
155 | early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience, verbose=True)
156 |
157 | model_optim = optim.Adam(model.parameters(), lr=args.learning_rate)
158 |
159 | if args.lradj == 'COS':
160 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
161 | else:
162 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
163 | steps_per_epoch=train_steps,
164 | pct_start=args.pct_start,
165 | epochs=args.train_epochs,
166 | max_lr=args.learning_rate)
167 |
168 | criterion = smape_loss()
169 |
170 | train_loader, vali_loader, model, model_optim, scheduler = accelerator.prepare(
171 | train_loader, vali_loader, model, model_optim, scheduler)
172 |
173 | for epoch in range(args.train_epochs):
174 | iter_count = 0
175 | train_loss = []
176 |
177 | model.train()
178 | epoch_time = time.time()
179 |
180 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
181 | iter_count += 1
182 | model_optim.zero_grad()
183 | batch_x = batch_x.float().to(accelerator.device)
184 |
185 | batch_y = batch_y.float().to(accelerator.device)
186 | batch_y_mark = batch_y_mark.float().to(accelerator.device)
187 |
188 | # decoder input
189 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(accelerator.device)
190 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
191 | accelerator.device)
192 |
193 | outputs = model(batch_x, None, dec_inp, None)
194 |
195 | f_dim = -1 if args.features == 'MS' else 0
196 | outputs = outputs[:, -args.pred_len:, f_dim:]
197 | batch_y = batch_y[:, -args.pred_len:, f_dim:]
198 |
199 | batch_y_mark = batch_y_mark[:, -args.pred_len:, f_dim:]
200 | loss = criterion(batch_x, args.frequency_map, outputs, batch_y, batch_y_mark)
201 |
202 | train_loss.append(loss.item())
203 |
204 | if (i + 1) % 100 == 0:
205 | accelerator.print(
206 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())
207 | )
208 | speed = (time.time() - time_now) / iter_count
209 | left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
210 | accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
211 | iter_count = 0
212 | time_now = time.time()
213 |
214 | accelerator.backward(loss)
215 | model_optim.step()
216 |
217 | if args.lradj == 'TST':
218 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
219 | scheduler.step()
220 |
221 | accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
222 | train_loss = np.average(train_loss)
223 | vali_loss = test(args, accelerator, model, train_loader, vali_loader, criterion)
224 | test_loss = vali_loss
225 | accelerator.print(
226 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
227 | epoch + 1, train_steps, train_loss, vali_loss, test_loss))
228 | early_stopping(vali_loss, model, path) # model saving
229 | if early_stopping.early_stop:
230 | accelerator.print("Early stopping")
231 | break
232 |
233 | if args.lradj != 'TST':
234 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
235 | else:
236 | accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
237 |
238 | best_model_path = path + '/' + 'checkpoint'
239 | accelerator.wait_for_everyone()
240 | unwrapped_model = accelerator.unwrap_model(model)
241 | torch.cuda.synchronize()
242 | torch.cuda.empty_cache()
243 | unwrapped_model.load_state_dict(torch.load(best_model_path, map_location=lambda storage, loc: storage))
244 |
245 | x, _ = train_loader.dataset.last_insample_window()
246 | y = test_loader.dataset.timeseries
247 | x = torch.tensor(x, dtype=torch.float32).to(accelerator.device)
248 | x = x.unsqueeze(-1)
249 |
250 | model.eval()
251 |
252 | with torch.no_grad():
253 | B, _, C = x.shape
254 | dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
255 | dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1)
256 | outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
257 | id_list = np.arange(0, B, args.eval_batch_size)
258 | id_list = np.append(id_list, B)
259 | for i in range(len(id_list) - 1):
260 | outputs[id_list[i]:id_list[i + 1], :, :] = model(
261 | x[id_list[i]:id_list[i + 1]],
262 | None,
263 | dec_inp[id_list[i]:id_list[i + 1]],
264 | None
265 | )
266 | accelerator.wait_for_everyone()
267 | f_dim = -1 if args.features == 'MS' else 0
268 | outputs = outputs[:, -args.pred_len:, f_dim:]
269 | outputs = outputs.detach().cpu().numpy()
270 |
271 | preds = outputs
272 | trues = y
273 | x = x.detach().cpu().numpy()
274 |
275 | accelerator.print('test shape:', preds.shape)
276 |
277 | folder_path = './m4_results/' + args.model + '-' + args.model_comment + '/'
278 | if not os.path.exists(folder_path) and accelerator.is_local_main_process:
279 | os.makedirs(folder_path)
280 |
281 | if accelerator.is_local_main_process:
282 | forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(args.pred_len)])
283 | forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
284 | forecasts_df.index.name = 'id'
285 | forecasts_df.set_index(forecasts_df.columns[0], inplace=True)
286 | forecasts_df.to_csv(folder_path + args.seasonal_patterns + '_forecast.csv')
287 |
288 | # calculate metrics
289 | accelerator.print(args.model)
290 | file_path = folder_path
291 | if 'Weekly_forecast.csv' in os.listdir(file_path) \
292 | and 'Monthly_forecast.csv' in os.listdir(file_path) \
293 | and 'Yearly_forecast.csv' in os.listdir(file_path) \
294 | and 'Daily_forecast.csv' in os.listdir(file_path) \
295 | and 'Hourly_forecast.csv' in os.listdir(file_path) \
296 | and 'Quarterly_forecast.csv' in os.listdir(file_path):
297 | m4_summary = M4Summary(file_path, args.root_path)
298 | # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True)
299 | smape_results, owa_results, mape, mase = m4_summary.evaluate()
300 | accelerator.print('smape:', smape_results)
301 | accelerator.print('mape:', mape)
302 | accelerator.print('mase:', mase)
303 | accelerator.print('owa:', owa_results)
304 | else:
305 | accelerator.print('After all 6 tasks are finished, you can calculate the averaged performance')
306 |
307 | accelerator.wait_for_everyone()
308 | if accelerator.is_local_main_process:
309 |         path = './checkpoints' # root checkpoint directory to clean up
310 |         del_files(path) # delete checkpoint files
311 |         accelerator.print('successfully deleted checkpoints')
312 |
--------------------------------------------------------------------------------
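
The torch.no_grad() block at the end of run_m4.py above forwards the whole M4 test set in slices of --eval_batch_size, so only one slice goes through the model at a time. A minimal, self-contained sketch of that chunking pattern, using a hypothetical naive stand-in instead of the TimeLLM model:

import numpy as np
import torch

# Sketch of the chunked no-grad inference used in run_m4.py: the full batch of
# B series is split at eval_batch_size boundaries and forwarded chunk by chunk.
def chunked_forecast(model, x, dec_inp, pred_len, eval_batch_size):
    B, _, C = x.shape
    outputs = torch.zeros((B, pred_len, C), dtype=torch.float32)
    id_list = np.append(np.arange(0, B, eval_batch_size), B)  # chunk boundaries
    with torch.no_grad():
        for i in range(len(id_list) - 1):
            lo, hi = id_list[i], id_list[i + 1]
            outputs[lo:hi] = model(x[lo:hi], None, dec_inp[lo:hi], None)
    return outputs

if __name__ == '__main__':
    pred_len, label_len, eval_batch_size = 4, 2, 3
    x = torch.randn(10, 8, 1)                                    # (B, seq_len, C)
    dec_inp = torch.cat([x[:, -label_len:, :],
                         torch.zeros(10, pred_len, 1)], dim=1)   # start token + zeros

    def naive(bx, x_mark, dec, y_mark):
        # hypothetical stand-in model: repeat the last observed value
        return bx[:, -1:, :].repeat(1, pred_len, 1)

    print(chunked_forecast(naive, x, dec_inp, pred_len, eval_batch_size).shape)
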
/run_main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 | from accelerate import Accelerator, DeepSpeedPlugin
4 | from accelerate import DistributedDataParallelKwargs
5 | from torch import nn, optim
6 | from torch.optim import lr_scheduler
7 | from tqdm import tqdm
8 |
9 | from models import Autoformer, DLinear, TimeLLM
10 |
11 | from data_provider.data_factory import data_provider
12 | import time
13 | import random
14 | import numpy as np
15 | import os
16 |
17 | os.environ['CURL_CA_BUNDLE'] = ''
18 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
19 |
20 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, vali, load_content
21 |
22 | parser = argparse.ArgumentParser(description='Time-LLM')
23 |
24 | fix_seed = 2021
25 | random.seed(fix_seed)
26 | torch.manual_seed(fix_seed)
27 | np.random.seed(fix_seed)
28 |
29 | # basic config
30 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
31 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
32 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
33 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
34 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='suffix appended to checkpoint and result names')
35 | parser.add_argument('--model', type=str, required=True, default='Autoformer',
36 | help='model name, options: [Autoformer, DLinear]')
37 | parser.add_argument('--seed', type=int, default=2021, help='random seed')
38 |
39 | # data loader
40 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
41 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
42 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
43 | parser.add_argument('--features', type=str, default='M',
44 | help='forecasting task, options:[M, S, MS]; '
45 | 'M:multivariate predict multivariate, S: univariate predict univariate, '
46 | 'MS:multivariate predict univariate')
47 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
48 | parser.add_argument('--loader', type=str, default='modal', help='dataset type')
49 | parser.add_argument('--freq', type=str, default='h',
50 | help='freq for time features encoding, '
51 | 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
52 | 'you can also use more detailed freq like 15min or 3h')
53 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
54 |
55 | # forecasting task
56 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
57 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
58 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
59 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
60 |
61 | # model define
62 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
63 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
64 | parser.add_argument('--c_out', type=int, default=7, help='output size')
65 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
66 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
67 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
68 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
69 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
70 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
71 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
72 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
73 | parser.add_argument('--embed', type=str, default='timeF',
74 | help='time features encoding, options:[timeF, fixed, learned]')
75 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
76 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
77 | parser.add_argument('--patch_len', type=int, default=16, help='patch length')
78 | parser.add_argument('--stride', type=int, default=8, help='stride')
79 | parser.add_argument('--prompt_domain', type=int, default=0, help='')
80 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT
81 | parser.add_argument('--llm_dim', type=int, default=4096, help='LLM model dimension')  # Llama-7B: 4096; GPT2-small: 768; BERT-base: 768
82 |
83 |
84 | # optimization
85 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
86 | parser.add_argument('--itr', type=int, default=1, help='number of experiment repetitions')
87 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
88 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
89 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
90 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
91 | parser.add_argument('--patience', type=int, default=10, help='early stopping patience')
92 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
93 | parser.add_argument('--des', type=str, default='test', help='exp description')
94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
95 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
96 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
98 | parser.add_argument('--llm_layers', type=int, default=6)
99 | parser.add_argument('--percent', type=int, default=100)
100 |
101 | args = parser.parse_args()
102 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
103 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
104 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)
105 |
106 | for ii in range(args.itr):
107 | # setting record of experiments
108 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
109 | args.task_name,
110 | args.model_id,
111 | args.model,
112 | args.data,
113 | args.features,
114 | args.seq_len,
115 | args.label_len,
116 | args.pred_len,
117 | args.d_model,
118 | args.n_heads,
119 | args.e_layers,
120 | args.d_layers,
121 | args.d_ff,
122 | args.factor,
123 | args.embed,
124 | args.des, ii)
125 |
126 | train_data, train_loader = data_provider(args, 'train')
127 | vali_data, vali_loader = data_provider(args, 'val')
128 | test_data, test_loader = data_provider(args, 'test')
129 |
130 | if args.model == 'Autoformer':
131 | model = Autoformer.Model(args).float()
132 | elif args.model == 'DLinear':
133 | model = DLinear.Model(args).float()
134 | else:
135 | model = TimeLLM.Model(args).float()
136 |
137 | path = os.path.join(args.checkpoints,
138 | setting + '-' + args.model_comment) # unique checkpoint saving path
139 | args.content = load_content(args)
140 | if not os.path.exists(path) and accelerator.is_local_main_process:
141 | os.makedirs(path)
142 |
143 | time_now = time.time()
144 |
145 | train_steps = len(train_loader)
146 | early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience)
147 |
148 | trained_parameters = []
149 | for p in model.parameters():
150 | if p.requires_grad is True:
151 | trained_parameters.append(p)
152 |
153 | model_optim = optim.Adam(trained_parameters, lr=args.learning_rate)
154 |
155 | if args.lradj == 'COS':
156 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
157 | else:
158 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
159 | steps_per_epoch=train_steps,
160 | pct_start=args.pct_start,
161 | epochs=args.train_epochs,
162 | max_lr=args.learning_rate)
163 |
164 | criterion = nn.MSELoss()
165 | mae_metric = nn.L1Loss()
166 |
167 | train_loader, vali_loader, test_loader, model, model_optim, scheduler = accelerator.prepare(
168 | train_loader, vali_loader, test_loader, model, model_optim, scheduler)
169 |
170 | if args.use_amp:
171 | scaler = torch.cuda.amp.GradScaler()
172 |
173 | for epoch in range(args.train_epochs):
174 | iter_count = 0
175 | train_loss = []
176 |
177 | model.train()
178 | epoch_time = time.time()
179 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(train_loader)):
180 | iter_count += 1
181 | model_optim.zero_grad()
182 |
183 | batch_x = batch_x.float().to(accelerator.device)
184 | batch_y = batch_y.float().to(accelerator.device)
185 | batch_x_mark = batch_x_mark.float().to(accelerator.device)
186 | batch_y_mark = batch_y_mark.float().to(accelerator.device)
187 |
188 | # decoder input
189 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(
190 | accelerator.device)
191 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
192 | accelerator.device)
193 |
194 | # encoder - decoder
195 | if args.use_amp:
196 | with torch.cuda.amp.autocast():
197 | if args.output_attention:
198 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
199 | else:
200 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
201 |
202 | f_dim = -1 if args.features == 'MS' else 0
203 | outputs = outputs[:, -args.pred_len:, f_dim:]
204 | batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
205 | loss = criterion(outputs, batch_y)
206 | train_loss.append(loss.item())
207 | else:
208 | if args.output_attention:
209 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
210 | else:
211 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
212 |
213 | f_dim = -1 if args.features == 'MS' else 0
214 | outputs = outputs[:, -args.pred_len:, f_dim:]
215 | batch_y = batch_y[:, -args.pred_len:, f_dim:]
216 | loss = criterion(outputs, batch_y)
217 | train_loss.append(loss.item())
218 |
219 | if (i + 1) % 100 == 0:
220 | accelerator.print(
221 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
222 | speed = (time.time() - time_now) / iter_count
223 | left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
224 | accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
225 | iter_count = 0
226 | time_now = time.time()
227 |
228 | if args.use_amp:
229 | scaler.scale(loss).backward()
230 | scaler.step(model_optim)
231 | scaler.update()
232 | else:
233 | accelerator.backward(loss)
234 | model_optim.step()
235 |
236 | if args.lradj == 'TST':
237 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
238 | scheduler.step()
239 |
240 | accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
241 | train_loss = np.average(train_loss)
242 | vali_loss, vali_mae_loss = vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric)
243 | test_loss, test_mae_loss = vali(args, accelerator, model, test_data, test_loader, criterion, mae_metric)
244 | accelerator.print(
245 | "Epoch: {0} | Train Loss: {1:.7f} Vali Loss: {2:.7f} Test Loss: {3:.7f} MAE Loss: {4:.7f}".format(
246 | epoch + 1, train_loss, vali_loss, test_loss, test_mae_loss))
247 |
248 | early_stopping(vali_loss, model, path)
249 | if early_stopping.early_stop:
250 | accelerator.print("Early stopping")
251 | break
252 |
253 | if args.lradj != 'TST':
254 | if args.lradj == 'COS':
255 | scheduler.step()
256 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
257 | else:
258 | if epoch == 0:
259 | args.learning_rate = model_optim.param_groups[0]['lr']
260 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
261 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
262 |
263 | else:
264 | accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
265 |
266 | accelerator.wait_for_everyone()
267 | if accelerator.is_local_main_process:
268 |         path = './checkpoints' # root checkpoint directory to clean up
269 |         del_files(path) # delete checkpoint files
270 |         accelerator.print('successfully deleted checkpoints')
--------------------------------------------------------------------------------
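
Both training loops above build the decoder input the same way: the last label_len ground-truth steps are kept as a start token and the pred_len future positions are zero-filled before the two pieces are concatenated along the time axis. A small illustration with made-up shapes:

import torch

# Decoder-input construction as in the training loops above: zeros for the
# horizon to be predicted, preceded by the last label_len observed steps.
batch_y = torch.arange(24, dtype=torch.float32).reshape(2, 4, 3)  # (B, label_len + pred_len, C)
label_len, pred_len = 2, 2

dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])             # placeholder for the future
dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)  # prepend the start token

print(dec_inp.shape)  # torch.Size([2, 4, 3])
print(dec_inp[0])     # first two rows are ground truth, last two are zeros
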
/run_pretrain.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 | from accelerate import Accelerator, DeepSpeedPlugin
4 | from accelerate import DistributedDataParallelKwargs
5 | from torch import nn, optim
6 | from torch.optim import lr_scheduler
7 |
8 | from data_provider_pretrain.data_factory import data_provider
9 | from models import Autoformer, DLinear, TimeLLM
10 |
11 | import time
12 | import random
13 | import numpy as np
14 | import os
15 |
16 | os.environ['CURL_CA_BUNDLE'] = ''
17 | os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
18 |
19 | from utils.tools import del_files, EarlyStopping, adjust_learning_rate, vali, load_content
20 |
21 | parser = argparse.ArgumentParser(description='Time-LLM')
22 |
23 | fix_seed = 2021
24 | random.seed(fix_seed)
25 | torch.manual_seed(fix_seed)
26 | np.random.seed(fix_seed)
27 |
28 | # basic config
29 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
30 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
31 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
32 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
33 | parser.add_argument('--model_comment', type=str, required=True, default='none', help='suffix appended to checkpoint and result names')
34 | parser.add_argument('--model', type=str, required=True, default='Autoformer',
35 | help='model name, options: [Autoformer, DLinear]')
36 | parser.add_argument('--seed', type=int, default=2021, help='random seed')
37 |
38 | # data loader
39 | parser.add_argument('--data_pretrain', type=str, required=True, default='ETTm1', help='dataset type')
40 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
41 | parser.add_argument('--root_path', type=str, default='./dataset', help='root path of the data file')
42 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
43 | parser.add_argument('--data_path_pretrain', type=str, default='ETTh1.csv', help='data file')
44 | parser.add_argument('--features', type=str, default='M',
45 | help='forecasting task, options:[M, S, MS]; '
46 | 'M:multivariate predict multivariate, S: univariate predict univariate, '
47 | 'MS:multivariate predict univariate')
48 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
49 | parser.add_argument('--loader', type=str, default='modal', help='dataset type')
50 | parser.add_argument('--freq', type=str, default='h',
51 | help='freq for time features encoding, '
52 | 'options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], '
53 | 'you can also use more detailed freq like 15min or 3h')
54 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
55 |
56 | # forecasting task
57 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
58 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
59 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
60 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
61 |
62 | # model define
63 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
64 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
65 | parser.add_argument('--c_out', type=int, default=7, help='output size')
66 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
67 | parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
68 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
69 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
70 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
71 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
72 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
73 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
74 | parser.add_argument('--embed', type=str, default='timeF',
75 | help='time features encoding, options:[timeF, fixed, learned]')
76 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
77 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
78 | parser.add_argument('--patch_len', type=int, default=16, help='patch length')
79 | parser.add_argument('--stride', type=int, default=8, help='stride')
80 | parser.add_argument('--prompt_domain', type=int, default=0, help='')
81 | parser.add_argument('--llm_model', type=str, default='LLAMA', help='LLM model') # LLAMA, GPT2, BERT
82 | parser.add_argument('--llm_dim', type=int, default=4096, help='LLM model dimension')  # Llama-7B: 4096; GPT2-small: 768; BERT-base: 768
83 |
84 | # optimization
85 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
86 | parser.add_argument('--itr', type=int, default=1, help='number of experiment repetitions')
87 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
88 | parser.add_argument('--align_epochs', type=int, default=10, help='alignment epochs')
89 | parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
90 | parser.add_argument('--eval_batch_size', type=int, default=8, help='batch size of model evaluation')
91 | parser.add_argument('--patience', type=int, default=5, help='early stopping patience')
92 | parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
93 | parser.add_argument('--des', type=str, default='test', help='exp description')
94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
95 | parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
96 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
98 | parser.add_argument('--llm_layers', type=int, default=6)
99 | parser.add_argument('--percent', type=int, default=100)
100 |
101 | args = parser.parse_args()
102 | ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
103 | deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')
104 | accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)
105 |
106 | for ii in range(args.itr):
107 | # setting record of experiments
108 | setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
109 | args.task_name,
110 | args.model_id,
111 | args.model,
112 | args.data,
113 | args.features,
114 | args.seq_len,
115 | args.label_len,
116 | args.pred_len,
117 | args.d_model,
118 | args.n_heads,
119 | args.e_layers,
120 | args.d_layers,
121 | args.d_ff,
122 | args.factor,
123 | args.embed,
124 | args.des, ii)
125 |
126 | train_data, train_loader = data_provider(args, args.data_pretrain, args.data_path_pretrain, True, 'train')
127 | vali_data, vali_loader = data_provider(args, args.data_pretrain, args.data_path_pretrain, True, 'val')
128 | test_data, test_loader = data_provider(args, args.data, args.data_path, False, 'test')
129 |
130 | if args.model == 'Autoformer':
131 | model = Autoformer.Model(args).float()
132 | elif args.model == 'DLinear':
133 | model = DLinear.Model(args).float()
134 | else:
135 | model = TimeLLM.Model(args).float()
136 |
137 | path = os.path.join(args.checkpoints,
138 | setting + '-' + args.model_comment) # unique checkpoint saving path
139 | args.content = load_content(args)
140 | if not os.path.exists(path) and accelerator.is_local_main_process:
141 | os.makedirs(path)
142 |
143 | time_now = time.time()
144 |
145 | train_steps = len(train_loader)
146 | early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience)
147 |
148 | trained_parameters = []
149 | for p in model.parameters():
150 | if p.requires_grad is True:
151 | trained_parameters.append(p)
152 |
153 | model_optim = optim.Adam(trained_parameters, lr=args.learning_rate)
154 |
155 | if args.lradj == 'COS':
156 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
157 | else:
158 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
159 | steps_per_epoch=train_steps,
160 | pct_start=args.pct_start,
161 | epochs=args.train_epochs,
162 | max_lr=args.learning_rate)
163 |
164 | criterion = nn.MSELoss()
165 | mae_metric = nn.L1Loss()
166 |
167 | train_loader, vali_loader, test_loader, model, model_optim, scheduler = accelerator.prepare(
168 | train_loader, vali_loader, test_loader, model, model_optim, scheduler)
169 |
170 | if args.use_amp:
171 | scaler = torch.cuda.amp.GradScaler()
172 |
173 | for epoch in range(args.train_epochs):
174 | iter_count = 0
175 | train_loss = []
176 |
177 | model.train()
178 | epoch_time = time.time()
179 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
180 | iter_count += 1
181 | model_optim.zero_grad()
182 |
183 | batch_x = batch_x.float().to(accelerator.device)
184 | batch_y = batch_y.float().to(accelerator.device)
185 | batch_x_mark = batch_x_mark.float().to(accelerator.device)
186 | batch_y_mark = batch_y_mark.float().to(accelerator.device)
187 |
188 | # decoder input
189 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(
190 | accelerator.device)
191 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
192 | accelerator.device)
193 |
194 | # encoder - decoder
195 | if args.use_amp:
196 | with torch.cuda.amp.autocast():
197 | if args.output_attention:
198 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
199 | else:
200 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
201 |
202 | f_dim = -1 if args.features == 'MS' else 0
203 | outputs = outputs[:, -args.pred_len:, f_dim:]
204 | batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
205 | loss = criterion(outputs, batch_y)
206 | train_loss.append(loss.item())
207 | else:
208 | if args.output_attention:
209 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
210 | else:
211 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
212 |
213 | f_dim = -1 if args.features == 'MS' else 0
214 | outputs = outputs[:, -args.pred_len:, f_dim:]
215 | batch_y = batch_y[:, -args.pred_len:, f_dim:]
216 | loss = criterion(outputs, batch_y)
217 | train_loss.append(loss.item())
218 |
219 | if (i + 1) % 100 == 0:
220 | accelerator.print(
221 | "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
222 | speed = (time.time() - time_now) / iter_count
223 | left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
224 | accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
225 | iter_count = 0
226 | time_now = time.time()
227 |
228 | if args.use_amp:
229 | scaler.scale(loss).backward()
230 | scaler.step(model_optim)
231 | scaler.update()
232 | else:
233 | accelerator.backward(loss)
234 | model_optim.step()
235 |
236 | if args.lradj == 'TST':
237 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
238 | scheduler.step()
239 |
240 | accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
241 | train_loss = np.average(train_loss)
242 | vali_loss, vali_mae_loss = vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric)
243 | test_loss, test_mae_loss = vali(args, accelerator, model, test_data, test_loader, criterion, mae_metric)
244 | accelerator.print(
245 | "Epoch: {0} | Train Loss: {1:.7f} Vali Loss: {2:.7f} Test Loss: {3:.7f} MAE Loss: {4:.7f}".format(
246 | epoch + 1, train_loss, vali_loss, test_loss, test_mae_loss))
247 |
248 | early_stopping(vali_loss, model, path)
249 | if early_stopping.early_stop:
250 | accelerator.print("Early stopping")
251 | break
252 |
253 | if args.lradj != 'TST':
254 | if args.lradj == 'COS':
255 | scheduler.step()
256 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
257 | else:
258 | if epoch == 0:
259 | args.learning_rate = model_optim.param_groups[0]['lr']
260 | accelerator.print("lr = {:.10f}".format(model_optim.param_groups[0]['lr']))
261 | adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
262 |
263 | else:
264 | accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
265 |
266 | accelerator.wait_for_everyone()
267 | if accelerator.is_local_main_process:
268 |         path = './checkpoints' # root checkpoint directory to clean up
269 |         del_files(path) # delete checkpoint files
270 |         accelerator.print('successfully deleted checkpoints')
--------------------------------------------------------------------------------
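
All three entry points choose the schedule from --lradj: 'COS' builds a CosineAnnealingLR that is stepped once per epoch, while any other value builds a OneCycleLR sized by steps_per_epoch * epochs; with 'TST' that OneCycleLR is stepped on every iteration inside the training loop. A minimal sketch with a dummy parameter and made-up step counts:

import torch
from torch import nn, optim
from torch.optim import lr_scheduler

# Scheduler selection as in the scripts above, on a single dummy parameter.
param = nn.Parameter(torch.zeros(1))
optimizer = optim.Adam([param], lr=0.01)

lradj = 'TST'            # 'COS' -> cosine annealing per epoch, anything else -> OneCycleLR
train_steps, epochs = 5, 3

if lradj == 'COS':
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-8)
else:
    scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, pct_start=0.2,
                                        steps_per_epoch=train_steps, epochs=epochs)

for step in range(train_steps * epochs):
    optimizer.step()     # in the real scripts: backward on the loss, then step
    scheduler.step()     # 'TST' steps per iteration; 'COS' would step per epoch
    print('step {:2d}  lr = {:.6f}'.format(step, optimizer.param_groups[0]['lr']))
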
/scripts/TimeLLM_ECL.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=10
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00097
7 | num_process=8
8 | batch_size=24
9 | d_model=16
10 | d_ff=32
11 |
12 | comment='TimeLLM-ECL'
13 |
14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
15 | --task_name long_term_forecast \
16 | --is_training 1 \
17 | --root_path ./dataset/electricity/ \
18 | --data_path electricity.csv \
19 | --model_id ECL_512_96 \
20 | --model $model_name \
21 | --data ECL \
22 | --features M \
23 | --seq_len 512 \
24 | --label_len 48 \
25 | --pred_len 96 \
26 | --e_layers 2 \
27 | --d_layers 1 \
28 | --factor 3 \
29 | --enc_in 321 \
30 | --dec_in 321 \
31 | --c_out 321 \
32 | --batch_size $batch_size \
33 | --learning_rate $learning_rate \
34 | --llm_layers $llama_layers \
35 | --train_epochs $train_epochs \
36 | --model_comment $comment
37 |
38 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
39 | --task_name long_term_forecast \
40 | --is_training 1 \
41 | --root_path ./dataset/electricity/ \
42 | --data_path electricity.csv \
43 | --model_id ECL_512_192 \
44 | --model $model_name \
45 | --data ECL \
46 | --features M \
47 | --seq_len 512 \
48 | --label_len 48 \
49 | --pred_len 192 \
50 | --e_layers 2 \
51 | --d_layers 1 \
52 | --factor 3 \
53 | --enc_in 321 \
54 | --dec_in 321 \
55 | --c_out 321 \
56 | --batch_size $batch_size \
57 | --learning_rate $learning_rate \
58 | --llm_layers $llama_layers \
59 | --train_epochs $train_epochs \
60 | --model_comment $comment
61 |
62 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
63 | --task_name long_term_forecast \
64 | --is_training 1 \
65 | --root_path ./dataset/electricity/ \
66 | --data_path electricity.csv \
67 | --model_id ECL_512_336 \
68 | --model $model_name \
69 | --data ECL \
70 | --features M \
71 | --seq_len 512 \
72 | --label_len 48 \
73 | --pred_len 336 \
74 | --e_layers 2 \
75 | --d_layers 1 \
76 | --factor 3 \
77 | --enc_in 321 \
78 | --dec_in 321 \
79 | --c_out 321 \
80 | --batch_size $batch_size \
81 | --learning_rate $learning_rate \
82 | --llm_layers $llama_layers \
83 | --train_epochs $train_epochs \
84 | --model_comment $comment
85 |
86 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
87 | --task_name long_term_forecast \
88 | --is_training 1 \
89 | --root_path ./dataset/electricity/ \
90 | --data_path electricity.csv \
91 | --model_id ECL_512_720 \
92 | --model $model_name \
93 | --data ECL \
94 | --features M \
95 | --seq_len 512 \
96 | --label_len 48 \
97 | --pred_len 720 \
98 | --e_layers 2 \
99 | --d_layers 1 \
100 | --factor 3 \
101 | --enc_in 321 \
102 | --dec_in 321 \
103 | --c_out 321 \
104 | --batch_size $batch_size \
105 | --learning_rate $learning_rate \
106 | --llm_layers $llama_layers \
107 | --train_epochs $train_epochs \
108 | --model_comment $comment
--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTh1.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=100
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00097
7 | num_process=8
8 | batch_size=24
9 | d_model=32
10 | d_ff=128
11 |
12 | comment='TimeLLM-ETTh1'
13 |
14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
15 | --task_name long_term_forecast \
16 | --is_training 1 \
17 | --root_path ./dataset/ETT-small/ \
18 | --data_path ETTh1.csv \
19 | --model_id ETTh1_512_96 \
20 | --model $model_name \
21 | --data ETTh1 \
22 | --features M \
23 | --seq_len 512 \
24 | --label_len 48 \
25 | --pred_len 96 \
26 | --factor 3 \
27 | --enc_in 7 \
28 | --dec_in 7 \
29 | --c_out 7 \
30 | --des 'Exp' \
31 | --itr 1 \
32 | --d_model $d_model \
33 | --d_ff $d_ff \
34 | --batch_size $batch_size \
35 | --learning_rate $learning_rate \
36 | --llm_layers $llama_layers \
37 | --train_epochs $train_epochs \
38 | --model_comment $comment
39 |
40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
41 | --task_name long_term_forecast \
42 | --is_training 1 \
43 | --root_path ./dataset/ETT-small/ \
44 | --data_path ETTh1.csv \
45 | --model_id ETTh1_512_192 \
46 | --model $model_name \
47 | --data ETTh1 \
48 | --features M \
49 | --seq_len 512 \
50 | --label_len 48 \
51 | --pred_len 192 \
52 | --factor 3 \
53 | --enc_in 7 \
54 | --dec_in 7 \
55 | --c_out 7 \
56 | --des 'Exp' \
57 | --itr 1 \
58 | --d_model 32 \
59 | --d_ff 128 \
60 | --batch_size $batch_size \
61 | --learning_rate 0.02 \
62 | --llm_layers $llama_layers \
63 | --train_epochs $train_epochs \
64 | --model_comment $comment
65 |
66 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
67 | --task_name long_term_forecast \
68 | --is_training 1 \
69 | --root_path ./dataset/ETT-small/ \
70 | --data_path ETTh1.csv \
71 | --model_id ETTh1_512_336 \
72 | --model $model_name \
73 | --data ETTh1 \
74 | --features M \
75 | --seq_len 512 \
76 | --label_len 48 \
77 | --pred_len 336 \
78 | --factor 3 \
79 | --enc_in 7 \
80 | --dec_in 7 \
81 | --c_out 7 \
82 | --des 'Exp' \
83 | --itr 1 \
84 | --d_model $d_model \
85 | --d_ff $d_ff \
86 | --batch_size $batch_size \
87 | --lradj 'COS'\
88 | --learning_rate 0.001 \
89 | --llm_layers $llama_layers \
90 | --train_epochs $train_epochs \
91 | --model_comment $comment
92 |
93 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
94 | --task_name long_term_forecast \
95 | --is_training 1 \
96 | --root_path ./dataset/ETT-small/ \
97 | --data_path ETTh1.csv \
98 | --model_id ETTh1_512_720 \
99 | --model $model_name \
100 | --data ETTh1 \
101 | --features M \
102 | --seq_len 512 \
103 | --label_len 48 \
104 | --pred_len 720 \
105 | --factor 3 \
106 | --enc_in 7 \
107 | --dec_in 7 \
108 | --c_out 7 \
109 | --des 'Exp' \
110 | --itr 1 \
111 | --d_model $d_model \
112 | --d_ff $d_ff \
113 | --batch_size $batch_size \
114 | --learning_rate $learning_rate \
115 | --llm_layers $llama_layers \
116 | --train_epochs $train_epochs \
117 | --model_comment $comment
--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTh1_ETTh2.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | learning_rate=0.01
3 | llama_layers=32
4 |
5 | master_port=00097
6 | num_process=8
7 | batch_size=24
8 | d_model=32
9 | d_ff=128
10 |
11 | comment='TimeLLM-ETTh1_ETTh2'
12 |
13 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
14 | --task_name long_term_forecast \
15 | --is_training 1 \
16 | --root_path ./dataset/ETT-small/ \
17 | --data_path_pretrain ETTh1.csv \
18 | --data_path ETTh2.csv \
19 | --model_id ETTh1_ETTh2_512_96 \
20 | --model $model_name \
21 | --data_pretrain ETTh1 \
22 | --data ETTh2 \
23 | --features M \
24 | --seq_len 512 \
25 | --label_len 48 \
26 | --pred_len 96 \
27 | --factor 3 \
28 | --enc_in 7 \
29 | --dec_in 7 \
30 | --c_out 7 \
31 | --des 'Exp' \
32 | --itr 1 \
33 | --d_model $d_model \
34 | --d_ff $d_ff \
35 | --batch_size $batch_size \
36 | --learning_rate $learning_rate \
37 | --llm_layers $llama_layers \
38 | --train_epochs 5 \
39 | --model_comment $comment
40 |
41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
42 | --task_name long_term_forecast \
43 | --is_training 1 \
44 | --root_path ./dataset/ETT-small/ \
45 | --data_path_pretrain ETTh1.csv \
46 | --data_path ETTh2.csv \
47 | --model_id ETTh1_ETTh2_512_192 \
48 | --model $model_name \
49 | --data_pretrain ETTh1 \
50 | --data ETTh2 \
51 | --features M \
52 | --seq_len 512 \
53 | --label_len 48 \
54 | --pred_len 192 \
55 | --factor 3 \
56 | --enc_in 7 \
57 | --dec_in 7 \
58 | --c_out 7 \
59 | --des 'Exp' \
60 | --itr 1 \
61 | --d_model 32 \
62 | --d_ff 128 \
63 | --batch_size $batch_size \
64 | --learning_rate 0.02 \
65 | --llm_layers $llama_layers \
66 | --train_epochs 5 \
67 | --model_comment $comment
68 |
69 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
70 | --task_name long_term_forecast \
71 | --is_training 1 \
72 | --root_path ./dataset/ETT-small/ \
73 | --data_path_pretrain ETTh1.csv \
74 | --data_path ETTh2.csv \
75 | --model_id ETTh1_ETTh2_512_336 \
76 | --model $model_name \
77 | --data_pretrain ETTh1 \
78 | --data ETTh2 \
79 | --features M \
80 | --seq_len 512 \
81 | --label_len 48 \
82 | --pred_len 336 \
83 | --factor 3 \
84 | --enc_in 7 \
85 | --dec_in 7 \
86 | --c_out 7 \
87 | --des 'Exp' \
88 | --itr 1 \
89 | --d_model $d_model \
90 | --d_ff $d_ff \
91 | --batch_size $batch_size \
92 | --lradj 'COS'\
93 | --learning_rate 0.001 \
94 | --llm_layers $llama_layers \
95 | --train_epochs 5 \
96 | --model_comment $comment
97 |
98 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_pretrain.py \
99 | --task_name long_term_forecast \
100 | --is_training 1 \
101 | --root_path ./dataset/ETT-small/ \
102 | --data_path_pretrain ETTh1.csv \
103 | --data_path ETTh2.csv \
104 | --model_id ETTh1_ETTh2_512_720 \
105 | --model $model_name \
106 | --data_pretrain ETTh1 \
107 | --data ETTh2 \
108 | --features M \
109 | --seq_len 512 \
110 | --label_len 48 \
111 | --pred_len 720 \
112 | --factor 3 \
113 | --enc_in 7 \
114 | --dec_in 7 \
115 | --c_out 7 \
116 | --des 'Exp' \
117 | --itr 1 \
118 | --d_model $d_model \
119 | --d_ff $d_ff \
120 | --batch_size $batch_size \
121 | --learning_rate $learning_rate \
122 | --llm_layers $llama_layers \
123 | --train_epochs 5 \
124 | --model_comment $comment
--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTh2.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=10
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00098
7 | num_process=8
8 | batch_size=24
9 | d_model=32
10 | d_ff=128
11 |
12 | comment='TimeLLM-ETTh2'
13 |
14 |
15 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
16 | --task_name long_term_forecast \
17 | --is_training 1 \
18 | --root_path ./dataset/ETT-small/ \
19 | --data_path ETTh2.csv \
20 | --model_id ETTh2_512_96 \
21 | --model $model_name \
22 | --data ETTh2 \
23 | --features M \
24 | --seq_len 512 \
25 | --label_len 48 \
26 | --pred_len 96 \
27 | --factor 3 \
28 | --enc_in 7 \
29 | --dec_in 7 \
30 | --c_out 7 \
31 | --des 'Exp' \
32 | --itr 1 \
33 | --d_model $d_model \
34 | --d_ff $d_ff \
35 | --batch_size $batch_size \
36 | --learning_rate $learning_rate \
37 | --llm_layers $llama_layers \
38 | --train_epochs $train_epochs \
39 | --model_comment $comment
40 |
41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
42 | --task_name long_term_forecast \
43 | --is_training 1 \
44 | --root_path ./dataset/ETT-small/ \
45 | --data_path ETTh2.csv \
46 | --model_id ETTh2_512_192 \
47 | --model $model_name \
48 | --data ETTh2 \
49 | --features M \
50 | --seq_len 512 \
51 | --label_len 48 \
52 | --pred_len 192 \
53 | --factor 3 \
54 | --enc_in 7 \
55 | --dec_in 7 \
56 | --c_out 7 \
57 | --des 'Exp' \
58 | --itr 1 \
59 | --d_model $d_model \
60 | --d_ff $d_ff \
61 | --batch_size $batch_size \
62 | --lradj 'TST'\
63 | --learning_rate 0.002 \
64 | --llm_layers $llama_layers \
65 | --train_epochs $train_epochs \
66 | --model_comment $comment
67 |
68 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
69 | --task_name long_term_forecast \
70 | --is_training 1 \
71 | --root_path ./dataset/ETT-small/ \
72 | --data_path ETTh2.csv \
73 | --model_id ETTh2_512_336 \
74 | --model $model_name \
75 | --data ETTh2 \
76 | --features M \
77 | --seq_len 512 \
78 | --label_len 48 \
79 | --pred_len 336 \
80 | --factor 3 \
81 | --enc_in 7 \
82 | --dec_in 7 \
83 | --c_out 7 \
84 | --des 'Exp' \
85 | --itr 1 \
86 | --d_model $d_model \
87 | --d_ff $d_ff \
88 | --batch_size $batch_size \
89 | --lradj 'TST'\
90 | --learning_rate 0.005 \
91 | --llm_layers $llama_layers \
92 | --train_epochs $train_epochs \
93 | --model_comment $comment
94 |
95 |
96 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
97 | --task_name long_term_forecast \
98 | --is_training 1 \
99 | --root_path ./dataset/ETT-small/ \
100 | --data_path ETTh2.csv \
101 | --model_id ETTh2_512_720 \
102 | --model $model_name \
103 | --data ETTh2 \
104 | --features M \
105 | --seq_len 512 \
106 | --label_len 48 \
107 | --pred_len 720 \
108 | --factor 3 \
109 | --enc_in 7 \
110 | --dec_in 7 \
111 | --c_out 7 \
112 | --des 'Exp' \
113 | --itr 1 \
114 | --d_model 16 \
115 | --d_ff 128 \
116 | --batch_size $batch_size \
117 | --learning_rate 0.005 \
118 | --lradj 'TST'\
119 | --llm_layers $llama_layers \
120 | --train_epochs 20 \
121 | --patience 10 \
122 | --model_comment $comment
--------------------------------------------------------------------------------
/scripts/TimeLLM_ETTm1.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=100
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00097
7 | num_process=8
8 | batch_size=24
9 | d_model=32
10 | d_ff=128
11 |
12 | comment='TimeLLM-ETTm1'
13 |
14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
15 | --task_name long_term_forecast \
16 | --is_training 1 \
17 | --root_path ./dataset/ETT-small/ \
18 | --data_path ETTm1.csv \
19 | --model_id ETTm1_512_96 \
20 | --model $model_name \
21 | --data ETTm1 \
22 | --features M \
23 | --seq_len 512 \
24 | --label_len 48 \
25 | --pred_len 96 \
26 | --factor 3 \
27 | --enc_in 7 \
28 | --dec_in 7 \
29 | --c_out 7 \
30 | --des 'Exp' \
31 | --itr 1 \
32 | --d_model $d_model \
33 | --d_ff $d_ff \
34 | --batch_size $batch_size \
35 | --lradj 'TST'\
36 | --learning_rate 0.001 \
37 | --llm_layers $llama_layers \
38 | --train_epochs $train_epochs \
39 | --model_comment $comment
40 |
41 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
42 | --task_name long_term_forecast \
43 | --is_training 1 \
44 | --root_path ./dataset/ETT-small/ \
45 | --data_path ETTm1.csv \
46 | --model_id ETTm1_512_192 \
47 | --model $model_name \
48 | --data ETTm1 \
49 | --features M \
50 | --seq_len 512 \
51 | --label_len 48 \
52 | --pred_len 192 \
53 | --factor 3 \
54 | --enc_in 7 \
55 | --dec_in 7 \
56 | --c_out 7 \
57 | --des 'Exp' \
58 | --itr 1 \
59 | --d_model $d_model \
60 | --d_ff $d_ff \
61 | --batch_size $batch_size \
62 | --learning_rate $learning_rate \
63 | --lradj 'TST'\
64 | --learning_rate 0.001 \
65 | --llm_layers $llama_layers \
66 | --train_epochs $train_epochs \
67 | --patience 20 \
68 | --model_comment $comment
69 |
70 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
71 | --task_name long_term_forecast \
72 | --is_training 1 \
73 | --root_path ./dataset/ETT-small/ \
74 | --data_path ETTm1.csv \
75 | --model_id ETTm1_512_336 \
76 | --model $model_name \
77 | --data ETTm1 \
78 | --features M \
79 | --seq_len 512 \
80 | --label_len 48 \
81 | --pred_len 336 \
82 | --factor 3 \
83 | --enc_in 7 \
84 | --dec_in 7 \
85 | --c_out 7 \
86 | --des 'Exp' \
87 | --itr 1 \
88 | --d_model $d_model \
89 | --d_ff $d_ff \
90 | --batch_size $batch_size \
91 | --learning_rate $learning_rate \
92 | --lradj 'TST'\
93 | --learning_rate 0.001 \
94 | --llm_layers $llama_layers \
95 | --train_epochs $train_epochs \
96 | --patience 20 \
97 | --model_comment $comment
98 |
99 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
100 | --task_name long_term_forecast \
101 | --is_training 1 \
102 | --root_path ./dataset/ETT-small/ \
103 | --data_path ETTm1.csv \
104 | --model_id ETTm1_512_720 \
105 | --model $model_name \
106 | --data ETTm1 \
107 | --features M \
108 | --seq_len 512 \
109 | --label_len 48 \
110 | --pred_len 720 \
111 | --factor 3 \
112 | --enc_in 7 \
113 | --dec_in 7 \
114 | --c_out 7 \
115 | --des 'Exp' \
116 | --itr 1 \
117 | --d_model $d_model \
118 | --d_ff $d_ff \
119 | --batch_size $batch_size \
120 | --learning_rate $learning_rate \
121 | --lradj 'TST'\
122 | --learning_rate 0.001 \
123 | --llm_layers $llama_layers \
124 | --train_epochs $train_epochs \
125 | --patience 20 \
126 | --model_comment $comment
--------------------------------------------------------------------------------
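
Several commands in TimeLLM_ETTm1.sh above (and in TimeLLM_ETTm2.sh below) pass --learning_rate twice, once as $learning_rate and once with an explicit value. argparse's default store action keeps only the last occurrence, so the explicit value (0.001 or 0.002) is the one that takes effect, as this short check shows:

import argparse

# A repeated plain 'store' option keeps only the last value on the command line,
# so the later --learning_rate overrides the earlier one.
parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=0.0001)
args = parser.parse_args(['--learning_rate', '0.01', '--learning_rate', '0.001'])
print(args.learning_rate)  # 0.001
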
/scripts/TimeLLM_ETTm2.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=10
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00097
7 | num_process=8
8 | batch_size=24
9 | d_model=32
10 | d_ff=128
11 |
12 | comment='TimeLLM-ETTm2'
13 |
14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
15 | --task_name long_term_forecast \
16 | --is_training 1 \
17 | --root_path ./dataset/ETT-small/ \
18 | --data_path ETTm2.csv \
19 | --model_id ETTm2_512_96 \
20 | --model $model_name \
21 | --data ETTm2 \
22 | --features M \
23 | --seq_len 512 \
24 | --label_len 48 \
25 | --pred_len 96 \
26 | --factor 3 \
27 | --enc_in 7 \
28 | --dec_in 7 \
29 | --c_out 7 \
30 | --des 'Exp' \
31 | --itr 1 \
32 | --d_model $d_model \
33 | --d_ff $d_ff \
34 | --batch_size 16 \
35 | --learning_rate $learning_rate \
36 | --llm_layers $llama_layers \
37 | --train_epochs $train_epochs \
38 | --model_comment $comment
39 |
40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
41 | --task_name long_term_forecast \
42 | --is_training 1 \
43 | --root_path ./dataset/ETT-small/ \
44 | --data_path ETTm2.csv \
45 | --model_id ETTm2_512_192 \
46 | --model $model_name \
47 | --data ETTm2 \
48 | --features M \
49 | --seq_len 512 \
50 | --label_len 48 \
51 | --pred_len 192 \
52 | --factor 3 \
53 | --enc_in 7 \
54 | --dec_in 7 \
55 | --c_out 7 \
56 | --des 'Exp' \
57 | --itr 1 \
58 | --d_model $d_model \
59 | --d_ff $d_ff \
60 | --batch_size $batch_size \
61 | --learning_rate $learning_rate \
62 | --lradj 'TST'\
63 | --learning_rate 0.002 \
64 | --llm_layers $llama_layers \
65 | --train_epochs $train_epochs \
66 | --model_comment $comment
67 |
68 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
69 | --task_name long_term_forecast \
70 | --is_training 1 \
71 | --root_path ./dataset/ETT-small/ \
72 | --data_path ETTm2.csv \
73 | --model_id ETTm2_512_336 \
74 | --model $model_name \
75 | --data ETTm2 \
76 | --features M \
77 | --seq_len 512 \
78 | --label_len 48 \
79 | --pred_len 336 \
80 | --factor 3 \
81 | --enc_in 7 \
82 | --dec_in 7 \
83 | --c_out 7 \
84 | --des 'Exp' \
85 | --itr 1 \
86 | --d_model $d_model \
87 | --d_ff $d_ff \
88 | --batch_size $batch_size \
89 | --learning_rate $learning_rate \
90 | --lradj 'TST'\
91 | --learning_rate 0.002 \
92 | --llm_layers $llama_layers \
93 | --train_epochs $train_epochs \
94 | --model_comment $comment
95 |
96 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
97 | --task_name long_term_forecast \
98 | --is_training 1 \
99 | --root_path ./dataset/ETT-small/ \
100 | --data_path ETTm2.csv \
101 | --model_id ETTm2_512_720 \
102 | --model $model_name \
103 | --data ETTm2 \
104 | --features M \
105 | --seq_len 512 \
106 | --label_len 48 \
107 | --pred_len 720 \
108 | --factor 3 \
109 | --enc_in 7 \
110 | --dec_in 7 \
111 | --c_out 7 \
112 | --des 'Exp' \
113 | --itr 1 \
114 | --d_model $d_model \
115 | --d_ff $d_ff \
116 | --batch_size $batch_size \
117 | --learning_rate $learning_rate \
118 | --lradj 'TST'\
119 | --learning_rate 0.002 \
120 | --llm_layers $llama_layers \
121 | --train_epochs $train_epochs \
122 | --model_comment $comment
123 |
124 |
125 |
--------------------------------------------------------------------------------
/scripts/TimeLLM_M4.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 |
3 | train_epochs=50
4 | llama_layers=32
5 | batch_size=24
6 | learning_rate=0.001
7 | d_model=8
8 | d_ff=32
9 |
10 | master_port=00097
11 | num_process=8
12 |
13 | comment='TimeLLM-M4'
14 |
15 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
16 | --task_name short_term_forecast \
17 | --is_training 1 \
18 | --root_path ./dataset/m4 \
19 | --seasonal_patterns 'Monthly' \
20 | --model_id m4_Monthly \
21 | --model $model_name \
22 | --data m4 \
23 | --features M \
24 | --enc_in 1 \
25 | --dec_in 1 \
26 | --c_out 1 \
27 | --llm_layers $llama_layers \
28 | --d_model $d_model \
29 | --d_ff $d_ff \
30 | --patch_len 1 \
31 | --stride 1 \
32 | --batch_size $batch_size \
33 | --des 'Exp' \
34 | --itr 1 \
35 | --learning_rate $learning_rate \
36 | --loss 'SMAPE' \
37 | --train_epochs $train_epochs \
38 | --model_comment $comment
39 |
40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
41 | --task_name short_term_forecast \
42 | --is_training 1 \
43 | --root_path ./dataset/m4 \
44 | --seasonal_patterns 'Yearly' \
45 | --model_id m4_Yearly \
46 | --model $model_name \
47 | --data m4 \
48 | --features M \
49 | --enc_in 1 \
50 | --dec_in 1 \
51 | --c_out 1 \
52 | --llm_layers $llama_layers \
53 | --d_model $d_model \
54 | --d_ff $d_ff \
55 | --patch_len 1 \
56 | --stride 1 \
57 | --batch_size $batch_size \
58 | --des 'Exp' \
59 | --itr 1 \
60 | --learning_rate $learning_rate \
61 | --loss 'SMAPE' \
62 | --train_epochs $train_epochs \
63 | --model_comment $comment
64 |
65 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
66 | --task_name short_term_forecast \
67 | --is_training 1 \
68 | --root_path ./dataset/m4 \
69 | --seasonal_patterns 'Weekly' \
70 | --model_id m4_Weekly \
71 | --model $model_name \
72 | --data m4 \
73 | --features M \
74 | --enc_in 1 \
75 | --dec_in 1 \
76 | --c_out 1 \
77 | --llm_layers $llama_layers \
78 | --d_model $d_model \
79 | --d_ff $d_ff \
80 | --patch_len 1 \
81 | --stride 1 \
82 | --batch_size $batch_size \
83 | --des 'Exp' \
84 | --itr 1 \
85 | --learning_rate $learning_rate \
86 | --loss 'SMAPE' \
87 | --train_epochs $train_epochs \
88 | --model_comment $comment
89 |
90 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
91 | --task_name short_term_forecast \
92 | --is_training 1 \
93 | --root_path ./dataset/m4 \
94 | --seasonal_patterns 'Daily' \
95 | --model_id m4_Daily \
96 | --model $model_name \
97 | --data m4 \
98 | --features M \
99 | --enc_in 1 \
100 | --dec_in 1 \
101 | --c_out 1 \
102 | --llm_layers $llama_layers \
103 | --d_model $d_model \
104 | --d_ff $d_ff \
105 | --patch_len 1 \
106 | --stride 1 \
107 | --batch_size $batch_size \
108 | --des 'Exp' \
109 | --itr 1 \
110 | --learning_rate $learning_rate \
111 | --loss 'SMAPE' \
112 | --train_epochs $train_epochs \
113 | --model_comment $comment
114 |
115 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
116 | --task_name short_term_forecast \
117 | --is_training 1 \
118 | --root_path ./dataset/m4 \
119 | --seasonal_patterns 'Quarterly' \
120 | --model_id m4_Quarterly \
121 | --model $model_name \
122 | --data m4 \
123 | --features M \
124 | --enc_in 1 \
125 | --dec_in 1 \
126 | --c_out 1 \
127 | --llm_layers $llama_layers \
128 | --d_model $d_model \
129 | --d_ff $d_ff \
130 | --patch_len 1 \
131 | --stride 1 \
132 | --batch_size $batch_size \
133 | --des 'Exp' \
134 | --itr 1 \
135 | --learning_rate $learning_rate \
136 | --loss 'SMAPE' \
137 | --train_epochs $train_epochs \
138 | --model_comment $comment
139 |
140 |
141 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_m4.py \
142 | --task_name short_term_forecast \
143 | --is_training 1 \
144 | --root_path ./dataset/m4 \
145 | --seasonal_patterns 'Hourly' \
146 | --model_id m4_Hourly \
147 | --model $model_name \
148 | --data m4 \
149 | --features M \
150 | --enc_in 1 \
151 | --dec_in 1 \
152 | --c_out 1 \
153 | --llm_layers $llama_layers \
154 | --d_model $d_model \
155 | --d_ff $d_ff \
156 | --patch_len 1 \
157 | --stride 1 \
158 | --batch_size $batch_size \
159 | --des 'Exp' \
160 | --itr 1 \
161 | --learning_rate $learning_rate \
162 | --loss 'SMAPE' \
163 | --train_epochs $train_epochs \
164 | --model_comment $comment
--------------------------------------------------------------------------------
/scripts/TimeLLM_Traffic.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=10
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00097
7 | num_process=8
8 | batch_size=24
9 | d_model=16
10 | d_ff=32
11 |
12 | comment='TimeLLM-Traffic'
13 |
14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
15 | --task_name long_term_forecast \
16 | --is_training 1 \
17 | --root_path ./dataset/traffic/ \
18 | --data_path traffic.csv \
19 | --model_id traffic_512_96 \
20 | --model $model_name \
21 | --data Traffic \
22 | --features M \
23 | --seq_len 512 \
24 | --label_len 48 \
25 | --pred_len 96 \
26 | --e_layers 2 \
27 | --d_layers 1 \
28 | --factor 3 \
29 | --enc_in 862 \
30 | --dec_in 862 \
31 | --c_out 862 \
32 | --batch_size $batch_size \
33 | --learning_rate $learning_rate \
34 | --llm_layers $llama_layers \
35 | --train_epochs $train_epochs \
36 | --model_comment $comment
37 |
38 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
39 | --task_name long_term_forecast \
40 | --is_training 1 \
41 | --root_path ./dataset/traffic/ \
42 | --data_path traffic.csv \
43 |   --model_id traffic_512_192 \
44 | --model $model_name \
45 | --data Traffic \
46 | --features M \
47 | --seq_len 512 \
48 | --label_len 48 \
49 | --pred_len 192 \
50 | --e_layers 2 \
51 | --d_layers 1 \
52 | --factor 3 \
53 | --enc_in 862 \
54 | --dec_in 862 \
55 | --c_out 862 \
56 | --batch_size $batch_size \
57 | --learning_rate $learning_rate \
58 | --llm_layers $llama_layers \
59 | --train_epochs $train_epochs \
60 | --model_comment $comment
61 |
62 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
63 | --task_name long_term_forecast \
64 | --is_training 1 \
65 | --root_path ./dataset/traffic/ \
66 | --data_path traffic.csv \
67 |   --model_id traffic_512_336 \
68 | --model $model_name \
69 | --data Traffic \
70 | --features M \
71 | --seq_len 512 \
72 | --label_len 48 \
73 | --pred_len 336 \
74 | --e_layers 2 \
75 | --d_layers 1 \
76 | --factor 3 \
77 | --enc_in 862 \
78 | --dec_in 862 \
79 | --c_out 862 \
80 | --batch_size 1 \
81 | --learning_rate $learning_rate \
82 | --llm_layers $llama_layers \
83 | --train_epochs $train_epochs \
84 | --model_comment $comment
85 |
86 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
87 | --task_name long_term_forecast \
88 | --is_training 1 \
89 | --root_path ./dataset/traffic/ \
90 | --data_path traffic.csv \
91 |   --model_id traffic_512_720 \
92 | --model $model_name \
93 | --data Traffic \
94 | --features M \
95 | --seq_len 512 \
96 |   --label_len 48 \
97 |   --pred_len 720 \
98 | --e_layers 2 \
99 | --d_layers 1 \
100 | --factor 3 \
101 | --enc_in 862 \
102 | --dec_in 862 \
103 | --c_out 862 \
104 | --batch_size $batch_size \
105 | --learning_rate $learning_rate \
106 | --llm_layers $llama_layers \
107 | --train_epochs $train_epochs \
108 | --model_comment $comment
--------------------------------------------------------------------------------
/scripts/TimeLLM_Weather.sh:
--------------------------------------------------------------------------------
1 | model_name=TimeLLM
2 | train_epochs=10
3 | learning_rate=0.01
4 | llama_layers=32
5 |
6 | master_port=00097
7 | num_process=8
8 | batch_size=24
9 | d_model=16
10 | d_ff=32
11 |
12 | comment='TimeLLM-Weather'
13 |
14 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
15 | --task_name long_term_forecast \
16 | --is_training 1 \
17 | --root_path ./dataset/weather/ \
18 | --data_path weather.csv \
19 | --model_id weather_512_96 \
20 | --model $model_name \
21 | --data Weather \
22 | --features M \
23 | --seq_len 512 \
24 | --label_len 48 \
25 | --pred_len 96 \
26 | --e_layers 2 \
27 | --d_layers 1 \
28 | --factor 3 \
29 | --enc_in 21 \
30 | --dec_in 21 \
31 | --c_out 21 \
32 | --d_model 32 \
33 | --d_ff 32 \
34 | --batch_size $batch_size \
35 | --learning_rate $learning_rate \
36 | --llm_layers $llama_layers \
37 | --train_epochs $train_epochs \
38 | --model_comment $comment
39 |
40 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
41 | --task_name long_term_forecast \
42 | --is_training 1 \
43 | --root_path ./dataset/weather/ \
44 | --data_path weather.csv \
45 | --model_id weather_512_192 \
46 | --model $model_name \
47 | --data Weather \
48 | --features M \
49 | --seq_len 512 \
50 | --label_len 48 \
51 | --pred_len 192 \
52 | --e_layers 2 \
53 | --d_layers 1 \
54 | --factor 3 \
55 | --enc_in 21 \
56 | --dec_in 21 \
57 | --c_out 21 \
58 | --d_model 32 \
59 | --d_ff 32 \
60 | --batch_size $batch_size \
61 | --learning_rate $learning_rate \
62 | --llm_layers $llama_layers \
63 | --train_epochs $train_epochs \
64 | --model_comment $comment
65 |
66 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
67 | --task_name long_term_forecast \
68 | --is_training 1 \
69 | --root_path ./dataset/weather/ \
70 | --data_path weather.csv \
71 | --model_id weather_512_336 \
72 | --model $model_name \
73 | --data Weather \
74 | --features M \
75 | --seq_len 512 \
76 | --label_len 48 \
77 | --pred_len 336 \
78 | --e_layers 2 \
79 | --d_layers 1 \
80 | --factor 3 \
81 | --enc_in 21 \
82 | --dec_in 21 \
83 | --c_out 21 \
84 | --d_model 32 \
85 | --d_ff 128 \
86 | --batch_size $batch_size \
87 | --learning_rate $learning_rate \
88 | --llm_layers $llama_layers \
89 |   --train_epochs $train_epochs \
90 | --model_comment $comment
91 |
92 | accelerate launch --multi_gpu --mixed_precision bf16 --num_processes $num_process --main_process_port $master_port run_main.py \
93 | --task_name long_term_forecast \
94 | --is_training 1 \
95 | --root_path ./dataset/weather/ \
96 | --data_path weather.csv \
97 | --model_id weather_512_720 \
98 | --model $model_name \
99 | --data Weather \
100 | --features M \
101 | --seq_len 512 \
102 | --label_len 48 \
103 | --pred_len 720 \
104 | --e_layers 2 \
105 | --d_layers 1 \
106 | --factor 3 \
107 | --enc_in 21 \
108 | --dec_in 21 \
109 | --c_out 21 \
110 | --d_model 32 \
111 | --d_ff 128 \
112 | --batch_size $batch_size \
113 | --learning_rate $learning_rate \
114 | --llm_layers $llama_layers \
115 | --train_epochs 15 \
116 | --model_comment $comment
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KimMeen/Time-LLM/02ee1b8f6043090c7a417f0cbb64cbf753895175/utils/__init__.py
--------------------------------------------------------------------------------
/utils/losses.py:
--------------------------------------------------------------------------------
1 | # This source code is provided for the purposes of scientific reproducibility
2 | # under the following limited license from Element AI Inc. The code is an
3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
4 | # expansion analysis for interpretable time series forecasting,
5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
7 | # International license (CC BY-NC 4.0):
8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
14 |
15 | """
16 | Loss functions for PyTorch.
17 | """
18 |
19 | import torch as t
20 | import torch.nn as nn
21 | import numpy as np
22 | import pdb
23 |
24 |
25 | def divide_no_nan(a, b):
26 | """
27 |     a/b where any resulting NaN or Inf is replaced by 0.
28 | """
29 | result = a / b
30 | result[result != result] = .0
31 |     result[t.isinf(result)] = .0  # catch -inf as well as +inf
32 | return result
33 |
34 |
35 | class mape_loss(nn.Module):
36 | def __init__(self):
37 | super(mape_loss, self).__init__()
38 |
39 | def forward(self, insample: t.Tensor, freq: int,
40 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
41 | """
42 | MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
43 |
44 | :param forecast: Forecast values. Shape: batch, time
45 | :param target: Target values. Shape: batch, time
46 | :param mask: 0/1 mask. Shape: batch, time
47 | :return: Loss value
48 | """
49 | weights = divide_no_nan(mask, target)
50 | return t.mean(t.abs((forecast - target) * weights))
51 |
52 |
53 | class smape_loss(nn.Module):
54 | def __init__(self):
55 | super(smape_loss, self).__init__()
56 |
57 | def forward(self, insample: t.Tensor, freq: int,
58 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
59 | """
60 | sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
61 |
62 | :param forecast: Forecast values. Shape: batch, time
63 | :param target: Target values. Shape: batch, time
64 | :param mask: 0/1 mask. Shape: batch, time
65 | :return: Loss value
66 | """
67 | return 200 * t.mean(divide_no_nan(t.abs(forecast - target),
68 | t.abs(forecast.data) + t.abs(target.data)) * mask)
69 |
70 |
71 | class mase_loss(nn.Module):
72 | def __init__(self):
73 | super(mase_loss, self).__init__()
74 |
75 | def forward(self, insample: t.Tensor, freq: int,
76 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
77 | """
78 | MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf
79 |
80 | :param insample: Insample values. Shape: batch, time_i
81 | :param freq: Frequency value
82 | :param forecast: Forecast values. Shape: batch, time_o
83 | :param target: Target values. Shape: batch, time_o
84 | :param mask: 0/1 mask. Shape: batch, time_o
85 | :return: Loss value
86 | """
87 | masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
88 | masked_masep_inv = divide_no_nan(mask, masep[:, None])
89 | return t.mean(t.abs(target - forecast) * masked_masep_inv)
90 |
--------------------------------------------------------------------------------
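
For orientation, a minimal usage sketch of these loss modules (illustrative only, not a file in this repository; it assumes the repository root is on PYTHONPATH). smape_loss and mape_loss ignore the insample/freq arguments, which only mase_loss uses to build its naive in-sample scaling term:

import torch as t
from utils.losses import smape_loss, mase_loss

forecast = t.tensor([[10.0, 12.0, 11.0]])        # model output, shape (batch, time)
target   = t.tensor([[11.0, 12.0,  9.0]])        # ground truth, same shape
mask     = t.ones_like(target)                   # 1 = observed, 0 = padded/missing
insample = t.tensor([[8.0, 9.0, 10.0, 11.0]])    # history window, used by MASE only

print(smape_loss()(insample, 1, forecast, target, mask))  # symmetric MAPE, scaled by 200
print(mase_loss()(insample, 1, forecast, target, mask))   # MAE scaled by the naive in-sample MAE (freq=1)
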
/utils/m4_summary.py:
--------------------------------------------------------------------------------
1 | # This source code is provided for the purposes of scientific reproducibility
2 | # under the following limited license from Element AI Inc. The code is an
3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
4 | # expansion analysis for interpretable time series forecasting,
5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
7 | # International license (CC BY-NC 4.0):
8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright 2020 Element AI Inc. All rights reserved.
14 |
15 | """
16 | M4 Summary
17 | """
18 | from collections import OrderedDict
19 |
20 | import numpy as np
21 | import pandas as pd
22 |
23 | from data_provider.m4 import M4Dataset
24 | from data_provider.m4 import M4Meta
25 | import os
26 |
27 |
28 | def group_values(values, groups, group_name):
29 | return np.array([v[~np.isnan(v)] for v in values[groups == group_name]])
30 |
31 |
32 | def mase(forecast, insample, outsample, frequency):
33 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
34 |
35 |
36 | def smape_2(forecast, target):
37 | denom = np.abs(target) + np.abs(forecast)
38 |     # replace zero denominators with 1.0; when the denominator is zero the numerator is 0.0 anyway.
39 | denom[denom == 0.0] = 1.0
40 | return 200 * np.abs(forecast - target) / denom
41 |
42 |
43 | def mape(forecast, target):
44 | denom = np.abs(target)
45 |     # replace zero denominators with 1.0; when the denominator is zero the numerator is 0.0 anyway.
46 | denom[denom == 0.0] = 1.0
47 | return 100 * np.abs(forecast - target) / denom
48 |
49 |
50 | class M4Summary:
51 | def __init__(self, file_path, root_path):
52 | self.file_path = file_path
53 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
54 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
55 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')
56 |
57 | def evaluate(self):
58 |         """
59 |         Evaluate forecasts using the M4 test dataset.
60 | 
61 |         Reads the forecast CSVs at `file_path` + '<group>_forecast.csv' and compares them against the Naive2 benchmark.
62 |         :return: sMAPE, OWA, MAPE and MASE grouped by seasonal pattern.
63 |         """
64 | grouped_owa = OrderedDict()
65 |
66 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
67 | naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts])
68 |
69 | model_mases = {}
70 | naive2_smapes = {}
71 | naive2_mases = {}
72 | grouped_smapes = {}
73 | grouped_mapes = {}
74 | for group_name in M4Meta.seasonal_patterns:
75 | file_name = self.file_path + group_name + "_forecast.csv"
76 | if os.path.exists(file_name):
77 | model_forecast = pd.read_csv(file_name).values
78 |
79 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
80 | target = group_values(self.test_set.values, self.test_set.groups, group_name)
81 | # all timeseries within group have same frequency
82 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
83 | insample = group_values(self.training_set.values, self.test_set.groups, group_name)
84 |
85 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
86 | insample=insample[i],
87 | outsample=target[i],
88 | frequency=frequency) for i in range(len(model_forecast))])
89 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
90 | insample=insample[i],
91 | outsample=target[i],
92 | frequency=frequency) for i in range(len(model_forecast))])
93 |
94 | naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
95 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
96 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))
97 |
98 | grouped_smapes = self.summarize_groups(grouped_smapes)
99 | grouped_mapes = self.summarize_groups(grouped_mapes)
100 | grouped_model_mases = self.summarize_groups(model_mases)
101 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
102 | grouped_naive2_mases = self.summarize_groups(naive2_mases)
103 | for k in grouped_model_mases.keys():
104 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
105 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2
106 |
107 | def round_all(d):
108 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
109 |
110 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all(
111 | grouped_model_mases)
112 |
113 | def summarize_groups(self, scores):
114 | """
115 | Re-group scores respecting M4 rules.
116 | :param scores: Scores per group.
117 | :return: Grouped scores.
118 | """
119 | scores_summary = OrderedDict()
120 |
121 | def group_count(group_name):
122 | return len(np.where(self.test_set.groups == group_name)[0])
123 |
124 | weighted_score = {}
125 | for g in ['Yearly', 'Quarterly', 'Monthly']:
126 | weighted_score[g] = scores[g] * group_count(g)
127 | scores_summary[g] = scores[g]
128 |
129 | others_score = 0
130 | others_count = 0
131 | for g in ['Weekly', 'Daily', 'Hourly']:
132 | others_score += scores[g] * group_count(g)
133 | others_count += group_count(g)
134 | weighted_score['Others'] = others_score
135 | scores_summary['Others'] = others_score / others_count
136 |
137 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
138 | scores_summary['Average'] = average
139 |
140 | return scores_summary
141 |
--------------------------------------------------------------------------------
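
The OWA computed in M4Summary.evaluate is the mean of two ratios against the Naive2 benchmark, one for MASE and one for sMAPE. A tiny standalone sketch with hypothetical group-level scores (illustrative only, not a repository file):

def owa(model_smape, model_mase, naive2_smape, naive2_mase):
    # Overall Weighted Average: average of the model/Naive2 ratios for MASE and sMAPE.
    return (model_mase / naive2_mase + model_smape / naive2_smape) / 2

print(owa(model_smape=13.5, model_mase=1.05, naive2_smape=14.4, naive2_mase=1.20))  # 0.90625 (< 1 beats Naive2)
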
/utils/masking.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class TriangularCausalMask():
5 | def __init__(self, B, L, device="cpu"):
6 | mask_shape = [B, 1, L, L]
7 | with torch.no_grad():
8 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
9 |
10 | @property
11 | def mask(self):
12 | return self._mask
13 |
14 |
15 | class ProbMask():
16 | def __init__(self, B, H, L, index, scores, device="cpu"):
17 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
18 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
19 | indicator = _mask_ex[torch.arange(B)[:, None, None],
20 | torch.arange(H)[None, :, None],
21 | index, :].to(device)
22 | self._mask = indicator.view(scores.shape).to(device)
23 |
24 | @property
25 | def mask(self):
26 | return self._mask
--------------------------------------------------------------------------------
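
A minimal sketch of how TriangularCausalMask is typically applied to attention logits (illustrative only; the shapes are hypothetical but match the [B, 1, L, L] mask layout above):

import torch
from utils.masking import TriangularCausalMask

B, H, L = 2, 4, 8
scores = torch.randn(B, H, L, L)                     # raw attention logits
causal = TriangularCausalMask(B, L)                  # True strictly above the diagonal (future keys)
scores = scores.masked_fill(causal.mask, float('-inf'))
attn = torch.softmax(scores, dim=-1)                 # each query attends only to itself and the past
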
/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def RSE(pred, true):
5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))
6 |
7 |
8 | def CORR(pred, true):
9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
11 | return (u / d).mean(-1)
12 |
13 |
14 | def MAE(pred, true):
15 | return np.mean(np.abs(pred - true))
16 |
17 |
18 | def MSE(pred, true):
19 | return np.mean((pred - true) ** 2)
20 |
21 |
22 | def RMSE(pred, true):
23 | return np.sqrt(MSE(pred, true))
24 |
25 |
26 | def MAPE(pred, true):
27 | return np.mean(np.abs((pred - true) / true))
28 |
29 |
30 | def MSPE(pred, true):
31 | return np.mean(np.square((pred - true) / true))
32 |
33 |
34 | def metric(pred, true):
35 | mae = MAE(pred, true)
36 | mse = MSE(pred, true)
37 | rmse = RMSE(pred, true)
38 | mape = MAPE(pred, true)
39 | mspe = MSPE(pred, true)
40 |
41 | return mae, mse, rmse, mape, mspe
42 |
--------------------------------------------------------------------------------
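
Note that MAPE and MSPE divide by the ground truth, so they are only meaningful when `true` contains no zeros. A minimal sketch of the aggregate helper (illustrative only, assuming the repository root is on PYTHONPATH):

import numpy as np
from utils.metrics import metric

pred = np.array([[1.0, 2.0, 3.5]])
true = np.array([[1.0, 2.5, 3.0]])                   # non-zero targets, so MAPE/MSPE are well defined
mae, mse, rmse, mape, mspe = metric(pred, true)
print(f"MAE={mae:.3f} MSE={mse:.3f} RMSE={rmse:.3f} MAPE={mape:.3f} MSPE={mspe:.3f}")
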
/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 |
8 |
9 | class TimeFeature:
10 | def __init__(self):
11 | pass
12 |
13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
14 | pass
15 |
16 | def __repr__(self):
17 | return self.__class__.__name__ + "()"
18 |
19 |
20 | class SecondOfMinute(TimeFeature):
21 |     """Second of minute encoded as value between [-0.5, 0.5]"""
22 |
23 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
24 | return index.second / 59.0 - 0.5
25 |
26 |
27 | class MinuteOfHour(TimeFeature):
28 | """Minute of hour encoded as value between [-0.5, 0.5]"""
29 |
30 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
31 | return index.minute / 59.0 - 0.5
32 |
33 |
34 | class HourOfDay(TimeFeature):
35 | """Hour of day encoded as value between [-0.5, 0.5]"""
36 |
37 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
38 | return index.hour / 23.0 - 0.5
39 |
40 |
41 | class DayOfWeek(TimeFeature):
42 |     """Day of week encoded as value between [-0.5, 0.5]"""
43 |
44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
45 | return index.dayofweek / 6.0 - 0.5
46 |
47 |
48 | class DayOfMonth(TimeFeature):
49 | """Day of month encoded as value between [-0.5, 0.5]"""
50 |
51 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
52 | return (index.day - 1) / 30.0 - 0.5
53 |
54 |
55 | class DayOfYear(TimeFeature):
56 | """Day of year encoded as value between [-0.5, 0.5]"""
57 |
58 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
59 | return (index.dayofyear - 1) / 365.0 - 0.5
60 |
61 |
62 | class MonthOfYear(TimeFeature):
63 | """Month of year encoded as value between [-0.5, 0.5]"""
64 |
65 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
66 | return (index.month - 1) / 11.0 - 0.5
67 |
68 |
69 | class WeekOfYear(TimeFeature):
70 | """Week of year encoded as value between [-0.5, 0.5]"""
71 |
72 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
73 | return (index.isocalendar().week - 1) / 52.0 - 0.5
74 |
75 |
76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
77 | """
78 | Returns a list of time features that will be appropriate for the given frequency string.
79 | Parameters
80 | ----------
81 | freq_str
82 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
83 | """
84 |
85 | features_by_offsets = {
86 | offsets.YearEnd: [],
87 | offsets.QuarterEnd: [MonthOfYear],
88 | offsets.MonthEnd: [MonthOfYear],
89 | offsets.Week: [DayOfMonth, WeekOfYear],
90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
93 | offsets.Minute: [
94 | MinuteOfHour,
95 | HourOfDay,
96 | DayOfWeek,
97 | DayOfMonth,
98 | DayOfYear,
99 | ],
100 | offsets.Second: [
101 | SecondOfMinute,
102 | MinuteOfHour,
103 | HourOfDay,
104 | DayOfWeek,
105 | DayOfMonth,
106 | DayOfYear,
107 | ],
108 | }
109 |
110 | offset = to_offset(freq_str)
111 |
112 | for offset_type, feature_classes in features_by_offsets.items():
113 | if isinstance(offset, offset_type):
114 | return [cls() for cls in feature_classes]
115 |
116 | supported_freq_msg = f"""
117 | Unsupported frequency {freq_str}
118 | The following frequencies are supported:
119 | Y - yearly
120 | alias: A
121 | M - monthly
122 | W - weekly
123 | D - daily
124 | B - business days
125 | H - hourly
126 | T - minutely
127 | alias: min
128 | S - secondly
129 | """
130 | raise RuntimeError(supported_freq_msg)
131 |
132 |
133 | def time_features(dates, freq='h'):
134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])
--------------------------------------------------------------------------------
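
A minimal sketch of the resulting feature matrix for an hourly index (illustrative only): for an Hour offset the factory returns HourOfDay, DayOfWeek, DayOfMonth and DayOfYear, so time_features stacks four rows, one per feature, each scaled to [-0.5, 0.5]:

import pandas as pd
from utils.timefeatures import time_features

dates = pd.date_range('2021-01-01', periods=6, freq='h')
feats = time_features(dates, freq='h')
print(feats.shape)   # (4, 6): rows are HourOfDay, DayOfWeek, DayOfMonth, DayOfYear
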
/utils/tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import matplotlib.pyplot as plt
4 | import shutil
5 |
6 | from tqdm import tqdm
7 |
8 | plt.switch_backend('agg')
9 |
10 |
11 | def adjust_learning_rate(accelerator, optimizer, scheduler, epoch, args, printout=True):
12 | if args.lradj == 'type1':
13 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
14 | elif args.lradj == 'type2':
15 | lr_adjust = {
16 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
17 | 10: 5e-7, 15: 1e-7, 20: 5e-8
18 | }
19 | elif args.lradj == 'type3':
20 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
21 | elif args.lradj == 'PEMS':
22 | lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))}
23 | elif args.lradj == 'TST':
24 | lr_adjust = {epoch: scheduler.get_last_lr()[0]}
25 | elif args.lradj == 'constant':
26 | lr_adjust = {epoch: args.learning_rate}
27 | if epoch in lr_adjust.keys():
28 | lr = lr_adjust[epoch]
29 | for param_group in optimizer.param_groups:
30 | param_group['lr'] = lr
31 | if printout:
32 | if accelerator is not None:
33 | accelerator.print('Updating learning rate to {}'.format(lr))
34 | else:
35 | print('Updating learning rate to {}'.format(lr))
36 |
37 |
38 | class EarlyStopping:
39 | def __init__(self, accelerator=None, patience=7, verbose=False, delta=0, save_mode=True):
40 | self.accelerator = accelerator
41 | self.patience = patience
42 | self.verbose = verbose
43 | self.counter = 0
44 | self.best_score = None
45 | self.early_stop = False
46 |         self.val_loss_min = np.inf
47 | self.delta = delta
48 | self.save_mode = save_mode
49 |
50 | def __call__(self, val_loss, model, path):
51 | score = -val_loss
52 | if self.best_score is None:
53 | self.best_score = score
54 | if self.save_mode:
55 | self.save_checkpoint(val_loss, model, path)
56 | elif score < self.best_score + self.delta:
57 | self.counter += 1
58 | if self.accelerator is None:
59 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
60 | else:
61 | self.accelerator.print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
62 | if self.counter >= self.patience:
63 | self.early_stop = True
64 | else:
65 | self.best_score = score
66 | if self.save_mode:
67 | self.save_checkpoint(val_loss, model, path)
68 | self.counter = 0
69 |
70 | def save_checkpoint(self, val_loss, model, path):
71 | if self.verbose:
72 | if self.accelerator is not None:
73 | self.accelerator.print(
74 | f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
75 | else:
76 | print(
77 | f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
78 |
79 | if self.accelerator is not None:
80 | model = self.accelerator.unwrap_model(model)
81 | torch.save(model.state_dict(), path + '/' + 'checkpoint')
82 | else:
83 | torch.save(model.state_dict(), path + '/' + 'checkpoint')
84 | self.val_loss_min = val_loss
85 |
86 |
87 | class dotdict(dict):
88 | """dot.notation access to dictionary attributes"""
89 | __getattr__ = dict.get
90 | __setattr__ = dict.__setitem__
91 | __delattr__ = dict.__delitem__
92 |
93 |
94 | class StandardScaler():
95 | def __init__(self, mean, std):
96 | self.mean = mean
97 | self.std = std
98 |
99 | def transform(self, data):
100 | return (data - self.mean) / self.std
101 |
102 | def inverse_transform(self, data):
103 | return (data * self.std) + self.mean
104 |
105 | def adjustment(gt, pred):
106 | anomaly_state = False
107 | for i in range(len(gt)):
108 | if gt[i] == 1 and pred[i] == 1 and not anomaly_state:
109 | anomaly_state = True
110 | for j in range(i, 0, -1):
111 | if gt[j] == 0:
112 | break
113 | else:
114 | if pred[j] == 0:
115 | pred[j] = 1
116 | for j in range(i, len(gt)):
117 | if gt[j] == 0:
118 | break
119 | else:
120 | if pred[j] == 0:
121 | pred[j] = 1
122 | elif gt[i] == 0:
123 | anomaly_state = False
124 | if anomaly_state:
125 | pred[i] = 1
126 | return gt, pred
127 |
128 |
129 | def cal_accuracy(y_pred, y_true):
130 | return np.mean(y_pred == y_true)
131 |
132 |
133 | def del_files(dir_path):
134 | shutil.rmtree(dir_path)
135 |
136 |
137 | def vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric):
138 | total_loss = []
139 | total_mae_loss = []
140 | model.eval()
141 | with torch.no_grad():
142 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)):
143 | batch_x = batch_x.float().to(accelerator.device)
144 | batch_y = batch_y.float()
145 |
146 | batch_x_mark = batch_x_mark.float().to(accelerator.device)
147 | batch_y_mark = batch_y_mark.float().to(accelerator.device)
148 |
149 | # decoder input
150 | dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
151 | dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
152 | accelerator.device)
153 | # encoder - decoder
154 | if args.use_amp:
155 | with torch.cuda.amp.autocast():
156 | if args.output_attention:
157 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
158 | else:
159 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
160 | else:
161 | if args.output_attention:
162 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
163 | else:
164 | outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
165 |
166 | outputs, batch_y = accelerator.gather_for_metrics((outputs, batch_y))
167 |
168 | f_dim = -1 if args.features == 'MS' else 0
169 | outputs = outputs[:, -args.pred_len:, f_dim:]
170 | batch_y = batch_y[:, -args.pred_len:, f_dim:].to(accelerator.device)
171 |
172 | pred = outputs.detach()
173 | true = batch_y.detach()
174 |
175 | loss = criterion(pred, true)
176 |
177 | mae_loss = mae_metric(pred, true)
178 |
179 | total_loss.append(loss.item())
180 | total_mae_loss.append(mae_loss.item())
181 |
182 | total_loss = np.average(total_loss)
183 | total_mae_loss = np.average(total_mae_loss)
184 |
185 | model.train()
186 | return total_loss, total_mae_loss
187 |
188 |
189 | def test(args, accelerator, model, train_loader, vali_loader, criterion):
190 | x, _ = train_loader.dataset.last_insample_window()
191 | y = vali_loader.dataset.timeseries
192 | x = torch.tensor(x, dtype=torch.float32).to(accelerator.device)
193 | x = x.unsqueeze(-1)
194 |
195 | model.eval()
196 | with torch.no_grad():
197 | B, _, C = x.shape
198 | dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
199 | dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1)
200 | outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
201 | id_list = np.arange(0, B, args.eval_batch_size)
202 | id_list = np.append(id_list, B)
203 | for i in range(len(id_list) - 1):
204 | outputs[id_list[i]:id_list[i + 1], :, :] = model(
205 | x[id_list[i]:id_list[i + 1]],
206 | None,
207 | dec_inp[id_list[i]:id_list[i + 1]],
208 | None
209 | )
210 | accelerator.wait_for_everyone()
211 | outputs = accelerator.gather_for_metrics(outputs)
212 | f_dim = -1 if args.features == 'MS' else 0
213 | outputs = outputs[:, -args.pred_len:, f_dim:]
214 | pred = outputs
215 | true = torch.from_numpy(np.array(y)).to(accelerator.device)
216 | batch_y_mark = torch.ones(true.shape).to(accelerator.device)
217 | true = accelerator.gather_for_metrics(true)
218 | batch_y_mark = accelerator.gather_for_metrics(batch_y_mark)
219 |
220 | loss = criterion(x[:, :, 0], args.frequency_map, pred[:, :, 0], true, batch_y_mark)
221 |
222 | model.train()
223 | return loss
224 |
225 |
226 | def load_content(args):
227 | if 'ETT' in args.data:
228 | file = 'ETT'
229 | else:
230 | file = args.data
231 | with open('./dataset/prompt_bank/{0}.txt'.format(file), 'r') as f:
232 | content = f.read()
233 | return content
--------------------------------------------------------------------------------
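
A minimal sketch of how EarlyStopping behaves when driven with a synthetic validation-loss curve (illustrative only; it uses the single-process path with accelerator=None, a stand-in model, and a hypothetical checkpoint directory):

import os
import torch.nn as nn
from utils.tools import EarlyStopping

os.makedirs('./checkpoints/demo', exist_ok=True)
model = nn.Linear(4, 1)                                           # stand-in model
early_stopping = EarlyStopping(accelerator=None, patience=2, verbose=True)

for epoch, val_loss in enumerate([0.9, 0.7, 0.72, 0.71, 0.73]):   # improves twice, then stalls
    early_stopping(val_loss, model, path='./checkpoints/demo')    # saves ./checkpoints/demo/checkpoint on improvement
    if early_stopping.early_stop:
        print(f'Early stopping triggered at epoch {epoch}')
        break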