├── ltsm
├── utils
│ ├── __init__.py
│ ├── .DS_Store
│ ├── masking.py
│ ├── dist.py
│ ├── metrics.py
│ └── timefeatures.py
├── sk_interface
│ └── __init__.py
├── prompt_reader
│ ├── soft_prompt
│ │ └── README.md
│ ├── stat_prompt
│ │ ├── tsfel
│ │ │ ├── __init__.py
│ │ │ ├── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── progress_bar.py
│ │ │ │ ├── signal_processing.py
│ │ │ │ ├── calculate_complexity.py
│ │ │ │ └── add_personal_features.py
│ │ │ └── feature_extraction
│ │ │ │ ├── __init__.py
│ │ │ │ └── features_settings.py
│ │ ├── README.md
│ │ └── prompt_tsne.py
│ └── text_prompt
│ │ └── csv_prompt.json
├── common
│ ├── base_splitter.py
│ ├── base_reader.py
│ ├── base_processor.py
│ └── sklearn.py
├── data_provider
│ ├── tokenizer
│ │ ├── __init__.py
│ │ └── standard_scaler.py
│ ├── hf_train_data_loader.py
│ ├── __init__.py
│ └── data_splitter.py
├── data_pipeline
│ └── __init__.py
├── models
│ ├── ltsm_base.py
│ ├── ltsm_ts_tokenizer.py
│ ├── __init__.py
│ ├── ltsm_stat_model.py
│ ├── DLinear.py
│ ├── PatchTST.py
│ ├── base_config.py
│ ├── Informer.py
│ └── utils.py
├── data_reader
│ ├── __init__.py
│ └── database_reader.py
└── layers
│ ├── RevIN.py
│ ├── Transformer_EncDec.py
│ └── PatchTST_layers.py
├── tests
├── common
│ └── __init__.py
├── models
│ ├── __init__.py
│ ├── init_test.py
│ ├── DLinear_test.py
│ ├── PatchTST_test.py
│ └── Informer_test.py
├── data_provider
│ ├── __init__.py
│ ├── data_splitter_test.py
│ ├── tokenizer
│ │ └── standard_scaler_test.py
│ └── prompt_generator_test.py
├── data_reader
│ ├── __init__.py
│ ├── dataloader_unittest_example.py
│ ├── database_reader_test.py
│ ├── train_database_reader_test.py
│ └── npy_database_reader_test.py
├── test_scripts
│ ├── anomaly_main_ltsm.py
│ ├── main_tokenizer.py
│ ├── prompt_generation_norm.sh
│ ├── dlinear.json
│ ├── ltsm.json
│ ├── patchtst.json
│ ├── train_ltsm_csv.sh
│ ├── train_dlinear_csv.sh
│ ├── train_patchtst_csv.sh
│ ├── train_informer_csv.sh
│ ├── informer.json
│ ├── train_ltsm_tokenizer_csv.sh
│ ├── train_anomaly_main_ltsm.sh
│ ├── anomaly_config
│ │ ├── config-1.json
│ │ └── config.json
│ ├── train_ltsm_textprompt_csv.sh
│ ├── test_ltsm.sh
│ ├── test_tokenizer_training.py
│ ├── test_csv_lora.sh
│ ├── main_ltsm.py
│ └── test_pipeline_training.py
├── data_pipeline
│ └── stat_pipeline_test.py
└── evaluate_pipeline
│ └── evaluation_pipeline_test.py
├── datasets
└── README.md
├── multi_agents_pipeline
├── agents
│ ├── __init__.py
│ ├── custom_messages.py
│ ├── TS_Agent.py
│ ├── QA_Agent.py
│ └── Planning_Agent.py
├── Agents.jpg
├── model_config.yaml
├── llm-server.py
├── Readme.md
├── ltsm_inference.py
└── main.py
├── imgs
├── ltsm_model.png
├── stat_prompt.png
└── prompt_csv_tsne.png
├── setup.py
├── .github
└── workflows
│ └── test.yml
├── .gitignore
├── requirements.txt
└── tutorial
└── README.md
/ltsm/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ltsm/sk_interface/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/data_reader/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/README.md:
--------------------------------------------------------------------------------
1 | # Training Dataset
--------------------------------------------------------------------------------
/multi_agents_pipeline/agents/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/soft_prompt/README.md:
--------------------------------------------------------------------------------
1 | # Time Series Prompt Dataset
--------------------------------------------------------------------------------
/imgs/ltsm_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/ltsm/HEAD/imgs/ltsm_model.png
--------------------------------------------------------------------------------
/imgs/stat_prompt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/ltsm/HEAD/imgs/stat_prompt.png
--------------------------------------------------------------------------------
/ltsm/utils/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/ltsm/HEAD/ltsm/utils/.DS_Store
--------------------------------------------------------------------------------
/imgs/prompt_csv_tsne.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/ltsm/HEAD/imgs/prompt_csv_tsne.png
--------------------------------------------------------------------------------
/multi_agents_pipeline/Agents.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/ltsm/HEAD/multi_agents_pipeline/Agents.jpg
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/__init__.py:
--------------------------------------------------------------------------------
1 | from tsfel.utils import *
2 | from tsfel.feature_extraction import *
3 |
--------------------------------------------------------------------------------
/ltsm/common/base_splitter.py:
--------------------------------------------------------------------------------
class DataSplitter:
    """Base interface for objects that split a dataset into subsets.

    Both methods are empty placeholders in this base class; subclasses
    presumably override them with real splitting logic.
    """

    def __init__(self):
        """Create the splitter. The base class keeps no state."""

    def get_splits(self):
        """Return the data splits. The base implementation returns ``None``."""
--------------------------------------------------------------------------------
/ltsm/common/base_reader.py:
--------------------------------------------------------------------------------
class BaseReader:
    """Base interface for dataset readers.

    Both methods are empty placeholders in this base class.
    """

    def __init__(self):
        """Create the reader. The base class keeps no state."""

    def fetch(self):
        """Fetch the data. The base implementation returns ``None``.

        Per the original author's note, implementations are meant to take a
        path as input and produce a DataFrame as output.
        """
--------------------------------------------------------------------------------
/multi_agents_pipeline/model_config.yaml:
--------------------------------------------------------------------------------
1 | provider: autogen_ext.models.openai.OpenAIChatCompletionClient
2 | config:
3 | model: gpt-4o
4 | base_url: http://127.0.0.1:8000/v1
5 | api_key: REPLACE_WITH_YOUR_API_KEY
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from tsfel.utils.calculate_complexity import *
2 | from tsfel.utils.signal_processing import *
3 | from tsfel.utils.add_personal_features import *
4 | from tsfel.utils.progress_bar import *
5 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/feature_extraction/__init__.py:
--------------------------------------------------------------------------------
1 | from tsfel.feature_extraction.calc_features import *
2 | from tsfel.feature_extraction.features import *
3 | from tsfel.feature_extraction.features_settings import *
4 | from tsfel.feature_extraction.features_utils import *
5 |
--------------------------------------------------------------------------------
/tests/test_scripts/anomaly_main_ltsm.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_pipeline import AnomalyTrainingPipeline, anomaly_get_args, anomaly_seed_all
2 |
if __name__ == "__main__":
    # Parse the run configuration, seed with its `seed` value, then run the
    # anomaly training pipeline.
    cli_args = anomaly_get_args()
    anomaly_seed_all(cli_args.seed)
    AnomalyTrainingPipeline(cli_args).run()
--------------------------------------------------------------------------------
/tests/test_scripts/main_tokenizer.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_pipeline import TokenizerTrainingPipeline, tokenizer_get_args, tokenizer_seed_all
2 |
if __name__ == "__main__":
    # Parse the run configuration, seed with its `seed` value, then run the
    # tokenizer training pipeline.
    run_config = tokenizer_get_args()
    tokenizer_seed_all(run_config.seed)
    TokenizerTrainingPipeline(run_config).run()
--------------------------------------------------------------------------------
/ltsm/data_provider/tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_provider.tokenizer.standard_scaler import StandardScaler
2 |
3 | processor_dict = {}
4 |
def register_processor(module):
    """Register a processor in ``processor_dict`` under its ``module_id``.

    Args:
        module: Object exposing a ``module_id`` attribute, which is used as
            the dictionary key.

    Raises:
        AssertionError: If a processor with the same ``module_id`` is already
            registered.
    """
    assert module.module_id not in processor_dict, f"Processor {module.module_id} already registered"
    processor_dict[module.module_id] = module
8 |
9 | register_processor(StandardScaler)
10 |
--------------------------------------------------------------------------------
/ltsm/data_provider/hf_train_data_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 | from torch.utils.data import Dataset, DataLoader
6 | from sklearn.preprocessing import StandardScaler
7 | import warnings
8 | from pathlib import Path
9 |
10 | from torch.utils.data.dataset import ConcatDataset, Dataset
11 |
12 | from ltsm.utils.timefeatures import time_features
13 | from ltsm.utils.tools import convert_tsf_to_dataframe
14 |
15 | warnings.filterwarnings('ignore')
16 |
17 |
--------------------------------------------------------------------------------
/ltsm/common/base_processor.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Literal, Optional, Tuple, Union
2 | from dataclasses import dataclass
3 | import torch
4 |
class BaseProcessor:
    """Base interface for data processors.

    Every method is an empty placeholder that returns ``None``; subclasses
    presumably supply the real behaviour.
    """

    def __init__(self):
        """Create the processor. The base class keeps no state."""

    def process(self, raw_data, train_data, val_data, test_data, fit_train_only=False):
        """Process the raw data and its train/val/test splits.

        ``fit_train_only`` defaults to ``False``; its exact meaning is left to
        implementations.
        """

    def inverse_process(self, data):
        """Undo the processing applied to ``data``."""

    def save(self, save_dir):
        """Persist the processor state under ``save_dir``."""

    def load(self, save_dir):
        """Restore the processor state from ``save_dir``."""
20 |
--------------------------------------------------------------------------------
/tests/test_scripts/prompt_generation_norm.sh:
--------------------------------------------------------------------------------
1 | data_name="
2 | web_attack"
3 | save_format="pth.tar"
4 |
5 |
6 | python ./ltsm/prompt_reader/stat_prompt/prompt_generate_split.py \
7 | --dataset_name ${data_name} \
8 | --save_format ${save_format}
9 | python ./ltsm/prompt_reader/stat_prompt/prompt_normalization_split.py --mode fit --dataset_name ${data_name} --save_format ${save_format}
10 | python ./ltsm/prompt_reader/stat_prompt/prompt_normalization_split.py --mode transform --dataset_name ${data_name} --save_format ${save_format}
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
# Package metadata for the `ltsm` distribution.
setuptools.setup(
    name="ltsm",
    version='1.0.0',
    author="Data Lab",
    author_email="daochen.zha@rice.edu",
    description="Large Time Series Model",
    url="XXXX",  # NOTE(review): placeholder value; replace with the project URL
    keywords=["Time Series"],
    # Exclude the test tree and any of its sub-packages from the distribution.
    packages=setuptools.find_packages(exclude=('tests', 'tests.*')),
    # `python_requires` is the keyword setuptools recognizes; the previous
    # `requires_python` spelling was ignored, so no version bound was enforced.
    python_requires='>=3.8',
    classifiers=[
        "Programming Language :: Python :: 3.8",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
19 |
--------------------------------------------------------------------------------
/tests/test_scripts/dlinear.json:
--------------------------------------------------------------------------------
1 | {
2 | "train_params": {
3 | "model": "DLinear",
4 | "model_name_or_path": "gpt2-medium",
5 | "train_epochs": 100,
6 | "gradient_accumulation_steps": 64,
7 | "des": "Exp",
8 | "freeze": 0,
9 | "itr": 1,
10 | "learning_rate": 1e-3,
11 | "downsample_rate": 20,
12 | "output_dir": "output/dlinear/",
13 | "eval": 0,
14 | "features": "M"
15 | },
16 | "model_config": {
17 | "pred_len": 96,
18 | "seq_len": 336
19 | }
20 | }
--------------------------------------------------------------------------------
/tests/test_scripts/ltsm.json:
--------------------------------------------------------------------------------
1 | {
2 | "train_params": {
3 | "model": "LTSM",
4 | "train_epochs": 2,
5 | "batch_size": 100,
6 | "gradient_accumulation_steps": 64,
7 | "prompt_data_path": "../../prompt_bank/prompt_data_normalize_split",
8 | "freeze": 0,
9 | "learning_rate": 1e-3,
10 | "downsample_rate": 20,
11 | "eval": 0,
12 | "tmax": 100
13 | },
14 | "model_config": {
15 | "model_name_or_path": "gpt2-medium",
16 | "patch_size": 16,
17 | "pretrain": 1,
18 | "stride": 8,
19 | "gpt_layers": 3
20 | }
21 | }
--------------------------------------------------------------------------------
/ltsm/data_pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | from .stat_pipeline import StatisticalTrainingPipeline, get_args, seed_all
2 | from .model_manager import ModelManager
3 | from .anormly_pipeline import AnomalyTrainingPipeline, anomaly_get_args, anomaly_seed_all
4 | from .tokenizer_pipeline import TokenizerTrainingPipeline, tokenizer_get_args, tokenizer_seed_all
5 |
# Public API of the package. `__all__` must be a sequence of *names* (strings):
# a set of the objects themselves makes `from ltsm.data_pipeline import *`
# raise a TypeError.
__all__ = [
    "StatisticalTrainingPipeline",
    "AnomalyTrainingPipeline",
    "TokenizerTrainingPipeline",
    "ModelManager",
    "get_args",
    "anomaly_get_args",
    "tokenizer_get_args",
    "seed_all",
    "anomaly_seed_all",
    "tokenizer_seed_all",
]
--------------------------------------------------------------------------------
/ltsm/models/ltsm_base.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from transformers import PretrainedConfig
3 | import json
4 |
@dataclass
class LTSMConfig(PretrainedConfig):
    """Free-form configuration object for LTSM models.

    Every keyword passed to the constructor or to :meth:`update`, and every
    top-level key of the JSON file given to :meth:`load`, becomes an attribute
    of the instance.

    NOTE(review): ``@dataclass`` is applied to a class that declares no
    annotated fields; confirm the decorator is intentional.
    """

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``PretrainedConfig`` and mirror them as attributes."""
        super().__init__(**kwargs)

        # Set every key explicitly as well, after the parent initializer ran.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def update(self, **kwargs):
        """Set each keyword argument as an attribute on this config.

        NOTE(review): this takes keyword arguments only; verify it is
        compatible with how ``PretrainedConfig.update`` is called elsewhere.
        """
        for key, value in kwargs.items():
            setattr(self, key, value)

    def load(self, json_file):
        """Load attributes from a JSON file and return ``self``.

        Args:
            json_file: Path to a JSON file whose top-level value is an object;
                each key/value pair is set as an attribute.

        Returns:
            This config instance, to allow chaining.
        """
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(json_file, encoding="utf-8") as f:
            config = json.load(f)

        for key, value in config.items():
            setattr(self, key, value)

        return self
--------------------------------------------------------------------------------
/multi_agents_pipeline/agents/custom_messages.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 | from typing import Optional, List
3 |
4 |
# Pydantic model with two required string fields and one optional field.
class TextMessage(BaseModel):
    """
    passed from Planner to QA Agent"""
    source: str
    content: str
    # Optional; defaults to None when not supplied.
    task: Optional[str] = None
11 |
# Pydantic model that carries a data file path plus optional task metadata.
class TSMessage(BaseModel):
    """
    passed from Planner to TS Agent, and from TS Agent to QA Agent

    filepath should be a valid path to a csv/tsv file"""
    source: str
    filepath: str
    # Both fields below are optional and default to None.
    task_type:Optional[str] = None
    description: Optional[str] = None
21 |
# Pydantic model with two required string fields.
class TSTaskMessage(BaseModel):
    """
    passed to Planner

    This message contains a text prompt and the filepath to the data file.
    """
    # The text prompt.
    description: str
    # Path to the data file.
    filepath: str
--------------------------------------------------------------------------------
/tests/test_scripts/patchtst.json:
--------------------------------------------------------------------------------
1 | {
2 | "train_params": {
3 | "model": "PatchTST",
4 | "model_name_or_path": "gpt2-medium",
5 | "des": "Exp",
6 | "train_epochs": 100,
7 | "patience": 10,
8 | "lradj": "TST",
9 | "pct_start": 0.2,
10 | "freeze": 0,
11 | "itr": 1,
12 | "learning_rate": 1e-3,
13 | "downsample_rate": 20,
14 | "features": "M"
15 | },
16 | "model_config": {
17 | "pred_len": 96,
18 | "gradient_accumulation_steps": 64,
19 | "e_layers": 3,
20 | "n_heads": 16,
21 | "d_model": 128,
22 | "d_ff": 256,
23 | "dropout": 0.2,
24 | "fc_dropout": 0.2,
25 | "head_dropout": 0,
26 | "seq_len": 336,
27 | "patch_len": 16,
28 | "stride": 8
29 | }
30 | }
--------------------------------------------------------------------------------
/tests/test_scripts/train_ltsm_csv.sh:
--------------------------------------------------------------------------------
# Train LTSM on the eight CSV datasets, once per prediction length, as a
# background job. stdout/stderr go to output.log and the PID of the background
# job is written to save_pid.txt.
#
# data_paths is a newline-separated string that is expanded unquoted on
# purpose, so that every path becomes its own command-line argument.
nohup bash -c '
data_paths="../../datasets/ETT-small/ETTh1.csv
../../datasets/ETT-small/ETTh2.csv
../../datasets/ETT-small/ETTm1.csv
../../datasets/ETT-small/ETTm2.csv
../../datasets/electricity/electricity.csv
../../datasets/traffic/traffic.csv
../../datasets/exchange_rate/exchange_rate.csv
../../datasets/weather/weather.csv"

declare -a pred_len=(96 192 336 720)

for index in "${!pred_len[@]}";
do
    CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
    --config "ltsm.json" \
    --data_path ${data_paths} \
    --test_data_path_list ${data_paths} \
    --pred_len ${pred_len[$index]} \
    --output_dir output/ltsm_lr1e-3_loraFalse_down20_freeze0_e2_pred${pred_len[$index]}/
done
' > output.log 2>&1 &
echo $! > save_pid.txt
--------------------------------------------------------------------------------
/ltsm/data_reader/__init__.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_reader.monash_reader import MonashReader
2 | from ltsm.data_reader.csv_reader import CSVReader
3 | reader_dict = {}
4 |
def register_reader(module):
    """
    Add a reader to ``reader_dict``, keyed by its ``module_id``.

    Args:
        module: A reader class (or other object) exposing a ``module_id``
            attribute.

    Raises:
        AssertionError: If a reader with the same ``module_id`` is already registered
    """
    reader_id = module.module_id
    assert reader_id not in reader_dict, f"Reader {reader_id} already registered"
    reader_dict[reader_id] = module
18 |
19 | register_reader(MonashReader)
20 | register_reader(CSVReader)
21 |
# Public API of the package. `__all__` must list *names* (strings); a set of
# the objects themselves makes `from ltsm.data_reader import *` raise a
# TypeError.
__all__ = [
    "register_reader",
    "MonashReader",
    "CSVReader",
]
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | - pull_request
5 | - push
6 |
7 | jobs:
8 | test:
9 | runs-on: ${{ matrix.os }}
10 | strategy:
11 | matrix:
12 | os: [ubuntu-latest]
13 | python-version: ['3.10']
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v4
17 |
18 | - name: Set up Python
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: ${{ matrix.python-version }}
22 |
23 | - name: Install dependencies
24 | run: |
25 | python -m pip install --upgrade pip
26 | pip install -r requirements.txt
27 |
28 | - name: Test with pytest
29 | run: |
30 | export PYTHONPATH=./:$PYTHONPATH
31 | pytest tests/ --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
32 |
--------------------------------------------------------------------------------
/ltsm/utils/masking.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/yuqinie98/PatchTST, with minor modifications
2 | import torch
3 |
class TriangularCausalMask():
    """Boolean mask that is ``True`` strictly above the main diagonal.

    The mask has shape ``[B, 1, L, L]`` and is built once in the constructor
    and moved to ``device``.
    """

    def __init__(self, B, L, device="cpu"):
        with torch.no_grad():
            all_true = torch.ones([B, 1, L, L], dtype=torch.bool)
            # diagonal=1 keeps only entries with column index > row index.
            upper = torch.triu(all_true, diagonal=1)
            self._mask = upper.to(device)

    @property
    def mask(self):
        """The precomputed boolean mask tensor."""
        return self._mask
13 |
14 |
class ProbMask():
    """Upper-triangular boolean mask restricted to the rows selected by ``index``.

    An ``L x scores.shape[-1]`` strictly-upper-triangular mask is expanded to
    ``[B, H, L, scores.shape[-1]]``; the rows given by ``index`` are then
    picked per batch and head, and the result is viewed with ``scores.shape``.
    """

    def __init__(self, B, H, L, index, scores, device="cpu"):
        n_cols = scores.shape[-1]
        base = torch.ones(L, n_cols, dtype=torch.bool).to(device).triu(1)
        expanded = base[None, None, :].expand(B, H, L, n_cols)
        # Broadcastable batch/head indices so `index` selects rows per (b, h).
        batch_idx = torch.arange(B)[:, None, None]
        head_idx = torch.arange(H)[None, :, None]
        selected = expanded[batch_idx, head_idx, index, :].to(device)
        self._mask = selected.view(scores.shape).to(device)

    @property
    def mask(self):
        """The precomputed boolean mask tensor."""
        return self._mask
--------------------------------------------------------------------------------
/ltsm/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_factory import DatasetFactory
2 | from .data_loader import (
3 | HF_Dataset,
4 | HF_Timestamp_Dataset,
5 | Dataset_ETT_hour,
6 | Dataset_ETT_minute,
7 | Dataset_Custom,
8 | Dataset_Pred,
9 | Dataset_TSF,
10 | Dataset_Custom_List,
11 | Dataset_Custom_List_TS,
12 | Dataset_Custom_List_TS_TSF
13 | )
14 | from .data_splitter import SplitterByTimestamp
15 | from .dataset import TSDataset, TSPromptDataset, TSTokenDataset
16 | from .prompt_generator import prompt_generate_split, prompt_normalization_split
17 |
# Public API of the package. `__all__` must list *names* (strings); a set of
# the objects themselves makes `from ltsm.data_provider import *` raise a
# TypeError.
__all__ = [
    "DatasetFactory",
    "HF_Dataset",
    "HF_Timestamp_Dataset",
    "Dataset_ETT_hour",
    "Dataset_ETT_minute",
    "Dataset_Custom",
    "Dataset_Pred",
    "Dataset_TSF",
    "Dataset_Custom_List",
    "Dataset_Custom_List_TS",
    "Dataset_Custom_List_TS_TSF",
    "SplitterByTimestamp",
    "TSDataset",
    "TSPromptDataset",
    "TSTokenDataset",
    "prompt_generate_split",
    "prompt_normalization_split",
]
--------------------------------------------------------------------------------
/tests/test_scripts/train_dlinear_csv.sh:
--------------------------------------------------------------------------------
1 | nohup bash -c '
2 | declare -a data_paths=(
3 | "../../datasets/ETT-small/ETTh1.csv"
4 | "../../datasets/ETT-small/ETTh2.csv"
5 | "../../datasets/ETT-small/ETTm1.csv"
6 | "../../datasets/ETT-small/ETTm2.csv"
7 | "../../datasets/electricity/electricity.csv"
8 | "../../datasets/traffic/traffic.csv"
9 | "../../datasets/exchange_rate/exchange_rate.csv"
10 | "../../datasets/weather/weather.csv"
11 | )
12 |
13 | declare -a data=(
14 | "ETTh1"
15 | "ETTh2"
16 | "ETTm1"
17 | "ETTm2"
18 | "custom"
19 | "custom"
20 | "custom"
21 | "custom"
22 | )
23 |
24 | declare -a features=(7 7 7 7 321 862 8 21)
25 |
26 | declare -a batch_sizes=(128 128 128 128 32 24 128 128)
27 |
28 | for index in "${!data_paths[@]}";
29 | do
30 | CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
31 | --config "dlinear.json" --data_path ${data_paths[$index]} \
32 | --data ${data[$index]} \
33 | --enc_in ${features[$index]} \
34 | --batch_size ${batch_sizes[$index]}
35 | done
36 | ' > output.log 2>&1 &
37 | echo $! > save_pid.txt
--------------------------------------------------------------------------------
/tests/test_scripts/train_patchtst_csv.sh:
--------------------------------------------------------------------------------
1 | nohup bash -c '
2 | declare -a data_paths=(
3 | "../../datasets/ETT-small/ETTh1.csv"
4 | "../../datasets/ETT-small/ETTh2.csv"
5 | "../../datasets/ETT-small/ETTm1.csv"
6 | "../../datasets/ETT-small/ETTm2.csv"
7 | "../../datasets/electricity/electricity.csv"
8 | "../../datasets/traffic/traffic.csv"
9 | "../../datasets/exchange_rate/exchange_rate.csv"
10 | "../../datasets/weather/weather.csv"
11 | )
12 |
13 | declare -a data=(
14 | "ETTh1"
15 | "ETTh2"
16 | "ETTm1"
17 | "ETTm2"
18 | "custom"
19 | "custom"
20 | "custom"
21 | "custom"
22 | )
23 |
24 | declare -a features=(7 7 7 7 321 862 8 21)
25 |
26 | declare -a batch_sizes=(128 128 128 128 32 24 128 128)
27 |
28 | for index in "${!data_paths[@]}";
29 | do
30 | CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
31 | --config "patchtst.json" \
32 | --data_path ${data_paths[$index]} \
33 | --data ${data[$index]} \
34 | --enc_in ${features[$index]} \
35 | --batch_size ${batch_sizes[$index]}
36 | done
37 | ' > output.log 2>&1 &
38 | echo $! > save_pid.txt
--------------------------------------------------------------------------------
/ltsm/utils/dist.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.spatial.distance import euclidean
3 | from fastdtw import fastdtw
4 | import torch
5 |
def pairwise_dtw(x_batch, y_batch):
    """
    Compute the pairwise DTW distance between two batches of series.

    Distances are computed with ``fastdtw`` using the Euclidean distance
    between time steps.

    Args:
        :param x_batch: Tensor, [ Batchsize_x, Time, Dimension_x ]
        :param y_batch: Tensor, [ Batchsize_y, Time, Dimension_y ]

        The input tensor should have Dimension_x == Dimension_y

    :return: Pair-wise Distance, CPU Tensor, [ Batchsize_x, Batchsize_y ]
    """

    batchsize_x = x_batch.shape[0]
    batchsize_y = y_batch.shape[0]
    dist_matrix = torch.zeros((batchsize_x, batchsize_y), device=torch.device("cpu"))
    for idx1, x in enumerate(x_batch):
        for idx2, y in enumerate(y_batch):
            if x_batch is y_batch and dist_matrix[idx2, idx1] > 0:
                # Same batch on both sides: reuse the mirrored entry that was
                # already computed instead of running DTW again.
                dist_matrix[idx1, idx2] = dist_matrix[idx2, idx1]

            else:
                distance_xy, _ = fastdtw(x, y, dist=euclidean)
                dist_matrix[idx1, idx2] = distance_xy

    # The matrix was previously computed but never handed back to the caller.
    return dist_matrix
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/tests/test_scripts/train_informer_csv.sh:
--------------------------------------------------------------------------------
1 | nohup bash -c '
2 | declare -a data_paths=(
3 | "../../datasets/ETT-small/ETTh1.csv"
4 | "../../datasets/ETT-small/ETTh2.csv"
5 | "../../datasets/ETT-small/ETTm1.csv"
6 | "../../datasets/ETT-small/ETTm2.csv"
7 | "../../datasets/electricity/electricity.csv"
8 | "../../datasets/traffic/traffic.csv"
9 | "../../datasets/exchange_rate/exchange_rate.csv"
10 | "../../datasets/weather/weather.csv"
11 | )
12 |
13 | declare -a data=(
14 | "ETTh1"
15 | "ETTh2"
16 | "ETTm1"
17 | "ETTm2"
18 | "custom"
19 | "custom"
20 | "custom"
21 | "custom"
22 | )
23 |
24 | declare -a features=(7 7 7 7 321 862 8 21)
25 |
26 | declare -a batch_sizes=(128 128 128 128 32 24 128 128)
27 |
28 | for index in "${!data_paths[@]}";
29 | do
30 | CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
31 | --config "informer.json" \
32 | --data_path ${data_paths[$index]} \
33 | --data ${data[$index]} \
34 | --enc_in ${features[$index]} \
35 | --dec_in ${features[$index]} \
36 | --c_out ${features[$index]} \
37 | --batch_size ${batch_sizes[$index]}
38 | done
39 | ' > output.log 2>&1 &
40 | echo $! > save_pid.txt
--------------------------------------------------------------------------------
/tests/test_scripts/informer.json:
--------------------------------------------------------------------------------
1 | {
2 | "train_params": {
3 | "model": "Informer",
4 | "model_name_or_path": "gpt2-medium",
5 | "des": "Exp",
6 | "train_epochs": 100,
7 | "patience": 10,
8 | "lradj": "TST",
9 | "pct_start": 0.2,
10 | "freeze": 0,
11 | "itr": 1,
12 | "learning_rate": 1e-3,
13 | "downsample_rate": 20,
14 | "output_dir": "output/informer/",
15 | "eval": 0,
16 | "padding_patch": "end",
17 | "affine": 0,
18 | "subtract_last": 0,
19 | "decomposition": 0,
20 | "kernel_size": 25,
21 | "individual": 0,
22 | "embed": "timeF",
23 | "factor": 1,
24 | "features": "M",
25 | "local_pretrain": "None"
26 | },
27 | "model_config":{
28 | "pred_len": 96,
29 | "gradient_accumulation_steps": 512,
30 | "e_layers": 3,
31 | "d_layers": 1,
32 | "n_heads": 16,
33 | "d_model": 128,
34 | "d_ff": 256,
35 | "dropout": 0.2,
36 | "seq_len": 336,
37 | "activation": "gelu",
38 | "output_attention": 0,
39 | "embed_type": 0,
40 | "distil": 1
41 | }
42 | }
--------------------------------------------------------------------------------
/tests/test_scripts/train_ltsm_tokenizer_csv.sh:
--------------------------------------------------------------------------------
# Train the LTSM tokenizer model on the listed CSV datasets as a background
# job; stdout/stderr go to output.log.
#
# TRAIN and TEST are newline-separated strings that are expanded unquoted on
# purpose, so that every path becomes its own command-line argument.
nohup bash -c '
TRAIN="
../../datasets/exchange_rate/exchange_rate.csv
../../datasets/illness/national_illness.csv"

TEST="
../../datasets/exchange_rate/exchange_rate.csv
../../datasets/illness/national_illness.csv"
PROMPT="../../prompt_bank/prompt_data_normalize_split"
lr=1e-3
epoch=10
downsample_rate=20
freeze=0
d_ff=128

for pred_len in 96
do
    OUTPUT_PATH="output/ltsm_tokenizer_lr${lr}_loraFalse_down${downsample_rate}_freeze${freeze}_e${epoch}_pred${pred_len}/"
    CUDA_VISIBLE_DEVICES=5,6,7 python3 main_tokenizer.py \
    --model LTSM_Tokenizer \
    --model_name_or_path gpt2-medium \
    --d_ff $d_ff \
    --train_epochs ${epoch} \
    --batch_size 20 \
    --pred_len ${pred_len} \
    --gradient_accumulation_steps 64 \
    --data_path ${TRAIN} \
    --test_data_path_list ${TEST} \
    --prompt_data_path ${PROMPT} \
    --freeze ${freeze} \
    --learning_rate ${lr} \
    --downsample_rate ${downsample_rate} \
    --output_dir ${OUTPUT_PATH} \
    --eval 0
done
' > output.log 2>&1 &

#tail -f output.log # check the latest output
--------------------------------------------------------------------------------
/ltsm/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def RSE(pred, true):
    """Root relative squared error: the norm of the residual divided by the
    norm of ``true`` around its overall mean."""
    residual_norm = np.sqrt(np.sum((true - pred) ** 2))
    baseline_norm = np.sqrt(np.sum((true - true.mean()) ** 2))
    return residual_norm / baseline_norm
6 |
7 |
def CORR(pred, true):
    """Pearson correlation between ``pred`` and ``true``.

    The correlation is computed along axis 0 and the per-column values are
    then averaged over the last axis.

    The denominator is the product of the two sums of squared deviations.
    Summing the element-wise product of the squares, as before, does not give
    a Pearson correlation (a series compared with itself did not score 1).
    """
    true_dev = true - true.mean(0)
    pred_dev = pred - pred.mean(0)
    u = (true_dev * pred_dev).sum(0)
    d = np.sqrt((true_dev ** 2).sum(0) * (pred_dev ** 2).sum(0))
    return (u / d).mean(-1)
12 |
13 |
def MAE(pred, true):
    """Mean absolute error between ``pred`` and ``true``."""
    abs_err = np.abs(pred - true)
    return np.mean(abs_err)
16 |
17 |
def MSE(pred, true):
    """Mean squared error between ``pred`` and ``true``."""
    sq_err = (pred - true) ** 2
    return np.mean(sq_err)
20 |
21 |
def RMSE(pred, true):
    """Root mean squared error: the square root of the mean squared error."""
    mean_sq_err = np.mean((pred - true) ** 2)
    return np.sqrt(mean_sq_err)
24 |
25 |
def MAPE(pred, true):
    """Mean absolute percentage error, in percent.

    ``1e-8`` is added to ``true`` in the denominator to avoid division by zero.
    """
    pct_err = 100 * (pred - true) / (true + 1e-8)
    return np.mean(np.abs(pct_err))
28 |
29 |
def MSPE(pred, true):
    """Mean squared relative error (as a fraction, not a percentage).

    ``1e-8`` is added to ``true`` in the denominator to avoid division by zero.
    """
    rel_err = (pred - true) / (true + 1e-8)
    return np.mean(np.square(rel_err))
32 |
def SMAPE(pred, true):
    """Symmetric mean absolute percentage error, in percent.

    The denominator uses absolute values, ``|pred| + |true|``, plus ``1e-8``
    to avoid division by zero.
    """
    abs_err = np.abs(pred - true)
    scale = np.abs(pred) + np.abs(true) + 1e-8
    return np.mean(200 * abs_err / scale)
36 |
def ND(pred, true):
    """Normalized deviation: mean absolute error divided by the mean
    absolute value of ``true``."""
    mean_abs_err = np.mean(np.abs(true - pred))
    mean_abs_true = np.mean(np.abs(true))
    return mean_abs_err / mean_abs_true
39 |
def metric(pred, true):
    """Evaluate all summary metrics for one prediction.

    Returns:
        A 7-tuple ``(mae, mse, rmse, mape, mspe, smape, nd)``.
    """
    # Order matters: it defines the positions in the returned tuple.
    scorers = (MAE, MSE, RMSE, MAPE, MSPE, SMAPE, ND)
    return tuple(score(pred, true) for score in scorers)
50 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/README.md:
--------------------------------------------------------------------------------
1 | # Time Series Prompt Generator
2 |
3 |
4 | Time series prompts are designed to capture the extensive characteristics of time series data comprehensively. These prompts, distinct from text-based ones, are created by extracting a wide range of global features from the entire training dataset. This method ensures a robust representation of the underlying dynamics, essential for boosting model performance.
5 |
6 | ## Quick Start
7 | **Step 1.** Download the dataset from our [Google Drive](). Make sure your local data folder looks like this:
8 | ````angular2html
9 | - ltsm/
10 | - datasets/
11 | electricity/
12 | ETT-small/
13 | exchange_rate/
14 | illness/
15 | traffic/
16 | weather/
17 | ...
18 | ````
19 |
20 | **Step 2.** Generate the time series prompts from the training, validation, and test datasets:
21 | ````angular2html
22 | python3 prompt_generate_split.py
23 | ````
24 |
25 | **Step 3.** Find the generated time series prompts in the './prompt_data_split' folder. Then run the following command for normalizing the prompts:
26 | ````angular2html
27 | python3 prompt_normalization_split.py --mode fit
28 | ````
29 |
30 | **Step 4.** Run this command to export the prompts to the "./prompt_data_normalize_split" folder:
31 | ````angular2html
32 | python3 prompt_normalization_split.py --mode transform
33 | ````
--------------------------------------------------------------------------------
/tests/test_scripts/train_anomaly_main_ltsm.sh:
--------------------------------------------------------------------------------
# Launch anomaly_main_ltsm.py on GPUs 6 and 7 with the JSON config below.
CONFIG_PATH="./anomaly_config/config.json"

# Quote the expansion so a config path containing spaces stays one argument.
CUDA_VISIBLE_DEVICES=6,7 python3 anomaly_main_ltsm.py \
    --config_path "${CONFIG_PATH}"
5 | # #TRAIN="../../datasets/creditcard/creditcard.csv"
6 | # TRAIN="../../datasets/water_quality/water_quality.csv"
7 | # #TRAIN="../../datasets/multi-Synthetic/0.csv"
8 |
9 |
10 | # #TEST="../../datasets/creditcard/creditcard.csv"
11 | # TEST="../../datasets/water_quality/water_quality.csv"
12 | # #TEST="../../datasets/multi-Synthetic/0.csv"
13 |
14 | # PROMPT="../../prompt_bank/stat-prompt/prompt_data_normalize_split"
15 |
16 | # epoch=4
17 | # downsample_rate=20
18 | # freeze=0
19 | # lr=1e-7
20 |
21 |
22 | # for seq_len in 133
23 | # do
24 | # OUTPUT_PATH="output/ltsm_lr${lr}_loraFalse_down${downsample_rate}_freeze${freeze}_e${epoch}_pred${pred_len}_water_quality/"
25 | # echo "Current OUTPUT_PATH: ${OUTPUT_PATH}"
26 | # CUDA_VISIBLE_DEVICES=6,7 python3 anomaly_main_ltsm.py \
27 | # --model LTSM \
28 | # --model_name_or_path gpt2-medium \
29 | # --train_epochs ${epoch} \
30 | # --batch_size 100 \
31 | # --seq_len ${seq_len} \
32 | # --gradient_accumulation_steps 64 \
33 | # --data_path ${TRAIN} \
34 | # --test_data_path_list ${TEST} \
35 | # --prompt_data_path ${PROMPT} \
36 | # --freeze ${freeze} \
37 | # --learning_rate ${lr} \
38 | # --downsample_rate ${downsample_rate} \
39 | # --output_dir ${OUTPUT_PATH}\
40 | # --eval 0
41 | # done
--------------------------------------------------------------------------------
/tests/test_scripts/anomaly_config/config-1.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_id": "test_run",
3 | "model_name_or_path": "gpt2-medium",
4 | "seed": 2024,
5 | "device": "cuda:0",
6 | "checkpoints": "./checkpoints/",
7 | "data_path": ["../../datasets/creditcard/creditcard.csv"],
8 | "test_data_path_list": ["../../datasets/creditcard/creditcard.csv"],
9 | "prompt_data_path": "../../prompt_bank/stat-prompt/prompt_data_normalize_split",
10 | "data_processing": "standard_scaler",
11 | "learning_rate": 1e-4,
12 | "batch_size": 8,
13 | "num_workers": 10,
14 | "train_epochs": 1,
15 | "train_ratio": 0.7,
16 | "val_ratio": 0.1,
17 | "do_anomaly": true,
18 | "seq_len": 113,
19 | "pred_len": 113,
20 | "prompt_len": 133,
21 | "lora": false,
22 | "lora_dim": 128,
23 | "gpt_layers": 3,
24 | "d_model": 1024,
25 | "n_heads": 16,
26 | "d_ff": 512,
27 | "dropout": 0.2,
28 | "enc_in": 1,
29 | "c_out": 862,
30 | "patch_size": 16,
31 | "pretrain": 1,
32 | "local_pretrain": "None",
33 | "freeze": 0,
34 | "model": "LTSM",
35 | "stride": 8,
36 | "tmax": 10,
37 | "eval": 0,
38 | "itr": 1,
39 | "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_creditcard_113_check_bsize=8/",
40 | "downsample_rate": 20,
41 | "llm_layers": 32,
42 | "decay_fac": 0.75,
43 | "lradj": "type1",
44 | "patience": 3,
45 | "gradient_accumulation_steps": 64
46 | }
47 |
--------------------------------------------------------------------------------
/tests/test_scripts/train_ltsm_textprompt_csv.sh:
--------------------------------------------------------------------------------
# Train the LTSM_WordPrompt model once per prediction length.

# Whitespace-separated list of training CSVs; expanded unquoted below so each
# path becomes its own argument to --data_path.
TRAIN="datasets/ETT-small/ETTh1.csv
datasets/ETT-small/ETTh2.csv
datasets/ETT-small/ETTm1.csv
datasets/ETT-small/ETTm2.csv
datasets/electricity/electricity.csv
datasets/exchange_rate/exchange_rate.csv
datasets/traffic/traffic.csv
datasets/weather/weather.csv"

# Whitespace-separated list of test CSVs, passed to --test_data_path_list.
TEST="datasets/ETT-small/ETTh1.csv
datasets/ETT-small/ETTh2.csv
datasets/ETT-small/ETTm1.csv
datasets/ETT-small/ETTm2.csv
datasets/electricity/electricity.csv
datasets/exchange_rate/exchange_rate.csv
datasets/traffic/traffic.csv
datasets/weather/weather.csv"

# JSON file holding the text prompts.
PROMPT="prompt_bank/text_prompt_data_csv/csv_prompt.json"
epoch=1000
downsample_rate=20
freeze=0
lr=1e-3


for pred_len in 96 192 336 720
do
    # One output directory per prediction length; the name encodes the hyperparameters.
    OUTPUT_PATH="output/ltsm_textprompt_lr${lr}_loraFalse_down${downsample_rate}_freeze${freeze}_e${epoch}_pred${pred_len}/"
    CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
        --model LTSM_WordPrompt \
        --model_name_or_path gpt2-medium \
        --train_epochs ${epoch} \
        --batch_size 10 \
        --pred_len ${pred_len} \
        --gradient_accumulation_steps 64 \
        --data_path ${TRAIN} \
        --test_data_path_list ${TEST} \
        --prompt_data_path ${PROMPT} \
        --freeze ${freeze} \
        --learning_rate ${lr} \
        --downsample_rate ${downsample_rate} \
        --output_dir ${OUTPUT_PATH} \
        --eval 0
done
45 |
--------------------------------------------------------------------------------
/tests/test_scripts/anomaly_config/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_id": "test_run",
3 | "model_name_or_path": "gpt2-medium",
4 | "seed": 2024,
5 | "device": "cuda:0",
6 | "checkpoints": "./checkpoints/",
7 | "data_path": ["../../datasets/water_quality/water_quality.csv"],
8 | "test_data_path_list": ["../../datasets/water_quality/water_quality.csv"],
9 | "prompt_data_path": "../../prompt_bank/stat-prompt/prompt_data_normalize_split",
10 | "data_processing": "standard_scaler",
11 | "learning_rate": 2e-5,
12 | "batch_size": 8,
13 | "num_workers": 10,
14 | "train_epochs": 4,
15 | "train_ratio": 0.7,
16 | "val_ratio": 0.1,
17 | "do_anomaly": true,
18 | "seq_len": 133,
19 | "pred_len": 133,
20 | "prompt_len": 133,
21 | "lora": false,
22 | "lora_dim": 128,
23 | "gpt_layers": 1,
24 | "d_model": 1024,
25 | "n_heads": 16,
26 | "d_ff": 512,
27 | "dropout": 0.2,
28 | "enc_in": 1,
29 | "c_out": 862,
30 | "patch_size": 16,
31 | "pretrain": 1,
32 | "local_pretrain": "None",
33 | "freeze": 0,
34 | "model": "LTSM",
35 | "stride": 8,
36 | "tmax": 10,
37 | "eval": 0,
38 | "itr": 1,
39 | "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_113_check_bsize=8_grad_accumulate=16_layer=1",
40 | "downsample_rate": 20,
41 | "llm_layers": 32,
42 | "decay_fac": 0.75,
43 | "lradj": "type1",
44 | "patience": 3,
45 | "gradient_accumulation_steps": 16
46 | }
47 |
--------------------------------------------------------------------------------
/tests/data_provider/data_splitter_test.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_provider.data_splitter import SplitterByTimestamp
2 | import pandas as pd
3 | import numpy as np
4 | import pytest
5 | import math
6 |
def test_splitter_by_timestamp_get_csv_splits():
    """Three 100-step series are each split into pieces of the expected lengths."""
    series_names = ["cosine", "linear", "exponential"]
    steps = range(100)
    rows = [
        [math.cos(t) for t in steps],
        [2 * t for t in steps],
        [math.exp(t) for t in steps],
    ]
    frame = pd.DataFrame(rows, index=series_names)
    splitter = SplitterByTimestamp(seq_len=5, pred_len=1, train_ratio=0.7, val_ratio=0.1)

    train, val, test, buff = splitter.get_csv_splits(frame)

    # One entry per input series in every returned collection.
    for part in (train, val, test, buff):
        assert len(part) == 3
    assert buff == series_names

    # Expected per-series lengths of the train / val / test pieces.
    for idx in range(3):
        assert len(train[idx]) == 70
        assert len(val[idx]) == 15
        assert len(test[idx]) == 25
27 |
def test_splitter_by_timestamp_get_csv_splits_invalid_ndim():
    """A frame built from rows of differing dimensionality must raise ValueError."""
    ragged_rows = [np.array([1, 2, 3]), np.array([[4, 5], [6, 7], [8, 9]])]
    frame = pd.DataFrame(ragged_rows)
    splitter = SplitterByTimestamp(seq_len=5, pred_len=1, train_ratio=0.7, val_ratio=0.1)

    with pytest.raises(ValueError):
        train, val, test, buff = splitter.get_csv_splits(frame)
--------------------------------------------------------------------------------
/tests/test_scripts/test_ltsm.sh:
--------------------------------------------------------------------------------
# Run main_ltsm.py in evaluation mode (--eval 1) with the LSC2204/LTSM-bundle
# weights, once per prediction length.

# Whitespace-separated list of training CSVs; expanded unquoted below so each
# path becomes its own argument to --data_path.
TRAIN="
all_six_datasets/ETT-small/ETTh1.csv
all_six_datasets/ETT-small/ETTh2.csv
all_six_datasets/ETT-small/ETTm1.csv
all_six_datasets/ETT-small/ETTm2.csv
all_six_datasets/electricity/electricity.csv
all_six_datasets/exchange_rate/exchange_rate.csv
all_six_datasets/traffic/traffic.csv
all_six_datasets/weather/weather.csv"


# Whitespace-separated list of test CSVs, passed to --test_data_path_list.
TEST="
all_six_datasets/ETT-small/ETTh1.csv
all_six_datasets/ETT-small/ETTh2.csv
all_six_datasets/ETT-small/ETTm1.csv
all_six_datasets/ETT-small/ETTm2.csv
all_six_datasets/electricity/electricity.csv
all_six_datasets/exchange_rate/exchange_rate.csv
all_six_datasets/traffic/traffic.csv
all_six_datasets/weather/weather.csv"

PROMPT="prompt_bank/prompt_data_normalize_csv_split"
epoch=500
downsample_rate=20
freeze=0
lr=1e-3


for pred_len in 96
do

    # A space precedes every line-continuation backslash so adjacent arguments
    # can never be glued together, regardless of the next line's indentation.
    CUDA_VISIBLE_DEVICES=0,1 python3 main_ltsm.py \
        --model LTSM \
        --model_name_or_path gpt2-medium \
        --local_pretrain LSC2204/LTSM-bundle \
        --train_epochs ${epoch} \
        --batch_size 800 \
        --pred_len ${pred_len} \
        --gradient_accumulation_steps 64 \
        --data_path ${TRAIN} \
        --test_data_path_list ${TEST} \
        --prompt_data_path ${PROMPT} \
        --freeze ${freeze} \
        --learning_rate ${lr} \
        --downsample_rate ${downsample_rate} \
        --output_dir "output/ltsm_csv_medium_lr${lr}_loraFalse_down${downsample_rate}_freeze${freeze}_e${epoch}_pred${pred_len}/" \
        --eval 1
done
49 |
--------------------------------------------------------------------------------
/tests/test_scripts/test_tokenizer_training.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | import os
5 | import argparse
6 | import random
7 | import sys
8 |
9 | sys.path.append("/home/yc146/github_open_ltsm/ltsm")
10 |
11 | from ltsm.data_provider.data_loader import HF_Dataset
12 | from ltsm.data_provider.tokenizer.tokenizer_processor import TokenizerConfig
13 | from ltsm.data_pipeline.tokenizer_pipeline import TokenizerTrainingPipeline, tokenizer_get_args, tokenizer_seed_all
14 | from ltsm.models import get_model
15 | from ltsm.models.utils import freeze_parameters, print_trainable_parameters
16 | from peft import get_peft_model, LoraConfig
17 |
18 | from transformers import (
19 | Trainer,
20 | TrainingArguments,
21 | EvalPrediction,
22 | )
23 |
def run():
    """Build the model, optimizer and LR scheduler, then run the tokenizer training pipeline."""
    # The seed from the parsed config is applied before the model is built.
    config = tokenizer_get_args()
    seed = config.seed
    tokenizer_seed_all(seed)
    model = get_model(config)

    if config.lora:
        # Wrap the model with LoRA adapters on the "c_attn" modules.
        peft_config = LoraConfig(
            target_modules=["c_attn"], # ["q", "v"],
            inference_mode=False,
            r=config.lora_dim,
            lora_alpha=32,
            lora_dropout=0.1
        )
        model = get_peft_model(model, peft_config)
        model.print_trainable_parameters()

    elif config.freeze:
        # Freezing is only applied when LoRA is not in use.
        freeze_parameters(model)

    print_trainable_parameters(model)


    # Adam with cosine annealing over config.tmax steps, down to a floor of 1e-8.
    model_optim = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=config.tmax, eta_min=1e-8)

    pipeline = TokenizerTrainingPipeline(config, model, model_optim, lr_scheduler)

    pipeline.run()


if __name__ == "__main__":
    run()
57 |
--------------------------------------------------------------------------------
/ltsm/models/ltsm_ts_tokenizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from .base_config import LTSMConfig
3 | from transformers.modeling_utils import PreTrainedModel
4 | from transformers import AutoModel, AutoConfig
5 |
6 |
class LTSM_Tokenizer(PreTrainedModel):
    """Pretrained LLM backbone, truncated to a few layers, fed with token ids.

    The backbone is loaded with ``AutoModel.from_pretrained`` from
    ``configs.model_name_or_path`` and pruned to its first
    ``configs.gpt_layers`` blocks.
    """

    config_class = LTSMConfig

    def __init__(self, configs):
        """Load the pretrained backbone and prune it.

        Args:
            configs: Model configuration; reads ``patch_size``, ``pretrain``,
                ``pred_len``, ``model_name_or_path`` and ``gpt_layers``.

        Raises:
            NotImplementedError: If ``configs.pretrain`` is falsy, or the
                backbone family is not supported by :meth:`model_prune`.
        """
        super().__init__(configs)
        self.patch_size = configs.patch_size
        self.pretrain = configs.pretrain

        self.d_type = torch.bfloat16
        self.pred_len = configs.pred_len

        if configs.pretrain:
            print("Loading the pretraining weight.")
            self.llm_config = AutoConfig.from_pretrained(configs.model_name_or_path)
            self.llm = AutoModel.from_pretrained(configs.model_name_or_path)
        else:
            raise NotImplementedError("You must load the pretraining weight.")

        self.model_prune(configs)
        print("gpt2 = {}".format(self.llm))

    def model_prune(self, configs):
        """Keep only the first ``configs.gpt_layers`` blocks of the backbone.

        The attribute holding the blocks depends on the model family, which is
        detected by substring match on ``configs.model_name_or_path``.

        Raises:
            NotImplementedError: If the model name matches no supported family.
        """
        name = configs.model_name_or_path
        if "gpt2" in name:
            self.llm.h = self.llm.h[:configs.gpt_layers]
        elif "phi" in name or "llama" in name or "gemma" in name:
            self.llm.layers = self.llm.layers[:configs.gpt_layers]
        else:
            # Report the model name, not the full module repr, so the error stays readable.
            raise NotImplementedError(f"No implementation in model prune for {name}.")

    def forward(self, x):
        """Run the backbone on ``x`` and return the last ``pred_len`` positions.

        ``x`` is cast to int32, given a trailing singleton dimension and passed
        as ``input_ids``; the result is sliced along dimension 1.
        NOTE(review): the expected shape of ``x`` is not visible here — confirm
        against the data pipeline.
        """
        token_ids = x.int().unsqueeze(-1)
        hidden = self.llm(input_ids=token_ids).last_hidden_state
        return hidden[:, -self.pred_len:, :]
--------------------------------------------------------------------------------
/tests/test_scripts/test_csv_lora.sh:
--------------------------------------------------------------------------------
# LoRA sweep: run main_ltsm.py for every (pred_len, lr, lora_dim) combination.

# Whitespace-separated list of training CSVs; expanded unquoted below so each
# path becomes its own argument to --data_path.
TRAIN="datasets/ETT-small/ETTh1.csv
datasets/ETT-small/ETTh2.csv
datasets/ETT-small/ETTm1.csv
datasets/ETT-small/ETTm2.csv
datasets/electricity/electricity.csv
datasets/exchange_rate/exchange_rate.csv
datasets/traffic/traffic.csv
datasets/weather/weather.csv"

# Whitespace-separated list of test CSVs, passed to --test_data_path_list.
TEST="datasets/ETT-small/ETTh1.csv
datasets/ETT-small/ETTh2.csv
datasets/ETT-small/ETTm1.csv
datasets/ETT-small/ETTm2.csv
datasets/electricity/electricity.csv
datasets/exchange_rate/exchange_rate.csv
datasets/traffic/traffic.csv
datasets/weather/weather.csv"

PROMPT="prompt_bank/prompt_data_normalize_csv_split"

epoch=500
downsample_rate=20
freeze=0

for pred_len in 96 192 336 720
do
    for lr in 1e-3
    do
        for lora_dim in 32 64
        do
            # Built inside the loops: lr, pred_len and lora_dim are only set
            # here, and each combination needs its own output directory.
            OUTPUT_PATH="output/test_ltsm_lr${lr}_lora${lora_dim}_down${downsample_rate}_freeze${freeze}_e${epoch}_pred${pred_len}/"

            # INIT_TEST is not defined in this script; --test_data_path is only
            # passed when the caller exports it, so the flag never lacks a value.
            CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 python3 main_ltsm.py \
                --lora \
                --lora_dim ${lora_dim} \
                --model_id test_run \
                --train_epochs ${epoch} \
                --batch_size 800 \
                --pred_len ${pred_len} \
                --gradient_accumulation_steps 64 \
                --data_path ${TRAIN} \
                ${INIT_TEST:+--test_data_path "${INIT_TEST}"} \
                --test_data_path_list ${TEST} \
                --prompt_data_path ${PROMPT} \
                --freeze ${freeze} \
                --learning_rate ${lr} \
                --downsample_rate ${downsample_rate} \
                --output_dir "${OUTPUT_PATH}"
        done
    done
done
51 |
--------------------------------------------------------------------------------
/multi_agents_pipeline/agents/TS_Agent.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pathlib import Path
3 | from typing import Optional, List
4 |
5 | from autogen_core import (
6 | RoutedAgent,
7 | message_handler,
8 | default_subscription,
9 | MessageContext,
10 | DefaultTopicId,
11 | TopicId,
12 | type_subscription
13 | )
14 | from autogen_core.models import ChatCompletionClient, UserMessage, AssistantMessage
15 | from pydantic import BaseModel
16 | from .custom_messages import TextMessage, TSMessage
17 | from multi_agents_pipeline.ltsm_inference import inference
18 |
19 |
@type_subscription(topic_type="Planner-TS")  # for receiving task from Planner
@type_subscription(topic_type="Redo-TS")  # for receiving TS Feedback
class TSAgent(RoutedAgent):
    """Agent that runs LTSM inference on time-series tasks and publishes the result."""

    def __init__(self, name: str, model_client: Optional[ChatCompletionClient] = None):
        """Create the agent.

        Args:
            name: Agent name; used in the description and as the ``source`` of
                published messages.
            model_client: Optional chat-completion client; stored but not used
                by the handlers in this class.
        """
        super().__init__(description=f"{name} with LTSM Package support")
        self.name = name
        self._last_plan: Optional[str] = None
        self._model_client = model_client
        self._last_ts_response: Optional[str] = None  # for evaluation

    @message_handler
    async def handle_TS(self, message: TSMessage, ctx: MessageContext) -> None:
        """Run LTSM inference on the file named in ``message`` and publish the result.

        The value returned by ``inference`` is remembered for
        :meth:`get_last_response` and published on the "TS-Info" topic as the
        ``filepath`` of a new ``TSMessage``.
        """
        ts_response = inference(
            file=message.filepath,
            task_type=message.task_type,
        )

        # Record the result; previously this attribute was never assigned, so
        # get_last_response() always returned None.
        self._last_ts_response = ts_response

        # NOTE(review): the outgoing task_type is hard-coded rather than taken
        # from the incoming message — confirm this is intended.
        await self.publish_message(
            TSMessage(source=self.name,
                      filepath=ts_response,
                      task_type="ts-classification"),
            TopicId(type="TS-Info", source=self.id.key),
        )

    def get_last_response(self) -> Optional[str]:
        """Return the most recent inference result, or None if none has been produced."""
        return self._last_ts_response
50 |
51 |
--------------------------------------------------------------------------------
/tests/models/init_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from transformers import PretrainedConfig, PreTrainedModel
3 | from ltsm.models import register_model, get_model, model_dict
4 |
def test_register_model(mocker):
    """Registering a model stores it under its name; registering it twice fails."""
    name = "MockModel1"
    fake_model = mocker.MagicMock(spec=PreTrainedModel)

    register_model(fake_model, name)
    assert name in model_dict
    assert model_dict[name] == fake_model

    # A second registration under the same name must be rejected.
    with pytest.raises(AssertionError, match="Reader MockModel1 already registered"):
        register_model(fake_model, name)
13 |
def test_get_model(mocker):
    """get_model instantiates the registered class with the given config."""
    fake_model = mocker.MagicMock(spec=PreTrainedModel)
    fake_config = mocker.MagicMock(spec=PretrainedConfig)
    register_model(fake_model, "MockModel2")

    built = get_model(fake_config, "MockModel2")

    fake_model.assert_called_once_with(fake_config)
    assert isinstance(built, mocker.MagicMock)
22 |
def test_get_model_invalid_name():
    """Asking for an unregistered model name raises ValueError."""
    unknown_name = "NonExistentModel"
    with pytest.raises(ValueError, match="Model NonExistentModel is not registered"):
        get_model(PretrainedConfig(), unknown_name)
26 |
def test_get_model_local_pretrain(mocker):
    """With local_pretrain set, the model is loaded via from_pretrained using the loaded config."""
    config_loader = mocker.patch("transformers.PretrainedConfig.from_pretrained")
    fake_model = mocker.MagicMock(spec=PreTrainedModel)
    register_model(fake_model, "MockModel3")
    config_loader.return_value = mocker.MagicMock()

    built = get_model(PretrainedConfig(), "MockModel3", local_pretrain="path/to/pretrained")

    fake_model.from_pretrained.assert_called_once_with("path/to/pretrained", config_loader.return_value)
    assert isinstance(built, mocker.MagicMock)
36 |
def test_get_model_hf_hub(mocker):
    """With hf_hub_model set, the model is loaded via from_pretrained using the passed config."""
    # Patched for its side effect only; the returned mock is not inspected.
    mocker.patch("transformers.PreTrainedModel.from_pretrained")
    fake_model = mocker.MagicMock(spec=PreTrainedModel)
    register_model(fake_model, "MockModel4")

    built = get_model(PretrainedConfig(), "MockModel4", hf_hub_model="mock-hub-model")

    fake_model.from_pretrained.assert_called_once_with("mock-hub-model", PretrainedConfig())
    assert isinstance(built, mocker.MagicMock)
--------------------------------------------------------------------------------
/ltsm/layers/RevIN.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/ts-kim/RevIN, with minor modifications
2 |
3 | import torch
4 | import torch.nn as nn
5 |
class RevIN(nn.Module):
    """Reversible instance normalization.

    ``forward(x, 'norm')`` records per-instance statistics of ``x`` and
    normalizes with them; ``forward(x, 'denorm')`` undoes the normalization
    using the statistics recorded by the most recent 'norm' call.
    """

    def __init__(self, num_features: int, eps=1e-5, affine=True, subtract_last=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        :param subtract_last: if True, center on the last step of dimension 1
            instead of the mean
        """
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        if affine:
            self._init_params()

    def forward(self, x, mode: str):
        """Apply normalization ('norm') or its inverse ('denorm') to ``x``."""
        if mode == 'norm':
            self._get_statistics(x)
            return self._normalize(x)
        if mode == 'denorm':
            return self._denormalize(x)
        raise NotImplementedError

    def _init_params(self):
        """Create the learnable per-feature scale (ones) and shift (zeros), shape (C,)."""
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        """Store the centering term and standard deviation of ``x`` on the module."""
        # Reduce over every dimension except the first (batch) and the last (features).
        reduce_dims = tuple(range(1, x.ndim - 1))
        if self.subtract_last:
            self.last = x[:, -1, :].unsqueeze(1)
        else:
            self.mean = x.mean(dim=reduce_dims, keepdim=True).detach()
        variance = x.var(dim=reduce_dims, keepdim=True, unbiased=False)
        self.stdev = torch.sqrt(variance + self.eps).detach()

    def _normalize(self, x):
        """Center and scale ``x`` with the stored statistics, then apply the affine map."""
        center = self.last if self.subtract_last else self.mean
        x = (x - center) / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        """Invert :meth:`_normalize` using the stored statistics."""
        if self.affine:
            x = x - self.affine_bias
            # eps**2 keeps the division finite if a learned weight reaches zero.
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        center = self.last if self.subtract_last else self.mean
        return x + center
--------------------------------------------------------------------------------
/multi_agents_pipeline/llm-server.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI
2 | from pydantic import BaseModel
3 | from transformers import AutoModelForCausalLM, AutoTokenizer
4 | import torch
5 |
# Module-level setup: the tokenizer and model are loaded once at import time
# and shared by every request.
app = FastAPI()
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
# NOTE(review): local_files_only=True is passed for the tokenizer but not for
# the model — confirm whether the model is also expected to be cached locally.
tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
print(model.hf_device_map)

model.eval()

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
15 |
# Request body accepted by POST /v1/chat/completions.
class ChatRequest(BaseModel):
    model: str  # echoed back in the response's "model" field
    messages: list  # only messages[-1]["content"] is read by the handler
    temperature: float = 0.7  # forwarded to model.generate
    max_tokens: int = 1024  # forwarded to model.generate as max_new_tokens
21 |
def format_prompt_llama3(prompt: str) -> str:
    """Wrap ``prompt`` as a single user turn followed by an open assistant header."""
    user_turn = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{prompt}\n"
    assistant_header = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    return user_turn + assistant_header
28 |
29 |
@app.post("/v1/chat/completions")
async def chat(request: ChatRequest):
    """Generate one assistant reply for the last message of ``request``.

    Only ``request.messages[-1]["content"]` is used as the prompt; earlier
    messages are ignored. The reply is returned in a chat-completion style
    dict with ``choices`` and ``usage``.
    """
    prompt = request.messages[-1]["content"]  # for convenience, temporarily just use the last message.
    prompt = format_prompt_llama3(prompt)

    input_data = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    input_ids = input_data["input_ids"].to(model.device)
    attention_mask = input_data["attention_mask"].to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=request.max_tokens,
            temperature=request.temperature,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The generated sequence starts with the prompt tokens; keep only the new ones.
    prompt_tokens = int(input_ids.shape[1])
    generated = output[0][prompt_tokens:]
    response_text = tokenizer.decode(generated, skip_special_tokens=True)

    # Count only newly generated tokens as completion tokens; using
    # len(output[0]) would include the prompt and double-count it in the total.
    completion_tokens = int(generated.shape[0])

    return {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1234567890,
        "model": request.model,
        "choices": [
            {
                "message": {"role": "assistant", "content": response_text},
                "finish_reason": "stop",
                "index": 0,
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }
    }
--------------------------------------------------------------------------------
/tests/data_reader/dataloader_unittest_example.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pandas as pd
3 | from io import StringIO
4 |
class TestDataTransformation(unittest.TestCase):
    """Example unit test for converting a timestamped sensor CSV into an index-based frame.

    NOTE(review): ``transform_data`` is neither imported nor defined in this
    file, so ``test_data_transformation`` raises NameError as written — import
    it from the module under test.
    """

    def setUp(self):
        # Create a simulated CSV input data
        self.input_csv = StringIO(
            """Updated Time,Suction Pressure,Suction temperature,Condenser Inlet Temperature,Condenser Outlet Temperature,Liquid temperature,Liquid Pressure,Compressor current,Condensing Fan Current,Top Shell Temperature,Discharge Temperature,Bottom Temperature,Motor Temperature
6/30/2023 19:01:24,61.712231658240015,102.75,98.340625,109.73125,100.84,363.4015032,9.9,0.6,58.22,26.27,118.6609375,118.8859375
6/30/2023 19:03:04,69.21224676096001,103.19,98.93125,109.73125,100.84,364.13170107648006,9.86,0.6,58.89,26.4,118.365625,118.6046875
        """
        )

        # Expected format of converted data: column 0 holds the 0, 1, ... time
        # index and columns 1-12 hold the sensor readings in CSV column order.
        self.expected_df = pd.DataFrame({
            0: [0, 1],
            1: [61.712231658240015, 69.21224676096001],
            2: [102.75, 103.19],
            3: [98.340625, 98.93125],
            4: [109.73125, 109.73125],
            5: [100.84, 100.84],
            6: [363.4015032, 364.13170107648006],
            7: [9.9, 9.86],
            8: [0.6, 0.6],
            9: [58.22, 58.89],
            10: [26.27, 26.4],
            11: [118.6609375, 118.365625],
            12: [118.8859375, 118.6046875]
        })

    def test_data_transformation(self):
        """Check the transformed frame's first row and its remaining rows."""
        # Read CSV data as a DataFrame
        input_df = pd.read_csv(self.input_csv, parse_dates=['Updated Time'])

        # Execute data conversion function
        transformed_df = transform_data(input_df)

        # Verify that the time column has been successfully converted to the 0, 1, 2... format
        # (the check compares the first ROW of the result with 0..ncols-1)
        self.assertTrue((transformed_df.iloc[0, :] == range(len(transformed_df.columns))).all(),
                        "Time sequence conversion failed.")

        # Verify that the converted data structure meets expectations
        pd.testing.assert_frame_equal(
            transformed_df.iloc[1:, :].reset_index(drop=True),
            self.expected_df.reset_index(drop=True),
            check_dtype=False,
            err_msg="Data transformation did not produce the expected output."
        )
51 |
52 | if __name__ == '__main__':
53 | unittest.main()
54 | # Step 1
55 | # Step 2
56 |
--------------------------------------------------------------------------------
/tests/test_scripts/main_ltsm.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_pipeline import StatisticalTrainingPipeline, get_args, seed_all
2 | from ltsm.common.base_training_pipeline import TrainingConfig
3 | import torch
4 | import torch.nn as nn
5 | import numpy as np
6 |
if __name__ == "__main__":
    # Two ways to load the configuration: from a JSON file or from command line arguments
    # First method
    config = get_args()

    # Second method
    # train_config = TrainingConfig.load("ltsm.json")

    seed = config.train_params["seed"]
    seed_all(seed)

    if config.train_params["model"] == "Informer":
        # Informer also consumes timestamp features, so it needs its own
        # collate function, prediction step and loss computation.

        def _model_device(model):
            """Return the device of ``model``, unwrapping a ``.module`` wrapper if present.

            ``model.module`` only exists when the model is wrapped (e.g. by
            DataParallel on multi-GPU runs); fall back to the model itself so
            single-device runs do not raise AttributeError.
            """
            return getattr(model, "module", model).device

        def collate_fn(batch):
            """Stack each field of the batch items into a float32 tensor."""
            return {
                'input_data': torch.from_numpy(np.stack([x['input_data'] for x in batch])).type(torch.float32),
                'labels': torch.from_numpy(np.stack([x['labels'] for x in batch])).type(torch.float32),
                'timestamp_input': torch.from_numpy(np.stack([x['timestamp_input'] for x in batch])).type(torch.float32),
                'timestamp_labels': torch.from_numpy(np.stack([x['timestamp_labels'] for x in batch])).type(torch.float32)
            }

        def prediction_step(model, inputs, prediction_loss_only=False, ignore_keys=None):
            """Run a forward pass and return ``(mse_loss, outputs, labels)``."""
            device = _model_device(model)
            labels = inputs["labels"].to(device)
            input_data_mark = inputs["timestamp_input"].to(device)
            label_mark = inputs["timestamp_labels"].to(device)
            input_data = inputs["input_data"].to(device)

            outputs = model(input_data, input_data_mark, labels, label_mark)
            loss = nn.functional.mse_loss(outputs, labels)
            return (loss, outputs, labels)

        def compute_loss(model, inputs, return_outputs=False):
            """Return the MSE loss, plus the outputs when ``return_outputs`` is true."""
            device = _model_device(model)
            # NOTE(review): unlike prediction_step, input_data and labels are
            # not moved to the device here — presumably the trainer has already
            # placed them; confirm.
            input_data_mark = inputs["timestamp_input"].to(device)
            label_mark = inputs["timestamp_labels"].to(device)
            outputs = model(inputs["input_data"], input_data_mark, inputs["labels"], label_mark)

            loss = nn.functional.mse_loss(outputs, inputs["labels"])
            return (loss, outputs) if return_outputs else loss

        pipeline = StatisticalTrainingPipeline(config,
                                               collate_fn=collate_fn,
                                               prediction_step=prediction_step,
                                               compute_loss=compute_loss)
    else:
        pipeline = StatisticalTrainingPipeline(config)

    pipeline.run()
53 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/text_prompt/csv_prompt.json:
--------------------------------------------------------------------------------
1 | {
2 | "0": "The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment. This dataset consists of 2 years data from two separated counties in China. To explore the granularity on the Long sequence time-series forecasting (LSTF) problem, different subsets are created, {ETTh1, ETTh2} for 1-hour-level and ETTm1 for 15-minutes-level. Each data point consists of the target value ”oil temperature” and 6 power load features. The train/val/test is 12/4/4 months.",
3 | "1": "The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment. This dataset consists of 2 years data from two separated counties in China. To explore the granularity on the Long sequence time-series forecasting (LSTF) problem, different subsets are created, {ETTh1, ETTh2} for 1-hour-level and ETTm1 for 15-minutes-level. Each data point consists of the target value ”oil temperature” and 6 power load features. The train/val/test is 12/4/4 months.",
4 | "2": "The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment. This dataset consists of 2 years data from two separated counties in China. To explore the granularity on the Long sequence time-series forecasting (LSTF) problem, different subsets are created, {ETTh1, ETTh2} for 1-hour-level and ETTm1 for 15-minutes-level. Each data point consists of the target value ”oil temperature” and 6 power load features. The train/val/test is 12/4/4 months.",
5 | "3": "The Electricity Transformer Temperature (ETT) is a crucial indicator in the electric power long-term deployment. This dataset consists of 2 years data from two separated counties in China. To explore the granularity on the Long sequence time-series forecasting (LSTF) problem, different subsets are created, {ETTh1, ETTh2} for 1-hour-level and ETTm1 for 15-minutes-level. Each data point consists of the target value ”oil temperature” and 6 power load features. The train/val/test is 12/4/4 months.",
6 | "4": "Electricity contains electricity consumption of 321 clients from 2012 to 2014. And the data was converted to reflect hourly consumption.",
7 | "5": "Exchange rate is a collection of the daily exchange rates of eight foreign countries ranging from 1990 to 2016.",
8 | "6": "Traffic is a collection of hourly data from California Department of Transportation, which describes the road occupancy rates measured by different sensors on San Francisco Bay area freeways.",
9 | "7": "Weather is recorded every 10 minutes for the 2020 whole year, which contains 21 meteorological indicators, such as air temperature, humidity, etc."
10 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoints/
2 | dataset/
3 | TSForecasting/
4 | *.swp
5 | output/
6 | .idea/
7 |
8 | *.pub
9 |
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | .Python
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | pip-wheel-metadata/
33 | share/python-wheels/
34 | *.egg-info/
35 | .installed.cfg
36 | *.egg
37 | MANIFEST
38 |
39 | # PyInstaller
40 | # Usually these files are written by a python script from a template
41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest
43 | *.spec
44 |
45 | # Installer logs
46 | pip-log.txt
47 | pip-delete-this-directory.txt
48 |
49 | # Unit test / coverage reports
50 | htmlcov/
51 | .tox/
52 | .nox/
53 | .coverage
54 | .coverage.*
55 | .cache
56 | nosetests.xml
57 | coverage.xml
58 | *.cover
59 | *.py,cover
60 | .hypothesis/
61 | .pytest_cache/
62 |
63 | # Translations
64 | *.mo
65 | *.pot
66 |
67 | # Django stuff:
68 | *.log
69 | local_settings.py
70 | db.sqlite3
71 | db.sqlite3-journal
72 |
73 | # Flask stuff:
74 | instance/
75 | .webassets-cache
76 |
77 | # Scrapy stuff:
78 | .scrapy
79 |
80 | # Sphinx documentation
81 | docs/_build/
82 |
83 | # PyBuilder
84 | target/
85 |
86 | # Jupyter Notebook
87 | .ipynb_checkpoints
88 |
89 | # IPython
90 | profile_default/
91 | ipython_config.py
92 |
93 | # pyenv
94 | .python-version
95 |
96 | # pipenv
97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | # install all needed dependencies.
101 | #Pipfile.lock
102 |
103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 | __pypackages__/
105 |
106 | # Celery stuff
107 | celerybeat-schedule
108 | celerybeat.pid
109 |
110 | # SageMath parsed files
111 | *.sage.py
112 |
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 |
137 | # Pyre type checker
138 | .pyre/
139 | *.csv
140 | scratch/
141 | .DS_Store
142 | .idea/
143 |
144 | /datasets
145 | /prompt_bank
--------------------------------------------------------------------------------
/ltsm/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .ltsm_stat_model import LTSM
2 | from .ltsm_wordprompt import LTSM_WordPrompt
3 | from .ltsm_ts_tokenizer import LTSM_Tokenizer
4 | from .PatchTST import PatchTST
5 | from .DLinear import DLinear
6 | from .Informer import Informer
7 | from transformers import PretrainedConfig, PreTrainedModel
8 |
model_dict = {}


def register_model(module, module_name: str):
    """
    Registers a PreTrainedModel class in the module-level model dictionary.

    Args:
        module: A Python module or class that implements a PreTrainedModel.
        module_name (str): The key name for the module in the model dictionary.

    Raises:
        AssertionError: If a model with the same name is already registered.
    """
    # Raised explicitly instead of through an `assert` statement so the
    # duplicate check is still enforced when Python runs with -O.
    if module_name in model_dict:
        raise AssertionError(f"Model {module_name} already registered")
    model_dict[module_name] = module
24 |
25 | register_model(LTSM, 'LTSM')
26 | register_model(LTSM_WordPrompt, 'LTSM_WordPrompt')
27 | register_model(LTSM_Tokenizer, 'LTSM_Tokenizer')
28 | register_model(PatchTST, 'PatchTST')
29 | register_model(DLinear, 'DLinear')
30 | register_model(Informer, 'Informer')
31 |
def get_model(config: PretrainedConfig, model_name: str, local_pretrain: str = None, hf_hub_model: str = None) -> PreTrainedModel:
    """
    Factory method that builds a registered model, optionally from saved weights.

    Args:
        config (PretrainedConfig): The configuration for the model.
        model_name (str): Key of the model class in model_dict.
        local_pretrain (str): Path handed to from_pretrained for a local
            checkpoint. None or the string "None" means no local checkpoint.
        hf_hub_model (str): Identifier handed to from_pretrained. When given,
            it takes precedence over local_pretrain.

    Returns:
        PreTrainedModel: The instantiated model.

    Raises:
        ValueError: If the model name is not found in model_dict.
    """
    if model_name not in model_dict:
        raise ValueError(f"Model {model_name} is not registered. Available models: {list(model_dict.keys())}")

    model_cls = model_dict[model_name]

    # Hub weights win over every other option.
    if hf_hub_model is not None:
        # NOTE(review): the config is passed positionally in both
        # from_pretrained calls here; confirm whether `config=...` was intended.
        return model_cls.from_pretrained(hf_hub_model, config)

    # A real local path (neither None nor the literal string "None").
    if local_pretrain is not None and local_pretrain != "None":
        checkpoint_config = PretrainedConfig.from_pretrained(local_pretrain)
        return model_cls.from_pretrained(local_pretrain, checkpoint_config)

    # No saved weights requested: build the model directly from config.
    return model_cls(config)
61 |
62 |
# Public API of this package. Entries must be name strings: with objects in
# __all__, `from ltsm.models import *` fails with a TypeError.
__all__ = [
    "register_model",
    "get_model",
    "PatchTST",
    "DLinear",
    "Informer",
    "LTSM",
    "LTSM_WordPrompt",
    "LTSM_Tokenizer",
]
--------------------------------------------------------------------------------
/tests/data_reader/database_reader_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import patch, MagicMock
3 | import ltsm.data_reader.database_reader as db_connector
4 | import pandas as pd
5 |
class TestDatabaseConnector(unittest.TestCase):
    """Tests table setup and CSV insertion of the database reader against mocked connections."""

    def setUp(self):
        # Sample frame with one column per value kind plus a timestamp column.
        sample_columns = {
            'Updated Time': ['06/30/2023 19:01:24', '06/30/2023 19:03:04', '06/30/2023 19:04:44'],
            'Temperature': [61.71, 69.21, 323.64],  # Float
            'Count': [10, 15, 18],  # Integer
            'Status': [True, False, False],  # Boolean
            'Description': ['Normal', 'High', 'Low'],  # String
        }
        self.input_df = pd.DataFrame(sample_columns)
        self.database = "test_database"
        self.table_name = "test_table"

    @staticmethod
    def _wire_connection(mock_create_connection):
        """Return a (connection, cursor) mock pair and make the patched factory yield the connection."""
        connection = MagicMock()
        cursor = MagicMock()
        connection.cursor.return_value = cursor
        mock_create_connection.return_value = connection
        return connection, cursor

    @patch('ltsm.data_reader.database_reader.create_connection')
    def test_setup_tables_with_various_data_types(self, mock_create_connection):
        connection, cursor = self._wire_connection(mock_create_connection)

        db_connector.setup_tables(connection, self.database, self.table_name, self.input_df)

        # Both the database selection and the table creation must have been issued.
        expected_schema = "(ts TIMESTAMP, Temperature FLOAT, Count INT, Status BOOL, Description STRING)"
        expected_statements = (
            f"USE {self.database}",
            f"CREATE TABLE IF NOT EXISTS {self.table_name} {expected_schema}",
        )
        for statement in expected_statements:
            cursor.execute.assert_any_call(statement)

    @patch('ltsm.data_reader.database_reader.create_connection')
    @patch('ltsm.data_reader.database_reader.pd.read_csv')
    def test_insert_data_with_various_data_types(self, mock_read_csv, mock_create_connection):
        connection, cursor = self._wire_connection(mock_create_connection)

        # The CSV read is replaced by the in-memory sample frame.
        mock_read_csv.return_value = self.input_df

        db_connector.insert_data_from_csv(connection, self.database, "dummy_path.csv", self.table_name)

        # execute() must have run, and exactly (row count + 4) times.
        self.assertTrue(cursor.execute.called)
        expected_calls = len(self.input_df) + 4
        self.assertEqual(cursor.execute.call_count, expected_calls)


if __name__ == '__main__':
    unittest.main()
57 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/utils/progress_bar.py:
--------------------------------------------------------------------------------
1 | from IPython.display import HTML
2 | from IPython import get_ipython
3 |
4 |
def progress_bar_terminal(iteration, total, prefix="", suffix="", decimals=0, length=100, fill="█", printend="\r"):
    """Print one frame of a terminal progress bar; meant to be called in a loop.

    Parameters
    ----------
    iteration: int
        current iteration
    total: int
        total iterations
    prefix: str
        text printed before the bar
    suffix: str
        text printed after the percentage
    decimals: int
        number of decimals shown in the percentage
    length: int
        character length of bar
    fill: str
        character used for the completed part of the bar
    printend: str
        end character (e.g. "\r", "\r\n")
    """
    # Percentage text with the requested number of decimals.
    percent_template = "{0:." + str(decimals) + "f}"
    fraction_done = iteration / float(total)
    percent = percent_template.format(100 * fraction_done)

    # Filled cells, then dashes for the remainder.
    filled_cells = int(length * iteration // total)
    empty_cells = length - filled_cells
    bar = fill * filled_cells + "-" * empty_cells

    print("\r%s |%s| %s%% %s" % (prefix, bar, percent, suffix), end=printend)

    # Move to a fresh line once the final iteration is reached.
    if iteration == total:
        print()
35 |
36 |
def progress_bar_notebook(iteration, total=100):
    """Progress bar for notebooks.

    Parameters
    ----------
    iteration: int
        current iteration
    total: int
        total iterations

    Returns
    -------
    IPython.display.HTML object built from the progress template below

    """
    # Completion percentage, truncated to an integer.
    result = int((iteration / total) * 100)
    # NOTE(review): format() receives `value` and `max_value`, but the template
    # text visible here only references {result}; the markup that consumed them
    # may be missing from this copy. Confirm against the upstream file.
    return HTML(
        """

Progress: {result}% Complete



""".format(
            value=iteration, max_value=total, result=result
        )
    )
70 |
71 |
def display_progress_bar(iteration, total, out):
    """Displays progress bar according to python interface.

    The notebook bar is used only when the active IPython shell class is
    "ZMQInteractiveShell" or "Shell" AND a display handle was supplied.
    Every other case falls back to the terminal bar.

    Parameters
    ----------
    iteration: int
        current iteration (zero-based; the bar shows ``iteration + 1``)
    total: int
        total iterations
    out: progress bar notebook output
        object exposing ``update(...)``, or None when unavailable

    """
    shell_name = get_ipython().__class__.__name__
    in_notebook_shell = shell_name in ("ZMQInteractiveShell", "Shell")

    # The original `A or B and out is not None` parsed as `A or (B and ...)`,
    # so a ZMQ shell with out=None called None.update(). Both shells now
    # require a usable `out`.
    if in_notebook_shell and out is not None:
        out.update(progress_bar_notebook(iteration + 1, total))
    else:
        progress_bar_terminal(iteration + 1, total, prefix="Progress:", suffix="Complete", length=50)
    return
--------------------------------------------------------------------------------
/tests/models/DLinear_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ltsm.models import get_model
3 | from ltsm.models.base_config import DLinearConfig
4 | from ltsm.common.base_training_pipeline import TrainingConfig
5 | from transformers import PreTrainedModel
6 | import torch
7 | import numpy as np
8 |
@pytest.fixture
def config(tmp_path):
    """Build a TrainingConfig wrapping a small DLinearConfig, with paths under pytest's tmp_path."""
    csv_path = tmp_path / "test.csv"
    prompt_dir = tmp_path / "prompt_normalize_split"
    prompt_dir.mkdir()
    # NOTE(review): the output directory is derived from the CSV path rather
    # than from tmp_path itself. Confirm this is intended.
    output_dir = csv_path / "output"

    dlinear_config = DLinearConfig(
        pred_len=96,
        enc_in=1,
        seq_len=336,  # Equal to the sequence length + the length of prompt
        individual=0,
        embed="timeF",
    )

    return TrainingConfig(
        model_config=dlinear_config,
        data_path=str(csv_path),
        model="DLinear",
        model_name_or_path="gpt2-medium",
        gradient_accumulation_steps=64,
        test_data_path_list=[str(csv_path)],
        prompt_data_path=str(prompt_dir),
        train_epochs=100,
        patience=10,
        lradj='TST',
        pct_start=0.2,
        freeze=0,
        itr=1,
        batch_size=32,
        learning_rate=1e-3,
        downsample_rate=20,
        output_dir=str(output_dir),
        eval=0,
        local_pretrain="None",
    )
46 |
def test_model_initialization(config):
    """The factory returns a PreTrainedModel instance for the configured model name."""
    model_name = config.train_params["model"]
    print(model_name)
    model = get_model(
        config.model_config,
        model_name=model_name,
        local_pretrain=config.train_params["local_pretrain"],
    )
    assert model is not None
    assert isinstance(model, PreTrainedModel)
52 |
53 |
def test_parameter_count(config):
    """The number of trainable parameters matches the closed-form expectation."""
    train_params = config.train_params
    model = get_model(
        config.model_config,
        model_name=train_params["model"],
        local_pretrain=train_params["local_pretrain"],
    )
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)

    seq_len = config.model_config.seq_len
    pred_len = config.model_config.pred_len
    # Two blocks of (seq_len * pred_len weights + pred_len biases); presumably
    # DLinear's two linear layers. Verify against the model definition.
    expected = 2 * (seq_len * pred_len + pred_len)

    assert trainable == expected
61 |
def test_forward_output_shape(config):
    """A forward pass maps (batch, seq_len, channel) to (batch, pred_len, channel)."""
    # The model is built under float64 so its parameters match the float64
    # input created from np.zeros. The previous default dtype is restored
    # afterwards so this test does not change the dtype seen by later tests.
    previous_dtype = torch.get_default_dtype()
    torch.set_default_dtype(torch.float64)
    try:
        model = get_model(config.model_config, model_name=config.train_params["model"], local_pretrain=config.train_params["local_pretrain"])
        batch_size = 32
        channel = 16
        input_length = config.model_config.seq_len
        inputs = torch.tensor(np.zeros((batch_size, input_length, channel)))
        output = model(inputs)
        assert output.size() == torch.Size([batch_size, config.model_config.pred_len, channel])
    finally:
        torch.set_default_dtype(previous_dtype)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.32.0
2 | aiofiles==24.1.0
3 | aiohappyeyeballs==2.4.4
4 | aiohttp==3.11.10
5 | aiosignal==1.3.2
6 | annotated-types==0.7.0
7 | anyio==4.6.2
8 | asttokens==3.0.0
9 | async-timeout==5.0.1
10 | attrs==24.3.0
11 | autogen-agentchat==0.5.1
12 | autogen-core==0.5.1
13 | autogen-ext==0.5.1
14 | certifi==2024.8.30
15 | charset-normalizer==3.1.0
16 | click==8.1.8
17 | cmake==3.26.3
18 | contourpy==1.0.7
19 | coverage==7.8.0
20 | cycler==0.11.0
21 | decorator==5.2.1
22 | Deprecated==1.2.18
23 | distro==1.9.0
24 | einops==0.6.0
25 | exceptiongroup==1.2.2
26 | executing==2.2.0
27 | fastapi==0.112.2
28 | filelock==3.12.0
29 | fonttools==4.39.3
30 | frozenlist==1.5.0
31 | fsspec==2025.3.2
32 | h11==0.14.0
33 | httpcore==1.0.8
34 | httpx==0.28.1
35 | huggingface-hub==0.30.2
36 | idna==3.4
37 | importlib-resources==5.12.0
38 | importlib_metadata==8.4.0
39 | iniconfig==2.0.0
40 | ipdb==0.13.13
41 | ipython==8.35.0
42 | jedi==0.19.2
43 | Jinja2==3.1.2
44 | jiter==0.9.0
45 | joblib==1.2.0
46 | jsonref==1.1.0
47 | kiwisolver==1.4.4
48 | lit==16.0.1
49 | MarkupSafe==2.1.2
50 | matplotlib==3.7.1
51 | matplotlib-inline==0.1.7
52 | mpmath==1.3.0
53 | multidict==6.4.3
54 | networkx==3.1
55 | numpy==1.24.2
56 | nvidia-cublas-cu11==11.10.3.66
57 | nvidia-cuda-cupti-cu11==11.7.101
58 | nvidia-cuda-nvrtc-cu11==11.7.99
59 | nvidia-cuda-runtime-cu11==11.7.99
60 | nvidia-cudnn-cu11==8.5.0.96
61 | nvidia-cufft-cu11==10.9.0.58
62 | nvidia-curand-cu11==10.2.10.91
63 | nvidia-cusolver-cu11==11.4.0.1
64 | nvidia-cusparse-cu11==11.7.4.91
65 | nvidia-nccl-cu11==2.14.3
66 | nvidia-nvtx-cu11==11.7.91
67 | openai==1.73.0
68 | opentelemetry-api==1.32.0
69 | packaging==23.1
70 | pandas==2.0.0
71 | parso==0.8.4
72 | peft==0.10.0
73 | pexpect==4.9.0
74 | pillow==11.1.0
75 | pluggy==1.5.0
76 | prompt_toolkit==3.0.50
77 | propcache==0.3.1
78 | protobuf==5.29.4
79 | psutil==6.1.0
80 | ptyprocess==0.7.0
81 | pure_eval==0.2.3
82 | pydantic==2.11.3
83 | pydantic_core==2.33.1
84 | Pygments==2.19.1
85 | pyparsing==3.0.9
86 | pytest==8.1.1
87 | pytest-cov==4.1.0
88 | pytest-mock==3.14.0
89 | python-dateutil==2.8.2
90 | pytz==2023.3
91 | PyYAML==6.0
92 | regex==2023.3.23
93 | requests==2.28.2
94 | safetensors==0.5.3
95 | scikit-learn==1.2.2
96 | scipy==1.10.1
97 | six==1.16.0
98 | sniffio==1.3.1
99 | stack-data==0.6.3
100 | starlette==0.38.6
101 | sympy==1.11.1
102 | taos-ws-py==0.3.3
103 | threadpoolctl==3.1.0
104 | tiktoken==0.9.0
105 | tokenizers==0.19.1
106 | tomli==2.0.2
107 | torch==2.0.0
108 | tqdm==4.65.0
109 | traitlets==5.14.3
110 | transformers==4.40.0
111 | triton==2.0.0
112 | typing-inspection==0.4.0
113 | typing_extensions==4.12.2
114 | tzdata==2023.3
115 | urllib3==1.26.15
116 | uvicorn==0.34.0
117 | wcwidth==0.2.13
118 | wrapt==1.17.2
119 | yarl==1.19.0
120 | zipp==3.15.0
121 | ipdb
122 | peft==0.10.0
--------------------------------------------------------------------------------
/multi_agents_pipeline/Readme.md:
--------------------------------------------------------------------------------
1 |
2 | # Quick Command
3 |
4 | ## Run the local LLM Server
5 | The command `CUDA_VISIBLE_DEVICES=1,2,3 uvicorn llm-server:app --port <PORT> --reload` should be run in the `multi_agents_pipeline` directory. For example, `CUDA_VISIBLE_DEVICES=2,3,4 uvicorn llm-server:app` runs the FastAPI app on http://127.0.0.1:8000/ (uvicorn's default host and port).
6 |
7 | ## Run the Pipeline
8 | To execute the full pipeline, go to the `multi_agents_pipeline` folder and run `python main.py`.
9 |
10 | > To use LLama-3-8B-Instruct, please check transformers >= 4.40!
11 |
12 | # Messages and Communication
13 |
14 | ```python
15 | from pydantic import BaseModel
16 | from typing import Optional, List
17 |
18 |
19 | class TextMessage(BaseModel):
20 | """
21 | pass QA related message"""
22 | source: str
23 | content: str
24 |
25 | class TSMessage(BaseModel):
26 | """
27 | passed from Planner to TS Agent, and from TS Agent to QA Agent
28 |
29 | filepath should be a valid path to a csv/tsv file"""
30 | source: str
31 | filepath: str # TODO: Support more possible types
32 | task_type:Optional[str] = None
33 | description: Optional[str] = None
34 |
35 | class TSTaskMessage(BaseModel):
36 | """
37 | passed to Planner
38 |
39 | This message contains a text prompt and the filepath to the data file.
40 | """
41 | description: str
42 | filepath: str
43 | ```
44 | | **Agent** | **Publishes** | **Subscribes** |
45 | |------------------|--------------------------------------------------------|--------------------------------------------------------|
46 | | **Planner** | `Planner-QA` (`TextMessage`)<br>`Planner-TS` (`TSMessage`) | `TSTaskMessage` |
47 | | **TS Agent** | `TS-Info` (`TSMessage`) | `Planner-TS` (`TSMessage`)<br>`Reward-TS` (`TSMessage`) |
48 | | **QA Agent** | `QA-Response` (`TextMessage`) | `Planner-QA` (`TextMessage`)<br>`TS-Info` (`TSMessage`)<br>`Reward-QA` (`TextMessage`) |
49 | | **Reward Agent** | `Reward-QA` (`TextMessage`)<br>`Reward-TS` (`TSMessage`) | `TS-Info` (`TSMessage`)<br>`QA-Response` (`TextMessage`) |
50 |
51 |
52 |
53 | # Agents
54 |
55 | 
56 |
57 | ## Planner
58 |
59 | Receives a `TSTaskMessage` from the user, then generates a TS task and a QA task to be sent to the TS Agent and the QA Agent.
60 |
61 | ## TS Agent
62 |
63 | Handle TSMessage, use Time Series Models(e.g., LTSM) or Chat Models(e.g., ChatGPT) to extract features from time series.
64 |
65 | ## QA Agent
66 |
67 | Combines `TS-Info` and `Planner-QA`, queries the LLM, and publishes the response as `QA-Response`.
68 |
69 | ## Reward Agent
70 |
71 | Gather output of TS Agent and QA Agent. Send Feedback to TS and QA if the evaluation score is lower than a threshold.
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
/multi_agents_pipeline/ltsm_inference.py:
--------------------------------------------------------------------------------
1 | """
2 | TODO Apr 7, 2025 ~ Apr 13. 2025
3 | - Select different models based on task_type
4 |
5 | """
6 | from ltsm.models.base_config import LTSMConfig
7 | from ltsm.models import get_model
8 | from ltsm.data_provider.prompt_generator import prompt_generate_split, prompt_normalization_split
9 | import torch
10 | import torch.nn as nn
11 | import numpy as np
12 | import pandas as pd
13 | from huggingface_hub import login
14 | from ltsm.data_reader.csv_reader import CSVReader
15 | from ltsm.data_provider.tokenizer.standard_scaler import StandardScaler
16 | from pydantic import BaseModel
17 | import os
18 |
19 |
def inference(file: str, task_type: str = "ts-classification") -> str:
    """
    Run the LTSM model over one or more CSV files and cache each output as CSV.

    Currently just a minimal working example. The intended task is to select
    a model according to the task requirements and save its inference results.

    Models that could be selected:
    - LTSM : forecasting
    - DLinear
    - Informer
    - PatchTST

    Args:
        file (str): Whitespace-separated list of CSV file paths.
        task_type (str): Task label; at present it is only written to the log line.

    Returns:
        str: Space-separated paths of the written output files, one per
            input, named "<index>.csv" inside a "cache" directory next to
            this module.
    """
    # login(token="Hugging Face Token")  # Login to Hugging Face Hub if needed
    config = LTSMConfig(seq_len=150, pred_len=150, prompt_len=0)
    # model = get_model(config, "LTSM", local_pretrain=None, hf_hub_model="LSC2204/LTSM-bundle")
    model = get_model(config, "LTSM", local_pretrain=None, hf_hub_model=None)

    csv_paths = file.split()
    print(f"[TS Inferencer] Received inference request with task_type: {task_type}")

    cache_dir = os.path.join(os.path.dirname(__file__), "cache")
    os.makedirs(cache_dir, exist_ok=True)

    model.eval()
    written_paths = []
    for index, csv_path in enumerate(csv_paths):
        frame = CSVReader(csv_path).fetch()
        scaler = StandardScaler()
        scaled_splits, _, _, = scaler.process(
            raw_data=frame.to_numpy(),
            train_data=[frame.to_numpy()],
            val_data=[frame.to_numpy()],
            test_data=[frame.to_numpy()],
            fit_train_only=True,  # Use the training data for scaling
            do_anomaly=False
        )

        # Only the first scaled entry is used; 1-D data becomes a single column.
        series = np.array(scaled_splits[0])
        if series.ndim == 1:
            series = series.reshape(-1, 1)

        # Add a leading batch dimension of size one before calling the model.
        batch = torch.tensor(series, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            prediction = model(batch)

        prediction_np = prediction.squeeze(0).detach().numpy()
        target_path = os.path.join(cache_dir, f"{index}.csv")
        pd.DataFrame(prediction_np).to_csv(target_path, index=False)
        written_paths.append(target_path)

    return " ".join(written_paths)
72 |
73 |
74 |
75 |
76 |
77 | #inference()
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/ltsm/models/ltsm_stat_model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from einops import rearrange
5 | from .base_config import LTSMConfig
6 | from transformers.modeling_utils import PreTrainedModel, PretrainedConfig
7 | from transformers import AutoModel, AutoConfig, AutoTokenizer
8 |
class LTSM(PreTrainedModel):
    """Patch-based time-series model built around a pretrained backbone loaded with AutoModel.

    Each channel is normalised, cut into overlapping patches and projected
    into the backbone's hidden size. The flattened hidden states of all
    patches are then mapped to ``pred_len`` output steps.
    """
    config_class = LTSMConfig

    def __init__(self, configs, *model_args, **model_kwargs):
        super().__init__(configs)
        self.configs = configs
        self.patch_size = configs.patch_size
        self.pretrain = configs.pretrain
        self.stride = configs.stride
        self.d_type = torch.bfloat16

        # The series is right-padded by one stride before patching, which
        # adds one patch to the unpadded count (hence "+ 2" instead of "+ 1").
        self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride))
        unpadded_span = configs.seq_len + configs.prompt_len - self.patch_size
        self.patch_num = unpadded_span // self.stride + 2

        # Only the pretrained-backbone path is implemented.
        if not configs.pretrain:
            raise NotImplementedError("You must load the pretraining weight.")
        print("Loading the pretraining weight.")
        self.llm_config = AutoConfig.from_pretrained(configs.model_name_or_path)
        self.llm = AutoModel.from_pretrained(configs.model_name_or_path)

        self.model_prune(configs)
        print("model = {}".format(self.llm))

        hidden_size = self.llm_config.hidden_size
        self.in_layer = nn.Linear(configs.patch_size, hidden_size)
        self.out_layer = nn.Linear(hidden_size * self.patch_num, configs.pred_len)

        self.cnt = 0

    def model_prune(self, configs):
        """Keep only the first ``configs.gpt_layers`` layers of the backbone.

        Raises:
            NotImplementedError: If the backbone name contains none of
                "gpt2", "phi", "llama" or "gemma".
        """
        backbone_name = configs.model_name_or_path
        if "gpt2" in backbone_name:
            self.llm.h = self.llm.h[:configs.gpt_layers]
        elif any(family in backbone_name for family in ("phi", "llama", "gemma")):
            self.llm.layers = self.llm.layers[:configs.gpt_layers]
        else:
            raise NotImplementedError(f"No implementation in model prune for {self.llm}.")

    def forward(self, x):
        """Map a (batch, length, channels) tensor to (batch, pred_len, channels)."""
        batch, _, channels = x.shape

        # Per-series normalisation along the time axis; the statistics are
        # detached and reused to de-normalise the output.
        means = x.mean(1, keepdim=True).detach()
        centered = x - means
        stdev = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        normalised = centered / stdev

        # Channels become independent sequences, then overlapping patches.
        patches = rearrange(normalised, 'b l m -> b m l')
        patches = self.padding_patch_layer(patches)
        patches = patches.unfold(dimension=-1, size=self.patch_size, step=self.stride)
        patches = rearrange(patches, 'b m n p -> (b m) n p')

        # Patch embeddings are cast to bfloat16 for the backbone, and its
        # hidden states are cast back to the patch dtype afterwards.
        hidden = self.in_layer(patches).to(dtype=torch.bfloat16)
        hidden = self.llm(inputs_embeds=hidden).last_hidden_state
        hidden = hidden.to(dtype=patches.dtype)

        forecast = self.out_layer(hidden.reshape(batch * channels, -1))
        forecast = rearrange(forecast, '(b m) l -> b l m', b=batch)

        # Undo the normalisation.
        return forecast * stdev + means
70 |
--------------------------------------------------------------------------------
/ltsm/data_provider/data_splitter.py:
--------------------------------------------------------------------------------
1 | from ltsm.common.base_splitter import DataSplitter
2 | import pandas as pd
3 | import numpy as np
4 |
5 | from typing import Tuple, List
6 | import logging
7 |
# Configure the root logger when this module is imported: INFO level, with
# timestamp, level name and message in each record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
12 |
class SplitterByTimestamp(DataSplitter):
    """
    Data splitter that cuts each time-series row into train/validation/test
    segments by position along the time axis.
    """
    def __init__(self, seq_len: int, pred_len: int, train_ratio: float, val_ratio: float):
        """
        Stores the window lengths and split ratios.

        Args:
            seq_len (int): The number of timesteps used in the input sequence.
            pred_len (int): The number of timesteps the model should predict for the output sequence.
            train_ratio (float): The fraction of each sequence assigned to training.
            val_ratio (float): The fraction of each sequence assigned to validation.
        """
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.train_ratio = train_ratio
        self.val_ratio = val_ratio

    def get_csv_splits(self, df_data: pd.DataFrame, do_anomaly: bool=False) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
        """
        Splits the .csv data into training, validation and test sets.

        Each DataFrame row is treated as one sequence. Rows whose training
        segment is too short are skipped.

        Args:
            df_data (pd.DataFrame): A Pandas DataFrame containing the data to be split.
            do_anomaly (bool): When True, the training segment only needs
                ``seq_len`` points; otherwise it needs ``seq_len + pred_len``.

        Returns:
            Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
                Four lists: the training, validation and test sequences,
                followed by the row labels of the sequences that were kept.

        Raises:
            ValueError: If the first element of a sequence is itself an array.
        """
        train_split, val_split, test_split, kept_labels = [], [], [], []
        values = df_data.to_numpy()

        # Shortest training segment for which a row is kept.
        min_train_len = self.seq_len if do_anomaly else self.seq_len + self.pred_len

        for label, series in zip(df_data.index, values):
            if len(series) > 0 and isinstance(series[0], np.ndarray):
                logging.error("Time-series should be 1D.")
                raise ValueError("Time-series should be 1D.")

            n_points = len(series)
            train_end = int(n_points * self.train_ratio)
            val_end = train_end + int(n_points * self.val_ratio)

            if train_end < min_train_len:
                continue

            # The validation and test segments also include the seq_len
            # points that precede them.
            train_split.append(series[:train_end])
            val_split.append(series[train_end - self.seq_len:val_end])
            test_split.append(series[val_end - self.seq_len:])
            kept_labels.append(label)

        return train_split, val_split, test_split, kept_labels
72 |
--------------------------------------------------------------------------------
/tests/data_provider/tokenizer/standard_scaler_test.py:
--------------------------------------------------------------------------------
1 | from ltsm.data_provider.tokenizer.standard_scaler import StandardScaler
2 |
3 | import numpy as np
4 | import pytest
5 | import os
6 |
@pytest.fixture
def setup():
    """Scale three linear ramps with StandardScaler and return the scaled splits, the inputs and the processor."""
    scales = [1, 100, 10000]

    def make_ramps():
        # One 100-point ramp per scale: 0, s, 2s, ..., 99s.
        return [np.array([scale * step for step in range(100)]) for scale in scales]

    processor = StandardScaler()
    train_data = make_ramps()
    val_data = make_ramps()
    test_data = make_ramps()
    # Raw series = train, validation and test segments joined end to end.
    raw_data = [np.concatenate(parts) for parts in zip(train_data, val_data, test_data)]

    new_train, new_val, new_test = processor.process(raw_data, train_data, val_data, test_data, fit_train_only=True)
    return new_train, new_val, new_test, train_data, val_data, test_data, raw_data, processor
17 |
def test_standard_scaler_process_on_train_only(setup):
    """Every split is standardised with the mean and std of the training data."""
    new_train, new_val, new_test, train_data, val_data, test_data, raw_data, processor = setup
    pairs = [(new_train, train_data), (new_val, val_data), (new_test, test_data)]

    for scaled, original in pairs:
        assert len(scaled) == len(original)

    for i in range(3):
        mean = np.mean(train_data[i])
        std = np.std(train_data[i])
        for scaled, original in pairs:
            assert scaled[i].shape == original[i].shape
        for j in range(100):
            for scaled, original in pairs:
                assert scaled[i][j] == (original[i][j] - mean) / std
35 |
def test_standard_scaler_process(setup):
    """Every split matches standardisation with the mean and std of the raw data."""
    # NOTE(review): the fixture calls process(..., fit_train_only=True) and
    # raw_data is three identical splits concatenated, so this test may not
    # exercise a separate "fit on raw data" path. Confirm.
    new_train, new_val, new_test, train_data, val_data, test_data, raw_data, processor = setup
    pairs = [(new_train, train_data), (new_val, val_data), (new_test, test_data)]

    for scaled, original in pairs:
        assert len(scaled) == len(original)

    for i in range(3):
        mean = np.mean(raw_data[i])
        std = np.std(raw_data[i])
        for scaled, original in pairs:
            assert scaled[i].shape == original[i].shape
        for j in range(100):
            for scaled, original in pairs:
                assert scaled[i][j] == (original[i][j] - mean) / std
53 |
def test_standard_scaler_save(tmp_path, setup):
    """save() writes processor.pkl into the given directory."""
    save_dir = tmp_path / "save_dir"
    save_dir.mkdir()
    *_, processor = setup
    processor.save(str(save_dir))
    assert os.path.isfile(f"{str(save_dir)}/processor.pkl")
60 |
def test_standard_scaler_load(tmp_path, setup):
    """load() runs without error on a directory written by save()."""
    save_dir = tmp_path / "save_dir"
    save_dir.mkdir()
    *_, processor = setup
    processor.save(str(save_dir))
    # Drop the fitted scaler before reloading it from disk.
    processor._scaler = None
    processor.load(str(save_dir))
    # NOTE(review): this assertion cannot fail; checking the reloaded scaler
    # state (presumably processor._scaler) would be stronger. Confirm.
    assert processor is not None
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/utils/signal_processing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from scipy.interpolate import interp1d
4 |
5 |
def signal_window_splitter(signal, window_size, overlap=0):
    """Cut a signal into fixed-size windows, optionally overlapping.

    Parameters
    ----------
    signal : nd-array or pandas DataFrame
        input signal
    window_size : int
        number of points in each window
    overlap : float
        fraction of overlap between consecutive windows, between 0 and 1 (exclusive)
        Default: 0

    Returns
    -------
    list
        list of signal windows; a trailing remainder shorter than
        ``window_size`` is not returned

    Raises
    ------
    SystemExit
        if ``window_size`` is not an integer, or if the overlap leaves a step of zero
    """
    if not isinstance(window_size, int):
        raise SystemExit('window_size must be an integer.')

    # Distance between the starts of two consecutive windows.
    if overlap == 0:
        step = int(round(window_size))
    else:
        step = int(round(window_size * (1 - overlap)))
    if step == 0:
        raise SystemExit('Invalid overlap. '
                         'Choose a lower overlap value.')

    n_points = len(signal)
    exact_fit = n_points % window_size == 0 and overlap == 0
    # Window starts run to the end on an exact fit, otherwise they stop
    # where a full window no longer fits.
    stop = n_points if exact_fit else n_points - window_size + 1
    return [signal[start:start + window_size] for start in range(0, stop, step)]
32 |
33 |
def merge_time_series(data, fs_resample, time_unit):
    """Resample several time series onto one shared time grid.

    Parameters
    ----------
    data : dict
        maps a name to a DataFrame whose first column holds timestamps and whose
        remaining columns hold the values to interpolate
    fs_resample :
        resample sampling frequency
    time_unit :
        time unit in seconds

    Returns
    -------
    DataFrame
        Interpolated values only (the time column is dropped); columns are named
        ``<name><column index>``

    """
    # Common interval: latest first timestamp to earliest last timestamp.
    spans = np.array([[frame.iloc[0, 0], frame.iloc[-1, 0]] for frame in data.values()])
    start = np.max(spans[:, 0])
    stop = np.min(spans[:, 1])
    n_points = int((stop - start) / ((1 / fs_resample) * time_unit))
    grid = np.linspace(start, stop, n_points)

    # The first column carries the grid itself; it is stripped again before returning.
    merged = np.copy(grid.reshape(len(grid), 1))
    columns = ['time']
    for key, frame in data.items():
        value_axes = range(1, np.shape(frame)[1])
        columns += [key + str(axis) for axis in value_axes]
        resampled = [interp1d(frame.iloc[:, 0], frame.iloc[:, axis])(grid) for axis in value_axes]
        merged = np.hstack((merged, np.array(resampled).T))

    return pd.DataFrame(data=merged[:, 1:], columns=columns[1:])
67 |
68 |
def correlated_features(features, threshold=0.95):
    """List features that are highly correlated with an earlier feature.

    Parameters
    ----------
    features : DataFrame
        features
    threshold :
        absolute Pearson correlation above which a feature is reported
    Returns
    -------
    list
        names of the columns whose absolute correlation with any preceding
        column exceeds ``threshold``

    """
    abs_corr = features.corr().abs()

    # Keep only the strictly-upper triangle so each pair is looked at once and
    # the diagonal (self-correlation) is ignored.
    upper_mask = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    upper_triangle = abs_corr.where(upper_mask)

    flagged = []
    for name in upper_triangle.columns:
        if any(upper_triangle[name] > threshold):
            flagged.append(name)
    return flagged
91 |
--------------------------------------------------------------------------------
/tests/test_scripts/test_pipeline_training.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 |
5 | from ltsm.data_pipeline import StatisticalTrainingPipeline, get_args, seed_all
6 | from ltsm.data_provider.data_loader import HF_Dataset
7 | from ltsm.models.utils import freeze_parameters, print_trainable_parameters
8 | from peft import get_peft_config, get_peft_model, LoraConfig
9 |
10 | from transformers import (
11 | EvalPrediction,
12 | )
13 |
def run():
    """Build a model from the CLI configuration and run it through the pipeline.

    Steps: read the configuration with ``get_args``, call ``seed_all`` with its seed,
    build the model, optionally wrap it with LoRA adapters or freeze its parameters,
    then pass the model together with the collate / loss / metric / prediction
    callbacks to ``StatisticalTrainingPipeline`` and run it.
    """
    # ``get_model`` is called below but was never imported by this script, so
    # ``run()`` failed with NameError. Imported locally to keep the fix self-contained.
    from ltsm.models import get_model

    config = get_args()
    seed = config.seed
    seed_all(seed)

    model = get_model(config)

    if config.lora:
        peft_config = LoraConfig(
            target_modules=["c_attn"],
            inference_mode=False,
            r=config.lora_dim,
            lora_alpha=32,
            lora_dropout=0.1
        )
        model = get_peft_model(model, peft_config)
        model.print_trainable_parameters()

    elif config.freeze:
        freeze_parameters(model)

    print_trainable_parameters(model)

    # Optimizer settings
    # NOTE(review): these two objects are built but never handed to the pipeline
    # below — confirm whether the pipeline is expected to receive them.
    model_optim = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=config.tmax, eta_min=1e-8)

    # Evaluation metrics
    def compute_metrics(p: EvalPrediction):
        """Return MSE and MAE between the predictions and the labels."""
        preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
        preds = np.squeeze(preds)
        # Squeeze the labels only when their shape differs from the squeezed predictions.
        if preds.shape != p.label_ids.shape:
            label_ids = np.squeeze(p.label_ids)
        else:
            label_ids = p.label_ids
        return {
            "mse": ((preds - label_ids) ** 2).mean().item(),
            "mae": (np.abs(preds - label_ids)).mean().item()
        }

    # Loss function
    def compute_loss(model, inputs, return_outputs=False):
        """MSE loss between the model output for ``input_data`` and ``labels``."""
        outputs = model(inputs["input_data"])
        loss = nn.functional.mse_loss(outputs, inputs["labels"])
        return (loss, outputs) if return_outputs else loss

    # Data collator
    def collate_fn(batch):
        """Stack per-sample arrays into float32 batch tensors."""
        return {
            'input_data': torch.from_numpy(np.stack([x['input_data'] for x in batch])).type(torch.float32),
            'labels': torch.from_numpy(np.stack([x['labels'] for x in batch])).type(torch.float32),
        }

    # Prediction step
    @torch.no_grad()
    def prediction_step(model, inputs, prediction_loss_only=False, ignore_keys=None):
        """Run one forward pass and return ``(loss, outputs, labels)``."""
        # CSV
        # The model may or may not be wrapped in a container exposing ``.module``;
        # fall back to the model itself instead of failing with AttributeError.
        device = getattr(model, "module", model).device
        input_data = inputs["input_data"].to(device)
        labels = inputs["labels"].to(device)
        outputs = model(input_data)
        loss = nn.functional.mse_loss(outputs, labels)
        return (loss, outputs, labels)

    pipeline = StatisticalTrainingPipeline(config,
                                           model=model,
                                           collate_fn=collate_fn,
                                           prediction_step=prediction_step,
                                           compute_loss=compute_loss,
                                           compute_metrics=compute_metrics)
    pipeline.run()


if __name__ == "__main__":
    run()
--------------------------------------------------------------------------------
/ltsm/common/sklearn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
def get_default_hyperparameter(primitive, hyperparameter):
    """Return the primitive's default hyperparameters with user overrides applied.

    Parameters
    ----------
    primitive :
        primitive class exposing ``metadata.get_hyperparams().defaults()`` and ``__name__``
    hyperparameter : dict
        hyperparameter names mapped to the values that should replace the defaults

    Returns
    -------
    The defaults object, or the result of its ``replace(hyperparameter)`` when at
    least one override is given.

    Raises
    ------
    TypeError
        if ``hyperparameter`` contains a name the primitive does not define
    """
    hyperparams = primitive.metadata.get_hyperparams().defaults()

    # Reject only names the primitive does not know. The earlier strict-superset
    # comparison also rejected the valid case where every hyperparameter is
    # overridden (and then reported an empty list of offenders).
    unknown = sorted(set(hyperparameter.keys()) - set(hyperparams.keys()), key=str)
    if unknown:
        raise TypeError(primitive.__name__ + ' got unexpected keyword argument ' + str(unknown))

    if hyperparameter:
        hyperparams = hyperparams.replace(hyperparameter)

    return hyperparams
20 |
class BaseSKI:
    """Scikit-learn style ``fit`` / ``predict`` wrapper around a primitive class.

    One primitive instance is created per system; every system receives its own
    2D numpy array, which is converted to a DataFrame before being passed on.

    Parameters
    ----------
    primitive :
        primitive class; instantiated as ``primitive(hyperparams=...)``
    system_num : int, keyword-only
        number of independent systems (primitive instances). Default: 1
    **hyperparameter :
        overrides for the primitive's default hyperparameters
    """

    def __init__(self, primitive, *, system_num=1, **hyperparameter):

        self.fit_available = 'fit' in primitive.__dict__
        self.predict_available = 'produce' in primitive.__dict__
        self.predict_score_available = 'produce_score' in dir(primitive)
        self.produce_available = 'produce' in primitive.__dict__

        # ``_sys_data_check`` reads ``self.system_num``; it was never assigned before,
        # so every ``fit`` / ``predict`` call failed with AttributeError.
        if system_num < 1:
            raise AttributeError('BaseSKI must have positive system_num.')
        self.system_num = system_num

        hyperparams = get_default_hyperparameter(primitive, hyperparameter)
        # ``fit`` iterates over ``self.primitives``, so keep one instance per system.
        self.primitives = [primitive(hyperparams=hyperparams) for _ in range(system_num)]

    def _sys_data_check(self, data):
        """Validate ``data`` against ``system_num`` and return it as a list of 2D arrays.

        Raises
        ------
        AttributeError
            if the input does not have the layout expected for ``system_num``
        """
        if self.system_num == 1:
            if isinstance(data, np.ndarray) and data.ndim == 2:
                data = [data]
            else:
                raise AttributeError('For system_num = 1, input data should be 2D numpy array.')
        elif self.system_num > 1:
            if isinstance(data, list) and len(data) == self.system_num:
                for ts_data in data:
                    if not (isinstance(ts_data, np.ndarray) and ts_data.ndim == 2):
                        raise AttributeError('For system_num > 1, each element of input list should be 2D numpy arrays.')
            else:
                raise AttributeError('For system_num > 1, input data should be the list of `system_num` 2D numpy arrays.')

        return data

    def fit(self, data):
        """Fit each primitive on the data of its system.

        Raises
        ------
        AttributeError
            if the wrapped primitive class does not define ``fit``
        """
        if not self.fit_available:
            raise AttributeError('type object ' + self.__class__.__name__ + ' has no attribute \'fit\'')

        data = self._sys_data_check(data)

        for primitive, sys_data in zip(self.primitives, data):
            primitive.set_training_data(inputs=self._transform(sys_data))
            primitive.fit()

        return

    def predict(self, data):
        """Call ``_produce`` on each primitive and return the results as a list.

        Raises
        ------
        AttributeError
            if the wrapped primitive class does not define ``produce``
        """
        if not self.predict_available:
            raise AttributeError('type object ' + self.__class__.__name__ + ' has no attribute \'predict\'')

        data = self._sys_data_check(data)
        output_data = self._forward(data, '_produce')

        return output_data

    def _forward(self, data, method):
        """Invoke ``method`` (by name) on every primitive with its system's data.

        ``predict`` relied on this helper, but it was not defined on the class.
        Returns one entry per system, in system order.
        NOTE(review): the raw return value of the primitive method is passed through
        unchanged — confirm whether callers expect it to be unwrapped further.
        """
        outputs = []
        for primitive, sys_data in zip(self.primitives, data):
            forward_method = getattr(primitive, method)
            outputs.append(forward_method(inputs=self._transform(sys_data)))
        return outputs

    def _transform(self, X):
        """Convert a 2D array into a DataFrame whose columns are named '0', '1', ..."""
        column_name = [str(col_index) for col_index in range(X.shape[1])]
        # pandas' DataFrame constructor has no ``generate_metadata`` argument; passing
        # it raised TypeError, so it is dropped here.
        # NOTE(review): if the primitives need a metadata-carrying container type,
        # that type must be imported and used here instead — confirm.
        return pd.DataFrame(X, columns=column_name)
79 |
--------------------------------------------------------------------------------
/multi_agents_pipeline/agents/QA_Agent.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pathlib import Path
3 | from typing import Optional, List
4 |
5 | from autogen_core import (
6 | RoutedAgent,
7 | message_handler,
8 | default_subscription,
9 | MessageContext,
10 | DefaultTopicId,
11 | TopicId,
12 | type_subscription
13 | )
14 | from autogen_core.models import ChatCompletionClient, UserMessage, AssistantMessage, SystemMessage
15 | from autogen_core.model_context import BufferedChatCompletionContext
16 | from pydantic import BaseModel
17 | from .custom_messages import TextMessage, TSMessage
18 |
@type_subscription(topic_type="Planner-QA") # for receiving task from Planner
@type_subscription(topic_type="Redo-QA") # for receiving QA Feedback
@type_subscription(topic_type="TS-Info") # for receiving TS info from TS Agent
class QAAgent(RoutedAgent):
    """Agent that answers a planner task with an LLM, using time-series statistics.

    It remembers the most recent plan text it received, and when a time-series
    message arrives it summarises the referenced CSV, asks the model client to
    complete the task, and publishes the answer on the "QA-Response" topic.
    """

    def __init__(self, name: str, model_client: ChatCompletionClient):
        """Store the agent name and model client and set up the chat context.

        Args:
            name: Display name; also used as the message ``source``.
            model_client: Client used to request completions.
        """
        super().__init__(description=f"{name} with LLM support")
        self.name = name
        # Content of the latest TextMessage handled by handle_plan (None until one arrives).
        self._last_plan: Optional[str] = None
        self._model_client = model_client
        # Chat history container created with buffer_size=5.
        self._model_context = BufferedChatCompletionContext(buffer_size=5)
        self._system_messages = [SystemMessage(content="You are a helpful AI assistant.")]

        self._last_llm_response: Optional[str] = None # for evaluation

    @message_handler
    async def handle_plan(self, message: TextMessage, ctx: MessageContext) -> None:
        """Remember the text of an incoming TextMessage as the current plan."""
        # NOTE(review): any TextMessage routed here overwrites the stored plan —
        # presumably that includes "Redo-QA" feedback; confirm this is intended.
        self._last_plan = message.content
        print(f"[{self.name}] Stored plan from {message.source}: {message.content}")

    @message_handler
    async def handle_TS(self, message: TSMessage, ctx: MessageContext) -> None:
        """This is the TS info given by TS Agent

        Reads the CSV at ``message.filepath``, builds a prompt from the stored plan
        and the ``DataFrame.describe()`` summary, queries the model client, stores
        the reply and publishes it as a TextMessage on the "QA-Response" topic.
        """
        # NOTE(review): pd.read_csv is a synchronous call inside an async handler.
        df = pd.read_csv(Path(message.filepath))
        stats = df.describe().to_string()

        # below is the prompt that combine the task and the TS Info.
        # TODO : Modify according to the task type and task description. Currently just a placeholder
        prompt = f"""
        You are a Time Series Expert.

        Here is a task given by the planner:
        {self._last_plan or "(no plan received)"}

        Here is the output of Time-Series Agent:
        {stats}

        Please finish the task based on the above information.
        """

        print(f"[{self.name}] Sending prompt to LLM...")

        user_message = UserMessage(content=prompt, source=self.name)
        await self._model_context.add_message(user_message)

        # send to LLM for response
        # The request consists of the fixed system message followed by the context's messages.
        llm_response = await self._model_client.create(
            self._system_messages + (await self._model_context.get_messages()),
            cancellation_token=ctx.cancellation_token,
        )

        # Only plain-text replies are handled below.
        assert isinstance(llm_response.content, str)

        self._last_llm_response = llm_response.content

        # Record the reply in the context so later requests include it.
        await self._model_context.add_message(
            AssistantMessage(content=self._last_llm_response, source=self.name)
        )
        # publish the inference result of QA Agent
        await self.publish_message(
            TextMessage(source=self.name, content=self._last_llm_response, task = self._last_plan), # add task
            TopicId(type="QA-Response", source=self.id.key) # publish to a specific topic for QA response
        )

    def get_last_response(self) -> Optional[str]:
        """Return the most recent LLM reply, or None if no reply has been stored yet."""
        return self._last_llm_response
85 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/utils/calculate_complexity.py:
--------------------------------------------------------------------------------
1 | import time
2 | import json
3 | import numpy as np
4 | from scipy.optimize import curve_fit
5 | from tsfel.feature_extraction.features_settings import load_json
6 | from tsfel.feature_extraction.calc_features import calc_window_features
7 |
8 |
9 | # curves
def n_squared(x, no):
    """Quadratic model: ``no * x**2``."""
    squared = x ** 2
    return no * squared
13 |
14 |
def n_nlog(x, no):
    """Linearithmic model: ``no * x * log(x)`` (natural logarithm)."""
    scaled = no * x
    return scaled * np.log(x)
18 |
19 |
def n_linear(x, no):
    """Linear model: ``no * x``."""
    scaled = no * x
    return scaled
23 |
24 |
def n_log(x, no):
    """Logarithmic model: ``no * log(x)`` (natural logarithm)."""
    log_x = np.log(x)
    return no * log_x
28 |
29 |
def n_constant(x, no):
    """Constant model: an array of ``len(x)`` entries, each equal to ``no``."""
    baseline = np.zeros(len(x))
    return baseline + no
33 |
34 |
def find_best_curve(t, signal):
    """Finds the best curve.

    Fits each candidate model (squared, nlog, linear, log, constant) to the data
    with ``scipy.optimize.curve_fit`` and selects the one with the smallest
    chi-square.

    Parameters
    ----------
    t : nd-array
        Log space
    signal : nd-array
        Mean execution time array

    Returns
    -------
    str
        Best fit curve name: the model function's name without its ``n_`` prefix

    """
    list_curves = [n_squared, n_nlog, n_linear, n_log, n_constant]

    # Model parameters: the same fixed uncertainty is assumed for every point.
    stdev = 2
    sig = np.zeros(len(signal)) + stdev

    all_chisq = []
    for curve in list_curves:
        # Single free parameter, started at 1.
        popt, _ = curve_fit(curve, t, signal, sigma=sig, p0=1, absolute_sigma=True)

        # Compute chi square
        residuals = signal - curve(t, *popt)
        all_chisq.append(np.sum((residuals / stdev) ** 2))

    best_curve = list_curves[np.argmin(all_chisq)]

    # Take the name from the function object rather than parsing its repr
    # ("<function n_xxx at 0x...>"), which depends on the repr format.
    return best_curve.__name__[len("n_"):]
79 |
80 |
def compute_complexity(feature, domain, json_path, **kwargs):
    """Computes the feature complexity.

    Times the feature on prefixes of a sine wave of increasing length, fits the
    mean run time per length against the candidate growth curves, stores the
    best-fitting curve name under the feature's ``complexity`` key and rewrites
    the json file.

    Parameters
    ----------
    feature : string
        Feature name
    domain : string
        Feature domain
    json_path: json
        Features json file
    **kwargs:
        See below:
            * *features_path* (``string``) --
                Directory of script with personal features

    Returns
    -------
    int
        Feature complexity: 1 for constant/log, 2 for linear, 3 for nlog/squared,
        0 for any other curve name

    Writes complexity in json file

    """

    dictionary = load_json(json_path)

    features_path = kwargs.get('features_path', None)

    # The inputs from this function should be replaced by a dictionary
    one_feat_dict = {domain: {feature: dictionary[domain][feature]}}

    # Six input lengths, log-spaced between 1e3 and 1e5 samples.
    t = np.logspace(3.0, 5.0, 6)
    f = 0.05
    fs = 100
    x = np.arange(0, t[-1] + 1, 1)
    wave = np.sin(2 * np.pi * f * x / fs)

    signal = []
    for ti in t:
        window = wave[:int(ti)]
        # Timings are collected per input length. They used to accumulate across
        # lengths, so each entry was a running mean over all smaller lengths
        # rather than the mean run time at this length.
        timings = []
        for _ in range(20):
            # perf_counter is the monotonic clock intended for measuring intervals.
            start = time.perf_counter()
            calc_window_features(one_feat_dict, window, fs, features_path=features_path)
            timings.append(time.perf_counter() - start)

        signal.append(np.mean(timings))

    curve_name = find_best_curve(t, signal)
    dictionary[domain][feature]['complexity'] = curve_name

    with open(json_path, "w") as write_file:
        json.dump(dictionary, write_file, indent=4, sort_keys=True)

    complexity_by_curve = {
        'constant': 1,
        'log': 1,
        'linear': 2,
        'nlog': 3,
        'squared': 3,
    }
    return complexity_by_curve.get(curve_name, 0)
145 |
--------------------------------------------------------------------------------
/tests/data_pipeline/stat_pipeline_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ltsm.models.base_config import LTSMConfig
3 | from ltsm.data_provider.dataset import TSDataset
4 | from ltsm.data_pipeline import StatisticalTrainingPipeline
5 | from ltsm.common.base_training_pipeline import TrainingConfig
6 | from transformers import TrainingArguments
7 |
@pytest.fixture
def mock_config():
    """Build a ``TrainingConfig`` holding mock training and model settings."""
    model_config = LTSMConfig(
        gpt_layers=3,
        patch_size=16,
        pretrain=True,
        stride=2,
        seq_len=256,
        pred_len=12,
        prompt_len=8,
    )

    # Training parameters are passed as keyword arguments, in the same order as before.
    return TrainingConfig(
        model_config,
        model='LTSM',
        model_name_or_path='gpt2-medium',
        log_file='log.txt',
        data_path='./datasets',
        prompt_data_path='./prompt_bank',
        output_dir='./output',
        train_ratio=0.7,
        val_ratio=0.1,
        tmax=10,
        learning_rate=5e-5,
        downsample_rate=10,
        train_epochs=8,
        batch_size=100,
        eval=False,
        lora=False,
        freeze=False,
        data_processing='standard_scaler',
        gradient_accumulation_steps=1,
    )
45 |
@pytest.fixture
def pipeline(mock_config):
    """Pipeline under test, constructed from the mock configuration."""
    stat_pipeline = StatisticalTrainingPipeline(mock_config)
    return stat_pipeline
50 |
def test_initialization(pipeline, mock_config):
    """The pipeline keeps the config and mirrors its settings in ``training_args``."""
    assert pipeline.config == mock_config

    # training_args attribute -> key in mock_config.train_params it must equal
    expected_sources = (
        ("output_dir", "output_dir"),
        ("per_device_train_batch_size", "batch_size"),
        ("per_device_eval_batch_size", "batch_size"),
        ("num_train_epochs", "train_epochs"),
        ("learning_rate", "learning_rate"),
        ("gradient_accumulation_steps", "gradient_accumulation_steps"),
    )
    for attribute, param_key in expected_sources:
        assert getattr(pipeline.training_args, attribute) == mock_config.train_params[param_key]
61 |
62 |
def test_run_training(mocker, pipeline):
    """With ``eval`` disabled, ``run()`` trains, saves the model and evaluates."""
    # Replace dataset loading and the Trainer class with mocks.
    fake_datasets = (TSDataset([], 0, 0), TSDataset([], 0, 0), [None, None, None, None], None)
    get_datasets_mock = mocker.patch.object(pipeline, 'get_datasets', return_value=fake_datasets)
    trainer_cls = mocker.patch('ltsm.data_pipeline.stat_pipeline.Trainer')
    trainer_cls.evaluate.return_value = None

    pipeline.run()

    # Datasets must be requested exactly once.
    get_datasets_mock.assert_called_once()

    trainer = trainer_cls.return_value

    # Training and saving only happen when eval-only mode is off.
    if not pipeline.config.train_params["eval"]:
        assert trainer.train.called
        assert trainer.save_model.called

    assert trainer.evaluate.call_count == 4
    assert trainer.save_metrics.call_count == 5
    assert trainer.log_metrics.call_count == 5
82 |
83 |
def test_run_evaluation_only(mocker, pipeline):
    """In eval-only mode ``run()`` skips training but still evaluates and saves metrics."""
    pipeline.config.train_params["eval"] = True  # Set eval-only mode

    # Replace dataset loading and the Trainer class with mocks.
    fake_datasets = (TSDataset([], 0, 0), TSDataset([], 0, 0), [None, None, None, None], None)
    get_datasets_mock = mocker.patch.object(pipeline, 'get_datasets', return_value=fake_datasets)
    trainer_cls = mocker.patch('ltsm.data_pipeline.stat_pipeline.Trainer')

    pipeline.run()

    # Datasets must be requested exactly once.
    get_datasets_mock.assert_called_once()

    trainer = trainer_cls.return_value
    assert not trainer.train.called
    assert trainer.evaluate.called
    assert trainer.save_metrics.called
--------------------------------------------------------------------------------
/tests/models/PatchTST_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ltsm.models import get_model
3 | from ltsm.models.base_config import PatchTSTConfig
4 | from ltsm.common.base_training_pipeline import TrainingConfig
5 | from transformers import PreTrainedModel
6 | import torch
7 | import numpy as np
8 |
@pytest.fixture
def config(tmp_path):
    """Build a ``TrainingConfig`` for PatchTST rooted in pytest's temporary directory."""
    data_path = tmp_path / "test.csv"
    prompt_data_path = tmp_path / "prompt_normalize_split"
    prompt_data_path.mkdir()
    # NOTE(review): the output directory is nested under the CSV file path
    # ("test.csv/output") — confirm this is intended.
    output_path = data_path / "output"

    patchtst_config = PatchTSTConfig(
        pred_len=96,
        enc_in=1,
        seq_len=336,
        patch_len=16,
        decomposition=False,
        stride=8,
        e_layers=3,
        n_heads=16,
        d_model=128,
        d_ff=256,
        dropout=0.2,
        fc_dropout=0.2,
        head_dropout=0,
        revin=True,
        affine=True,
        subtract_last=False,
        individual=False,
    )

    # Training parameters are passed as keyword arguments, in the same order as before.
    return TrainingConfig(
        patchtst_config,
        data_path=str(data_path),
        model="PatchTST",
        model_name_or_path="gpt2-medium",
        gradient_accumulation_steps=64,
        test_data_path_list=[str(data_path)],
        prompt_data_path=str(prompt_data_path),
        train_epochs=1000,
        patience=10,
        lradj='TST',
        pct_start=0.2,
        freeze=0,
        itr=1,
        batch_size=32,
        learning_rate=1e-3,
        downsample_rate=20,
        output_dir=str(output_path),
        des='Exp',
        eval=0,
    )
58 |
def test_model_initialization(config):
    """``get_model`` returns a ``PreTrainedModel`` instance for the PatchTST config."""
    train_params = config.train_params
    model = get_model(
        config.model_config,
        model_name=train_params["model"],
        local_pretrain=train_params["local_pretrain"],
    )
    assert model is not None
    assert isinstance(model, PreTrainedModel)
63 |
64 |
def test_parameter_count(config):
    """The number of trainable parameters matches the count derived from the config."""
    model = get_model(
        config.model_config,
        model_name=config.train_params["model"],
        local_pretrain=config.train_params["local_pretrain"],
    )
    param_count = sum(p.numel() for p in model.parameters() if p.requires_grad)

    cfg = config.model_config
    d_model = cfg.d_model
    d_ff = cfg.d_ff
    patch_num = int((cfg.seq_len - cfg.patch_len) / cfg.stride + 1)

    # Per encoder layer
    # multi-head self-attention (W_Q, W_K, W_V, to_out)
    attention = 4 * (d_model * d_model + d_model)
    # feed-forward network
    feed_forward = 2 * d_model * d_ff + d_model + d_ff
    # layer norm
    layer_norm = 4 * d_model
    encoder_layers = cfg.e_layers * (attention + feed_forward + layer_norm)

    # Parts that appear once
    input_encoding = cfg.patch_len * d_model + d_model
    positional_encoding = patch_num * d_model
    revin = 2
    flatten_head = d_model * patch_num * cfg.pred_len + cfg.pred_len

    expected_param_count = (
        encoder_layers + input_encoding + positional_encoding + revin + flatten_head
    )

    assert param_count == expected_param_count
93 |
def test_forward_output_shape(config):
    """A forward pass maps (batch, seq_len, channel) to (batch, pred_len, channel)."""
    model = get_model(
        config.model_config,
        model_name=config.train_params["model"],
        local_pretrain=config.train_params["local_pretrain"],
    )
    batch_size, channel = 32, 16
    input_length = config.model_config.seq_len

    # All-zero float32 batch, built via numpy exactly as before.
    zeros = np.zeros((batch_size, input_length, channel))
    batch = torch.tensor(zeros).float()

    output = model(batch)

    expected_shape = torch.Size([batch_size, config.model_config.pred_len, channel])
    assert output.size() == expected_shape
--------------------------------------------------------------------------------
/ltsm/models/DLinear.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/yuqinie98/PatchTST, with minor modifications
2 | import torch
3 | from torch import Tensor
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import numpy as np
7 | from transformers import PreTrainedModel
8 | from .base_config import DLinearConfig
9 |
class DLinear(PreTrainedModel):
    """
    Decomposition-Linear

    Splits the input into a seasonal and a trend component and projects each from
    ``seq_len`` to ``pred_len`` with a linear layer, either shared across channels
    or one per channel when ``config.individual`` is set.
    """
    config_class = DLinearConfig

    def __init__(self, config: DLinearConfig, **kwargs):
        super().__init__(config)
        self.seq_len = config.seq_len
        self.pred_len = config.pred_len

        # Decomposition kernel size
        kernel_size = 25
        self.decompsition = series_decomp(kernel_size)
        self.individual = config.individual
        self.channels = config.enc_in

        if not self.individual:
            # One seasonal and one trend projection shared by all channels.
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
        else:
            # One seasonal and one trend projection per channel, created pairwise.
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            for _ in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))

    def forward(self, x: Tensor):
        # x: [Batch, Input length, Channel]
        seasonal, trend = self.decompsition(x)
        # Move time to the last axis so the linear layers act along it.
        seasonal = seasonal.permute(0, 2, 1)
        trend = trend.permute(0, 2, 1)

        if not self.individual:
            seasonal_output = self.Linear_Seasonal(seasonal)
            trend_output = self.Linear_Trend(trend)
        else:
            seasonal_output = torch.zeros(
                [seasonal.size(0), seasonal.size(1), self.pred_len],
                dtype=seasonal.dtype,
            ).to(seasonal.device)
            trend_output = torch.zeros(
                [trend.size(0), trend.size(1), self.pred_len],
                dtype=trend.dtype,
            ).to(trend.device)
            for ch in range(self.channels):
                seasonal_output[:, ch, :] = self.Linear_Seasonal[ch](seasonal[:, ch, :])
                trend_output[:, ch, :] = self.Linear_Trend[ch](trend[:, ch, :])

        combined = seasonal_output + trend_output
        return combined.permute(0, 2, 1)  # to [Batch, Output length, Channel]
62 |
63 |
class moving_avg(nn.Module):
    """
    Moving average over the time axis, used to extract the trend of a series.

    The input is edge-padded on both sides with ``(kernel_size - 1) // 2`` copies
    of its first / last time step before average pooling.
    """
    def __init__(self, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # x is indexed as [batch, time, channel]
        pad_len = (self.kernel_size - 1) // 2
        head = x[:, :1, :].repeat(1, pad_len, 1)
        tail = x[:, -1:, :].repeat(1, pad_len, 1)
        padded = torch.cat([head, x, tail], dim=1)

        # AvgPool1d pools over the last axis, so swap time and channel around it.
        pooled = self.avg(padded.transpose(1, 2))
        return pooled.transpose(1, 2)
81 |
82 |
class series_decomp(nn.Module):
    """
    Split a series into its moving-average trend and the remaining residual.
    """
    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        # Returns (residual, trend), in that order.
        trend = self.moving_avg(x)
        residual = x - trend
        return residual, trend
--------------------------------------------------------------------------------
/multi_agents_pipeline/main.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | from autogen_core import AgentId, SingleThreadedAgentRuntime, TopicId
4 | from autogen_core.models import (
5 | ChatCompletionClient,
6 | LLMMessage,
7 | SystemMessage,
8 | UserMessage,
9 | )
10 | from agents.QA_Agent import QAAgent
11 | from agents.TS_Agent import TSAgent
12 | from agents.Planning_Agent import PlanningAgent
13 | from agents.Reward_Agent import RewardAgent
14 | from agents.custom_messages import TextMessage, TSMessage, TSTaskMessage
15 | from autogen_core import TRACE_LOGGER_NAME
16 | import aiofiles
17 | import yaml
18 |
19 | QA_MODEL_CONFIG_PATH = "model_config.yaml"
20 |
async def get_model_client(model_config_path: str) -> ChatCompletionClient:
    """Load a chat-completion client from the YAML component config at the given path."""
    async with aiofiles.open(model_config_path, "r") as config_file:
        raw_yaml = await config_file.read()
    model_config = yaml.safe_load(raw_yaml)
    return ChatCompletionClient.load_component(model_config)
25 |
async def main() -> None:
    """Register the four agents, publish an example plan and TS message, and run until idle.

    The model client is loaded from ``QA_MODEL_CONFIG_PATH`` and shared by the
    planning, QA and reward agents; the TS agent is created without one.
    """

    runtime = SingleThreadedAgentRuntime()

    model_client = await get_model_client(QA_MODEL_CONFIG_PATH)

    await PlanningAgent.register(
        runtime,
        "Planning_Agent",
        lambda: PlanningAgent(name="Planning_Agent", model_client=model_client),
    )

    await QAAgent.register(
        runtime,
        "QA_Agent",
        lambda: QAAgent(name="QA_Agent", model_client=model_client),
    )

    # Register the TS Agent
    await TSAgent.register(
        runtime,
        "TS_Agent",
        lambda: TSAgent(name="TS_Agent"),
    )

    # Register the Reward Agent
    # NOTE(review): force_bad_score=True looks like a debugging switch — confirm it
    # should stay enabled here.
    await RewardAgent.register(
        runtime,
        "Reward_Agent",
        lambda: RewardAgent(name="Reward_Agent", model_client=model_client, force_bad_score=True),
    )

    runtime.start()

    # # mock a plan message from planner
    # await runtime.send_message(
    #     TextMessage(source="user", content="TS classification"),
    #     AgentId("QA_Agent", "default"),
    # )

    # # mock a TS Info message from TS Agent
    # await runtime.send_message(
    #     TSMessage(source="user", filepath="../datasets/UCR-gunpoint/sample_0000.csv",task_type="TS_classification", description="TS Data"),
    #     AgentId("TS_Agent", "default"),
    # )

    # Publish the plan text first, then the time-series message.
    await runtime.publish_message(
        TextMessage(source="Planner", content="TS classification"),
        topic_id=TopicId(
            type="Planner-QA", # This is the topic for Planner to send the initial plan
            source="Planner"
        )
    )

    await runtime.publish_message(
        TSMessage(
            source="Planner",
            filepath="../datasets/UCR-gunpoint/sample_0000.csv", # Example file path
            task_type="ts-classification", # Example task type
            description="TS Data"
        ),
        topic_id=TopicId(
            type="Planner-TS", # This is the topic for TS Agent to send the TS info
            source="Planner"
        )
    )


    # mock a TSTaskMessage from user
    # ts_task_message = TSTaskMessage(
    #     description="The file contains time series data of the hand motion of an actor raising their arm. From this data alone, tell me if the actor is raising a gun or pointing their finger.",
    #     filepath="../datasets/GunPointAgeSpan/GunPointAgeSpan_TRAIN.tsv"
    # )
    # await runtime.send_message(ts_task_message, AgentId("Planning_Agent", "default"))


    # Return once the runtime reports it is idle.
    await runtime.stop_when_idle()
103 |
if __name__ == "__main__":
    logging.basicConfig(level=logging.WARNING)

    # Cap these loggers at WARNING before starting the event loop.
    capped_loggers = (
        "autogen_core",
        "autogen_core.events",
        "autogen_core.runtime",
        "uvicorn.access",
        TRACE_LOGGER_NAME,
    )
    for logger_name in capped_loggers:
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    asyncio.run(main())
112 |
--------------------------------------------------------------------------------
/tutorial/README.md:
--------------------------------------------------------------------------------
1 | # Tutorial of LTSM-bundle
2 |
3 |
4 | ## Installation
5 | ```
6 | conda create -n ltsm python=3.8.0
7 | conda activate ltsm
8 | git clone git@github.com:daochenzha/ltsm.git
9 | cd ltsm
10 | pip3 install -e .
11 | pip3 install -r requirements.txt
12 | ```
13 |
14 |
15 | ## :bookmark: Step 0: Collect Datasets and Time Series Prompts
16 |
17 | ### :cyclone: You can use our prepared datasets to on-board yourselves on LTSM-bundle
18 |
19 | ### Download training datasets
20 | ```bash
21 | cd datasets
22 | download: https://drive.google.com/drive/folders/1hLFbz0FRxdiDCzgFYtKCOPJYSBVvwW9P
23 | ```
24 |
25 | ### Download time series prompts
26 | ```bash
27 | cd prompt_bank/propmt_data_csv
28 | download: https://drive.google.com/drive/folders/1hLFbz0FRxdiDCzgFYtKCOPJYSBVvwW9P
29 | ```
30 |
31 | ### Check word prompts
32 | ```bash
33 | cd prompt_bank/text_prompt_data_csv/
34 | check: csv_prompt.json
35 | ```
36 |
37 | ## :bookmark: Step 1: Customize Datasets and Time Series Prompts
38 |
39 | ### :cyclone: If you prefer to build LTSM-bundle on your own dataset, please follow the 5-step instructions below:
40 |
41 | **Step 1-a.** Prepare your dataset. Make sure your local data folder looks like this:
42 | ````angular2html
43 | - ltsm/
44 | - datasets/
45 | DATA_1.csv/
46 | DATA_2.csv/
47 | ...
48 | ````
49 |
50 | **Step 1-b.** Generate the time series prompts from the training, validation, and test datasets:
51 | ````angular2html
52 | python3 prompt_generate_split.py
53 | ````
54 |
55 | **Step 1-c.** Find the generated time series prompts in the './prompt_data_split' folder. Then run the following command for normalizing the prompts:
56 | ````angular2html
57 | python3 prompt_normalization_split.py --mode fit
58 | ````
59 |
60 | **Step 1-d.** Run this command to export the prompts to the "./prompt_data_normalize_split" folder:
61 | ````angular2html
62 | python3 prompt_normalization_split.py --mode transform
63 | ````
64 |
65 | **Step 1-e.** Modify the word prompt based on your dataset description in "prompt_bank/text_prompt_data_csv/csv_prompt.json":
66 | ````angular2html
67 | vim prompt_bank/text_prompt_data_csv/csv_prompt.json
68 | ````
69 |
70 | ## :bookmark: Step 2: Customize your own LTSM-bundle
71 |
72 | ### :cyclone: Now, it's time to build your own LTSM-bundle!!
73 |
74 | #### Option-(1) Explore [Word Prompt] and [Linear Tokenization] on gpt2-medium
75 | ```bash
76 | python3 main_ltsm.py \
77 | --model LTSM_WordPrompt \
78 | --model_name_or_path gpt2-medium \
79 | --train_epochs 500 \
80 | --batch_size 10 \
81 | --pred_len 96 \
82 | --data_path "datasets/ETT-small/ETTh1.csv" \
83 | --test_data_path_list "datasets/ETT-small/ETTh1.csv" \
84 | --prompt_data_path "prompt_bank/text_prompt_data_csv/csv_prompt.json" \
85 | --freeze 0 \
86 | --learning_rate 1e-3 \
87 | --downsample_rate 20 \
88 | --output_dir [Your_Output_Path] \
89 | ```
90 |
91 | #### Option-(2) Explore [Time Series Prompt] and [Linear Tokenization] on gpt2-medium
92 | ```bash
93 | python3 main_ltsm.py \
94 | --model LTSM \
95 | --model_name_or_path gpt2-medium \
96 | --train_epochs 500 \
97 | --batch_size 10 \
98 | --pred_len 96 \
99 | --data_path "datasets/ETT-small/ETTh1.csv" \
100 | --test_data_path_list "datasets/ETT-small/ETTh1.csv" \
101 | --prompt_data_path "prompt_bank/prompt_data_normalize_split" \
102 | --freeze 0 \
103 | --learning_rate 1e-3 \
104 | --downsample_rate 20 \
105 | --output_dir [Your_Output_Path] \
106 | ```
107 |
108 | #### Option-(3) Finetune your dataset based on pre-trained LTSM-bundle model: [Time Series Prompt] and [Linear Tokenization] on gpt2-medium
109 | ```bash
110 | python3 main_ltsm.py \
111 | --model LTSM \
112 | --model_name_or_path gpt2-medium \
113 | --local_pretrain LSC2204/LTSM-bundle \ # This model weight is for pred_len == 96
114 | --train_epochs 500 \
115 | --batch_size 10 \
116 | --pred_len 96 \
117 | --data_path "datasets/ETT-small/ETTh1.csv" \
118 | --test_data_path_list "datasets/ETT-small/ETTh1.csv" \
119 | --prompt_data_path "prompt_bank/prompt_data_normalize_split" \
120 | --freeze 0 \
121 | --learning_rate 1e-3 \
122 | --downsample_rate 20 \
123 | --output_dir [Your_Output_Path] \
124 | ```
125 |
126 |
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/utils/add_personal_features.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | import json
4 | import os
5 | import sys
6 | import warnings
7 | from inspect import getmembers, isfunction
8 |
9 | from tsfel.feature_extraction.features_settings import load_json
10 | from tsfel.utils.calculate_complexity import compute_complexity
11 |
12 |
def add_feature_json(features_path, json_path):
    """Adds new feature to features.json.

    The module at ``features_path`` is imported (and reloaded, so edits made
    since a previous import are picked up). Every object in that module that
    carries a non-None ``domain`` attribute is written to the json file under
    that domain, after which its complexity is computed via
    ``compute_complexity``. A warning is issued when no such object is found.

    Parameters
    ----------
    features_path: string
        Personal Python module directory containing new features implementation.

    json_path: string
        Personal .json file directory containing existing features from TSFEL.
        New customised features will be added to file in this directory.

    """

    module_file = features_path.split(os.sep)[-1]
    module_name = module_file[:-3]  # drop the trailing ".py"

    # Make the directory that holds the module importable.
    sys.path.append(features_path[:-len(module_file) - 1])

    # Import through importlib instead of exec(): reading a name created by
    # exec() back out of locals() is not reliable inside a function (it stops
    # working on Python 3.13+). Reload so that a module imported earlier
    # reflects its current source.
    pymodule = importlib.reload(importlib.import_module(module_name))

    # Check if @set_domain was declared on features module
    vset_domain = False

    for fname, f in list(vars(pymodule).items()):
        domain = getattr(f, "domain", None)
        if domain is None:
            continue

        vset_domain = True

        # Access to personal features.json (re-read on every feature because
        # compute_complexity below receives the same json_path)
        feat_json = load_json(json_path)

        tag = getattr(f, "tag", None)

        # Description: first line of the docstring, if any
        descrip = f.__doc__.split("\n")[0] if f.__doc__ is not None else ""

        # Retrieve default values of arguments
        spec = inspect.getfullargspec(f)
        defaults = dict(zip(spec.args[::-1], (spec.defaults or ())[::-1]))
        defaults.update(spec.kwonlydefaults or {})

        # The first positional argument is skipped; every other argument
        # without a default gets a placeholder.
        for p in spec.args[1:]:
            if p not in defaults:
                # Assigning a default value for fs if not given
                defaults[p] = 100 if p == 'fs' else None
        if len(defaults) == 0:
            defaults = ""

        # Settings of new feature
        new_feature = {"description": descrip,
                       "parameters": defaults,
                       "function": fname,
                       "use": "yes"
                       }

        # Create the domain entry when it does not exist yet
        feat_json.setdefault(domain, {})[fname] = new_feature

        # Insert tag if it is declared
        if tag is not None:
            feat_json[domain][fname]['tag'] = tag

        # Write new feature on json file
        with open(json_path, "w") as fout:
            json.dump(feat_json, fout, indent=" ")

        # Calculate feature complexity
        compute_complexity(fname, domain, json_path, features_path=features_path)
        print('Feature '+str(fname)+' was added.')

    if vset_domain is False:
        warnings.warn('No features were added. Please declare @set_domain.', stacklevel=2)
111 |
112 |
113 |
--------------------------------------------------------------------------------
/ltsm/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 |
8 |
class TimeFeature:
    """Base class for callables that turn a ``pd.DatetimeIndex`` into a feature array.

    Subclasses override ``__call__``; the base implementation returns None.
    """

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return None

    def __repr__(self):
        return f"{type(self).__name__}()"
18 |
19 |
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Seconds run 0..59, so dividing by 59 maps them onto [0, 1].
        return index.second / 59.0 - 0.5
25 |
26 |
class MinuteOfHour(TimeFeature):
    """Minute of hour, rescaled to the interval [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Minutes run 0..59, so dividing by 59 maps them onto [0, 1].
        unit_scaled = index.minute / 59.0
        return unit_scaled - 0.5
32 |
33 |
class HourOfDay(TimeFeature):
    """Hour of day, rescaled to the interval [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Hours run 0..23, so dividing by 23 maps them onto [0, 1].
        unit_scaled = index.hour / 23.0
        return unit_scaled - 0.5
39 |
40 |
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # dayofweek runs 0..6, so dividing by 6 maps it onto [0, 1].
        return index.dayofweek / 6.0 - 0.5
46 |
47 |
class DayOfMonth(TimeFeature):
    """Day of month, rescaled to the interval [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Shift the 1-based day to start at zero before scaling by 30.
        zero_based_day = index.day - 1
        return zero_based_day / 30.0 - 0.5
53 |
54 |
class DayOfYear(TimeFeature):
    """Day of year, rescaled to the interval [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Shift the 1-based day to start at zero before scaling by 365.
        zero_based_day = index.dayofyear - 1
        return zero_based_day / 365.0 - 0.5
60 |
61 |
class MonthOfYear(TimeFeature):
    """Month of year, rescaled to the interval [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Shift the 1-based month to start at zero before scaling by 11.
        zero_based_month = index.month - 1
        return zero_based_month / 11.0 - 0.5
67 |
68 |
class WeekOfYear(TimeFeature):
    """ISO week of year, rescaled to the interval [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # NOTE(review): isocalendar() returns a DataFrame, so this value is a
        # pandas Series rather than an Index like the sibling features -
        # confirm callers are fine with that.
        iso_week = index.isocalendar().week
        return (iso_week - 1) / 52.0 - 0.5
74 |
75 |
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Build the time-feature instances that suit a pandas frequency string.

    The string is parsed with ``to_offset`` and matched, in declaration
    order, against the offset types below; the first match wins.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the supported offset types.
    """

    calendar_day_features = [DayOfWeek, DayOfMonth, DayOfYear]

    # (offset type, feature classes) pairs, checked top to bottom.
    features_by_offsets = (
        (offsets.YearEnd, []),
        (offsets.QuarterEnd, [MonthOfYear]),
        (offsets.MonthEnd, [MonthOfYear]),
        (offsets.Week, [DayOfMonth, WeekOfYear]),
        (offsets.Day, calendar_day_features),
        (offsets.BusinessDay, calendar_day_features),
        (offsets.Hour, [HourOfDay] + calendar_day_features),
        (offsets.Minute, [MinuteOfHour, HourOfDay] + calendar_day_features),
        (offsets.Second, [SecondOfMinute, MinuteOfHour, HourOfDay] + calendar_day_features),
    )

    offset = to_offset(freq_str)

    for offset_type, feature_classes in features_by_offsets:
        if not isinstance(offset, offset_type):
            continue
        return [feature_cls() for feature_cls in feature_classes]

    supported_freq_msg = f"""
Unsupported frequency {freq_str}
The following frequencies are supported:
Y - yearly
alias: A
M - monthly
W - weekly
D - daily
B - business days
H - hourly
T - minutely
alias: min
S - secondly
"""
    raise RuntimeError(supported_freq_msg)
131 |
132 |
def time_features(dates, freq='h'):
    """Stack one row per time feature for ``dates``.

    The features are chosen by ``time_features_from_frequency_str(freq)`` and
    each is applied to ``dates``; the results are stacked with ``np.vstack``.
    """
    feature_fns = time_features_from_frequency_str(freq)
    rows = [feature_fn(dates) for feature_fn in feature_fns]
    return np.vstack(rows)
135 |
--------------------------------------------------------------------------------
/ltsm/data_provider/tokenizer/standard_scaler.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import numpy as np
4 | from sklearn.preprocessing import StandardScaler as SKStandardScaler
5 |
6 | from ltsm.common.base_processor import BaseProcessor
7 | from typing import Tuple, List
8 |
9 |
class StandardScaler(BaseProcessor):
    """
    Processor that standardizes sequences with Sklearn's Standard Scaler.

    A fresh Sklearn scaler is fitted for every sequence handled by
    ``process``; only the most recently fitted one is kept on the instance,
    and that is the scaler used by ``inverse_process`` and ``save``.

    Attributes:
        module_id (str): The identifier for base processor objects.
    """
    module_id = "standard_scaler"

    def __init__(self):
        self._scaler = None

    def process(
        self,
        raw_data: np.ndarray,
        train_data: List[np.ndarray],
        val_data: List[np.ndarray],
        test_data: List[np.ndarray],
        fit_train_only: bool = False,
        do_anomaly: bool = False,
    ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
        """
        Standardizes the training, validation, and test sets by removing the mean and scaling to unit variance.

        Args:
            raw_data (np.ndarray): The raw data.
            train_data (List[np.ndarray]): The list of training sequences.
            val_data (List[np.ndarray]): The list of validation sequences.
            test_data (List[np.ndarray]): The list of test sequences.
            fit_train_only (bool): If True each scaler is fitted on the training
                sequence; otherwise it is fitted on the raw sequence.
            do_anomaly (bool): If True the last sequence is passed through
                unscaled (it is treated as the anomaly label).

        Returns:
            Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
                A tuple of three lists containing the processed training, validation, and test data.
        """
        scaled_train_data, scaled_val_data, scaled_test_data = [], [], []
        outputs = (scaled_train_data, scaled_val_data, scaled_test_data)

        grouped = zip(raw_data, train_data, val_data, test_data)
        for i, (raw_sequence, *splits) in enumerate(grouped):
            if do_anomaly and i == len(raw_data) - 1:  # Skip anomaly label
                for bucket, sequence in zip(outputs, splits):
                    bucket.append(sequence)
                continue

            # Sklearn expects 2-D input: one column, one row per time step.
            columns = [sequence.reshape(-1, 1) for sequence in splits]

            self._scaler = SKStandardScaler()
            fit_source = columns[0] if fit_train_only else raw_sequence.reshape(-1, 1)
            self._scaler.fit(fit_source)

            for bucket, column in zip(outputs, columns):
                bucket.append(self._scaler.transform(column).flatten())

        return scaled_train_data, scaled_val_data, scaled_test_data

    def inverse_process(self, data: np.ndarray) -> np.ndarray:
        """
        Maps scaled data back to its original representation.

        Args:
            data (np.ndarray): The data to scale back.

        Returns:
            np.ndarray: The scaled back data, in the same shape as the input.
        """
        assert self._scaler is not None, "StandardScaler has not been fitted"
        original_shape = data.shape
        as_column = data.reshape(-1, 1)
        restored = self._scaler.inverse_transform(as_column)

        return restored.reshape(original_shape)

    def save(self, save_dir: str):
        """
        Pickles the current scaler to ``processor.pkl`` inside save_dir.

        Args:
            save_dir (str): The directory where to store the scaler.
        """
        target = os.path.join(save_dir, "processor.pkl")
        with open(target, 'wb') as handle:
            pickle.dump(self._scaler, handle)

    def load(self, save_dir):
        """
        Restores the scaler from ``processor.pkl`` inside save_dir.

        Args:
            save_dir (str): The directory the scaler was saved.
        """
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        source = os.path.join(save_dir, "processor.pkl")
        with open(source, 'rb') as handle:
            self._scaler = pickle.load(handle)
104 |
105 |
--------------------------------------------------------------------------------
/tests/data_reader/train_database_reader_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pandas as pd
3 | import numpy as np
4 | from io import StringIO
5 | from unittest.mock import MagicMock, patch
6 | import taosws
7 |
8 | # Assuming your script is named `script` and contains the functions
9 | from ltsm.data_reader.train_database_reader import create_connection, setup_database, setup_tables, insert_data_from_csv, retrieve_data_to_csv
10 |
class TestDatabaseOperations(unittest.TestCase):
    """Tests for the train_database_reader helpers, run against a mocked connection."""

    def setUp(self):
        # Simulated database connection
        self.conn = MagicMock(spec=taosws.Connection)
        self.cursor = MagicMock()
        self.conn.cursor.return_value = self.cursor

        # A larger, complex CSV input data (1000 rows, 10 float columns, 1 int column as Label)
        num_rows = 1000
        num_features = 10
        np.random.seed(42)
        float_data = np.random.rand(num_rows, num_features)  # Random float values in [0, 1)
        label_data = np.random.randint(0, 2, size=(num_rows, 1))  # Random integer values 0 or 1 for 'Label'

        # Combine float data and label column to create a full dataset
        data = np.hstack((float_data, label_data))
        columns = [f'Feature{i + 1}' for i in range(num_features)] + ['Label']

        # Create a Pandas DataFrame from the generated data
        self.df = pd.DataFrame(data, columns=columns)

        # Ensure 'Label' is an integer type
        self.df['Label'] = self.df['Label'].astype(int)

        self.input_csv = StringIO(self.df.to_csv(index=False))

        # Sample expected table creation schema
        self.expected_schema = (
            "CREATE TABLE IF NOT EXISTS test_table ("
            "ts TIMESTAMP, " +
            ", ".join([f"`Feature{i + 1}` FLOAT" for i in range(num_features)]) +
            ", Label INT)"
        )

    @patch('taosws.connect')
    def test_create_connection(self, mock_connect):
        # Test connection creation
        mock_connect.return_value = self.conn
        connection = create_connection()
        mock_connect.assert_called_once()
        self.assertIsNotNone(connection)

    def test_setup_database(self):
        # Test database setup
        setup_database(self.conn, 'test_database')
        self.cursor.execute.assert_called_with("CREATE DATABASE IF NOT EXISTS test_database")

    def test_setup_tables(self):
        # Test table creation
        setup_tables(self.conn, 'test_database', 'test_table', self.df)
        self.cursor.execute.assert_any_call("USE test_database")
        self.cursor.execute.assert_any_call(self.expected_schema)

    def test_insert_data_from_csv(self):
        # Test data insertion
        insert_data_from_csv(self.conn, 'test_database', self.input_csv, 'test_table')
        self.cursor.execute.assert_any_call("USE test_database")
        # Check that data is being inserted
        self.assertTrue(
            any("INSERT INTO test_table VALUES" in call[0][0] for call in self.cursor.execute.call_args_list),
            "Insert data command was not called correctly."
        )

    def test_retrieve_data_to_csv(self):
        # Local import: this module does not import os at the top level.
        import os

        # Mock fetched data and column descriptions
        self.cursor.fetchall.side_effect = [
            [tuple(row) for row in self.df.values],  # Fetched data as tuples
            [(f'Feature{i+1}',) for i in range(10)] + [('Label',)]  # Column names
        ]

        output_file = "test_output.csv"

        def _remove_output():
            if os.path.exists(output_file):
                os.remove(output_file)

        # Registered before the call so the file is removed even when an
        # assertion below fails; otherwise it is left in the working directory.
        self.addCleanup(_remove_output)

        retrieve_data_to_csv(self.conn, 'test_database', 'test_table', output_file)

        # Verify that the SELECT command was called
        self.cursor.execute.assert_any_call("SELECT * FROM test_table")

        # Check if output file is created and matches expected data structure
        result_df = pd.read_csv(output_file)
        self.assertEqual(len(result_df), 1000, "Output file does not have the expected number of rows.")
        for i in range(1, 11):
            self.assertTrue(f'Feature{i}' in result_df.columns, f"Expected column Feature{i} not found in output CSV.")
        self.assertTrue('Label' in result_df.columns, "Expected column 'Label' not found in output CSV.")
94 |
95 | if __name__ == '__main__':
96 | unittest.main()
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/tsfel/feature_extraction/features_settings.py:
--------------------------------------------------------------------------------
1 | import json
2 | import tsfel
3 | import numpy as np
4 |
5 |
def load_json(json_path):
    """Loads the json file given by filename.

    Parameters
    ----------
    json_path : string
        Json path

    Returns
    -------
    Dict
        Dictionary

    """

    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(json_path) as json_file:
        return json.load(json_file)
22 |
23 |
def get_features_by_domain(domain=None, json_path=None):
    """Builds the feature-settings dictionary for one domain, or for all of them.

    Parameters
    ----------
    domain : string
        Available domains: "statistical"; "spectral"; "temporal"
        If domain equals None, then the features settings from all domains are returned.
    json_path : string
        Directory of json file. Default: package features.json directory

    Returns
    -------
    Dict
        Dictionary with the features settings

    Raises
    ------
    SystemExit
        If ``domain`` is not one of the available domains or None.

    """

    if json_path is None:
        json_path = tsfel.__path__[0] + "/feature_extraction/features.json"

    accepted_domains = ['statistical', 'temporal', 'spectral', None]
    if domain not in accepted_domains:
        raise SystemExit(
            'No valid domain. Choose: statistical, temporal, spectral or None (for all feature settings).')

    all_settings = load_json(json_path)
    return all_settings if domain is None else {domain: all_settings[domain]}
54 |
55 |
def get_features_by_tag(tag=None, json_path=None):
    """Builds the feature-settings dictionary restricted to one tag.

    Features whose "use" entry is "no", or that declare no "tag", are left
    out; domains that end up with no feature are dropped from the result.

    Parameters
    ----------
    tag : string
        Available tags: "audio"; "inertial", "ecg"; "eeg"; "emg".
        If tag equals None then, all available features are returned.
    json_path : string
        Directory of json file. Default: package features.json directory

    Returns
    -------
    Dict
        Dictionary with the features settings

    Raises
    ------
    SystemExit
        If ``tag`` is not one of the available tags or None.

    """
    if json_path is None:
        json_path = tsfel.__path__[0] + "/feature_extraction/features.json"

    accepted_tags = ["audio", "inertial", "ecg", "eeg", "emg", None]
    if tag not in accepted_tags:
        raise SystemExit(
            "No valid tag. Choose: audio, inertial, ecg, eeg, emg or None.")

    all_settings = load_json(json_path)
    if tag is None:
        return all_settings

    selected = {}
    for domain, features in all_settings.items():
        matching = {}
        for feat, settings in features.items():
            if settings["use"] == "no":
                continue
            # Features without a declared tag never match.
            if "tag" not in settings:
                continue
            js_tag = settings["tag"]
            if isinstance(js_tag, list):
                is_match = any([tag in js_t for js_t in js_tag])
            else:
                is_match = js_tag == tag
            if is_match:
                matching[feat] = settings
        # Domains with no matching feature are omitted.
        if matching:
            selected[domain] = matching
    return selected
101 |
102 |
def get_number_features(dict_features):
    """Sums the feature-vector size over every feature that is in use.

    Parameters
    ----------
    dict_features : dict
        Dictionary with features settings

    Returns
    -------
    int
        Feature vector size
    """
    number_features = 0
    for features in dict_features.values():
        for settings in features.values():
            if settings["use"] == "no":
                continue

            count = settings["n_features"]
            if not isinstance(count, int):
                # "n_features" names one of the feature's parameters.
                param_value = settings["parameters"][count]
                if isinstance(param_value, int):
                    count = param_value
                else:
                    # The parameter is a string holding a Python expression;
                    # its length is the feature count.
                    # NOTE: eval() runs arbitrary code - only use trusted settings.
                    count = eval("len(" + param_value + ")")
            number_features += count

    return number_features
133 |
--------------------------------------------------------------------------------
/tests/data_reader/npy_database_reader_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | import pandas as pd
4 | from unittest.mock import MagicMock, patch
5 | import taosws
6 | import os
7 |
8 | # Assuming your script is named `script` and it contains the functions defined
9 | from ltsm.data_reader.npy_database_reader import create_connection, setup_database, setup_tables, insert_data_from_npy, retrieve_data_to_npy
10 |
class TestDatabaseOperations(unittest.TestCase):
    """Tests for the npy_database_reader helpers, run against a mocked connection."""

    def setUp(self):
        # Simulated database connection
        self.conn = MagicMock(spec=taosws.Connection)
        self.cursor = MagicMock()
        self.conn.cursor.return_value = self.cursor

        # Generate a large synthetic NumPy array for testing (1000 rows, 50 columns)
        self.num_rows = 1000
        self.num_cols = 50
        np.random.seed(42)
        self.data = np.random.rand(self.num_rows, self.num_cols)  # Random floats in [0, 1)

        # Save the array to a temporary .npy file
        self.test_npy_file = 'test_data.npy'
        np.save(self.test_npy_file, self.data)

        # File written by the retrieval test; removed in tearDown
        self.output_npy_file = 'test_output.npy'

        # Create a DataFrame from the NumPy array for table setup
        self.df = pd.DataFrame(self.data)
        self.table_name = 'test_table'

    def tearDown(self):
        # setUp writes test_data.npy for every test, so the cleanup belongs
        # here: it runs after each test and also when an assertion fails.
        for generated_file in (self.output_npy_file, self.test_npy_file):
            if os.path.exists(generated_file):
                os.remove(generated_file)
                print(f"Cleaned up generated file: {generated_file}")

    @patch('taosws.connect')
    def test_create_connection(self, mock_connect):
        # Test the connection creation
        mock_connect.return_value = self.conn
        connection = create_connection()
        mock_connect.assert_called_once()
        self.assertIsNotNone(connection)

    def test_setup_database(self):
        # Test database setup
        setup_database(self.conn, 'test_database')
        self.cursor.execute.assert_called_with("CREATE DATABASE IF NOT EXISTS test_database")

    def test_setup_tables(self):
        # Test table creation with a large number of columns
        setup_tables(self.conn, 'test_database', self.table_name, self.df)
        self.cursor.execute.assert_any_call("USE test_database")
        self.cursor.execute.assert_any_call(f"DROP TABLE IF EXISTS {self.table_name}")

        # Check that the table creation schema was executed correctly
        expected_schema_columns = [f"`{i}` FLOAT" for i in range(self.num_cols)]
        expected_schema = f"CREATE TABLE IF NOT EXISTS {self.table_name} (ts TIMESTAMP, {', '.join(expected_schema_columns)})"
        self.cursor.execute.assert_any_call(expected_schema)

    def test_insert_data_from_npy(self):
        # Test data insertion from .npy file with batch processing
        insert_data_from_npy(self.conn, 'test_database', self.test_npy_file, self.table_name, batch_size=100)
        self.cursor.execute.assert_any_call("USE test_database")

        # Check that data is inserted in batches
        # NOTE(review): the message mentions a batch size of 200 while the call
        # above passes batch_size=100 - confirm which one is intended.
        batch_inserts = [call for call in self.cursor.execute.call_args_list if "INSERT INTO" in call[0][0]]
        self.assertGreaterEqual(len(batch_inserts), 5, "Expected at least 5 batch insertions for 1000 rows with a batch size of 200.")

    def test_retrieve_data_to_npy(self):
        # Mock fetched data and column descriptions for the retrieval test
        self.cursor.fetchall.side_effect = [
            [tuple(row) for row in self.data],  # Mocked data returned as tuples
            [(f'{i}',) for i in range(self.num_cols)]  # Mocked column names
        ]

        output_file = self.output_npy_file
        retrieve_data_to_npy(self.conn, 'test_database', self.table_name, output_file)

        # Verify that the SELECT command was called
        self.cursor.execute.assert_any_call(f"SELECT * FROM {self.table_name}")

        # Load and check the output file
        result_array = np.load(output_file)
        self.assertEqual(result_array.shape, (self.num_rows, self.num_cols),
                         "Output file shape does not match expected data shape.")
        np.testing.assert_array_almost_equal(self.data, result_array, decimal=5,
                                             err_msg="Output data does not match expected data.")
94 |
95 | if __name__ == '__main__':
96 | unittest.main()
97 |
--------------------------------------------------------------------------------
/tests/data_provider/prompt_generator_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import pandas as pd
4 | import numpy as np
5 | import torch
6 | from ltsm.data_provider.prompt_generator import save_data, prompt_save
7 |
@pytest.fixture
def setup_prompt(mocker, tmp_path):
    """Prepare random prompt frames and the per-split output directories.

    ``tsfel`` is replaced in ``sys.modules`` by a mock. Returns the tuple
    ``(prompt_save, sample_prompt_buf, output_path, data_name, ifTest)``.
    """
    mocker.patch.dict('sys.modules', {'tsfel': mocker.MagicMock()})

    # Number of rows per split; each frame has two random feature columns.
    rows_per_split = {'train': 10, 'val': 5, 'test': 5}
    sample_prompt_buf = {
        split: pd.DataFrame({
            'feature1': np.random.rand(n_rows),
            'feature2': np.random.rand(n_rows)
        })
        for split, n_rows in rows_per_split.items()
    }

    output_path = str(tmp_path)
    for split in ("train", "val", "test"):
        os.makedirs(os.path.join(output_path, split), exist_ok=True)

    data_name = "test_data"
    ifTest = False
    return prompt_save, sample_prompt_buf, output_path, data_name, ifTest
37 |
@pytest.mark.parametrize("save_format", ["pth.tar", "csv", "npz"])
def test_prompt_save(setup_prompt, save_format):
    """Save the prompts in each format, reload every file and compare it with its source column."""
    prompt_save, sample_prompt_buf, output_path, data_name, ifTest = setup_prompt
    prompt_save(sample_prompt_buf, output_path, data_name, save_format, ifTest)

    for split in ("train", "val", "test"):
        split_dir = os.path.join(output_path, split)
        # Iterating the transposed frame yields one (column name, column) pair at a time.
        for index, col in sample_prompt_buf[split].T.iterrows():
            file_path = os.path.join(split_dir, f"{data_name}_{index}_prompt.{save_format}")
            assert os.path.exists(file_path), f"File {file_path} does not exist"

            prompt_data = col
            prompt_data.columns = [index]
            prompt_data = prompt_data.T

            if save_format == "csv":
                load_data = pd.read_csv(file_path)
                if isinstance(load_data, pd.DataFrame):
                    load_data = load_data.squeeze()
            elif save_format == "npz":
                loaded = np.load(file_path)
                load_data = pd.Series(data=loaded["data"], index=loaded["index"], name=loaded["name"].item())
                if isinstance(load_data, pd.DataFrame):
                    load_data = load_data.squeeze()
            elif save_format == "pth.tar":
                load_data = torch.load(file_path)
            else:
                raise ValueError(f"Unsupported save format: {save_format}")

            assert type(load_data) == type(prompt_data), f"Type mismatch: {type(load_data)} vs {type(prompt_data)}"
            assert load_data.shape == prompt_data.shape, f"Shape mismatch: {load_data.shape} vs {prompt_data.shape}"
            assert load_data.index.equals(prompt_data.index), "Index mismatch"
            assert load_data.name == prompt_data.name, f"Series names mismatch: {load_data.name} vs {prompt_data.name}"
            assert np.allclose(load_data.values, prompt_data.values, rtol=1e-8, atol=1e-8), "Data values mismatch"
            # The exact-equality check is skipped for csv.
            if save_format != "csv":
                assert load_data.equals(prompt_data), f"Data mismatch: {load_data} vs {prompt_data}"
            print(f"All tests passed for {file_path}")
78 |
79 |
@pytest.fixture
def setup_save():
    """Provide a single-row DataFrame holding the integers 0..132."""
    frame = pd.DataFrame([range(133)])
    print(frame.shape)
    return frame
86 |
@pytest.mark.parametrize("save_format", ["pth.tar", "csv", "npz"])
def test_save_data(tmpdir, setup_save, save_format):
    """Write the frame with save_data in each format, read it back and compare it with the input."""
    expected = setup_save
    data_path = os.path.join(tmpdir, f"test_data.{save_format}")

    save_data(expected, data_path, save_format)

    if save_format == "csv":
        loaded_data = pd.read_csv(data_path)
        # read_csv returns string column labels; the source frame uses integers.
        loaded_data.columns = loaded_data.columns.astype(int)
    elif save_format == "npz":
        archive = np.load(data_path)
        loaded_data = pd.DataFrame(data=archive["data"])
    elif save_format == "pth.tar":
        loaded_data = torch.load(data_path)

    assert isinstance(loaded_data, pd.DataFrame), "Loaded data should be a DataFrame"
    assert loaded_data.shape == expected.shape, f"Shape mismatch: {loaded_data.shape} vs {setup_save.shape}"
    assert loaded_data.columns.equals(expected.columns), "Columns mismatch"
    assert np.allclose(loaded_data.values, expected.values, rtol=1e-8, atol=1e-8), "Data values mismatch"
107 |
108 |
109 |
--------------------------------------------------------------------------------
/tests/evaluate_pipeline/evaluation_pipeline_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from ltsm.evaluate_pipeline.evaluation_pipeline import EvaluationPipeline
3 | import random
4 |
5 |
class TestEvaluationPipeline(unittest.TestCase):
    """Unit tests for the range-overlap metrics exposed by ``EvaluationPipeline``."""

    def setUp(self):
        # Complex data for testing
        # Predicted ranges with overlaps, gaps, and exact matches
        self.x_test = [
            (0, 10), (15, 25), (30, 40), (45, 55), (60, 70)
        ]
        # True ranges with partial overlaps, non-overlapping ranges, and complete overlaps
        self.y_test = [
            (5, 15), (20, 30), (35, 50), (65, 75)
        ]

        # Large, random test set. A private seeded generator keeps the data
        # identical on every run (so failures are reproducible) without
        # touching the global ``random`` state shared with other tests.
        rng = random.Random(0)
        self.x_test_large = self.generate_random_ranges(100, 0, 1000, rng=rng)
        self.y_test_large = self.generate_random_ranges(100, 0, 1000, rng=rng)

    @staticmethod
    def generate_random_ranges(count, range_start, range_end, rng=None):
        """Generate ``count`` random ``(start, end)`` pairs.

        ``start`` is drawn from ``[range_start, range_end - 10]`` and ``end`` is
        ``start`` plus a length drawn from ``[5, 20]``, so ``end`` may exceed
        ``range_end`` by up to 10.

        Args:
            count: Number of ranges to generate.
            range_start: Lowest allowed start value.
            range_end: Upper bound used to limit the start value.
            rng: Optional object exposing ``randint`` (e.g. ``random.Random``).
                Defaults to the module-level ``random`` generator.
        """
        source = random if rng is None else rng
        ranges = []
        for _ in range(count):
            start = source.randint(range_start, range_end - 10)
            end = start + source.randint(5, 20)
            ranges.append((start, end))
        return ranges

    def test_overlap(self):
        # Test overlap size with varying ranges
        pipeline = EvaluationPipeline(self.x_test, self.y_test)

        # Overlap between first predicted and first true range
        overlap_1 = pipeline.overlap_size((0, 10), (5, 15))
        self.assertAlmostEqual(overlap_1, 0.545, places=2)  # Partial overlap

        overlap_2 = pipeline.overlap_size((17, 29), (23, 33))
        self.assertAlmostEqual(overlap_2, 0.5384615384615384, places=2)  # Partial overlap

        # Ranges that only share the endpoint 20
        overlap_3 = pipeline.overlap_size((16, 20), (20, 35))
        self.assertAlmostEqual(overlap_3, 0.2, places=2)

        # No overlap
        overlap_4 = pipeline.overlap_size((15, 25), (35, 50))
        self.assertEqual(overlap_4, 0)

        # Complete overlap
        overlap_5 = pipeline.overlap_size((30, 40), (30, 40))
        self.assertEqual(overlap_5, 1.0)

    def test_cardinality_factor(self):
        # Test cardinality factor with overlapping ranges
        pipeline = EvaluationPipeline(self.x_test, self.y_test)

        # A range with overlaps
        cardinality_1 = pipeline.cardinality_factor((35, 50), self.y_test)
        self.assertGreaterEqual(cardinality_1, 1.0)

        # A range with no overlaps
        cardinality_2 = pipeline.cardinality_factor((100, 110), self.y_test)
        self.assertEqual(cardinality_2, 1.0)

    def test_large_random_data(self):
        # Test pipeline with large randomized (but seeded) data
        pipeline = EvaluationPipeline(self.x_test_large, self.y_test_large)

        recall = pipeline.evaluate_recall_score()
        precision = pipeline.evaluate_precision_score()
        f1_score = pipeline.evaluate_f1_score()

        # Check scores are within bounds
        self.assertGreaterEqual(recall, 0.0)
        self.assertGreaterEqual(precision, 0.0)
        self.assertGreaterEqual(f1_score, 0.0)
        self.assertLessEqual(recall, 1.0)
        self.assertLessEqual(precision, 1.0)
        self.assertLessEqual(f1_score, 1.0)

    def test_edge_case_empty_inputs(self):
        # Edge case: Empty inputs
        pipeline = EvaluationPipeline([], [])
        recall = pipeline.evaluate_recall_score()
        precision = pipeline.evaluate_precision_score()
        f1_score = pipeline.evaluate_f1_score()

        self.assertEqual(recall, 0.0)
        self.assertEqual(precision, 0.0)
        self.assertEqual(f1_score, 0.0)

    def test_edge_case_no_overlap(self):
        # Edge case: No overlaps
        x_test_no_overlap = [(0, 10), (20, 30)]
        y_test_no_overlap = [(40, 50), (60, 70)]
        pipeline = EvaluationPipeline(x_test_no_overlap, y_test_no_overlap)
        recall = pipeline.evaluate_recall_score()
        precision = pipeline.evaluate_precision_score()
        f1_score = pipeline.evaluate_f1_score()

        self.assertEqual(recall, 0.0)
        self.assertEqual(precision, 0.0)
        self.assertEqual(f1_score, 0.0)

    def test_f1_score_consistency(self):
        # Validate that F1 score is consistent with precision and recall
        pipeline = EvaluationPipeline(self.x_test, self.y_test)
        recall = pipeline.evaluate_recall_score()
        precision = pipeline.evaluate_precision_score()
        f1_score = pipeline.evaluate_f1_score()

        if precision + recall > 0:
            self.assertAlmostEqual(f1_score, 2 * (precision * recall) / (precision + recall), places=2)
        else:
            self.assertEqual(f1_score, 0.0)
118 |
119 |
# Run the unittest test runner when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()
122 |
--------------------------------------------------------------------------------
/ltsm/layers/Transformer_EncDec.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/yuqinie98/PatchTST, with minor modifications
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 |
class ConvLayer(nn.Module):
    """Conv1d -> BatchNorm1d -> ELU -> MaxPool1d stage that keeps the channel count at ``c_in``."""

    def __init__(self, c_in):
        super().__init__()
        self.downConv = nn.Conv1d(c_in, c_in, kernel_size=3, padding=2, padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the stage to a 3-D tensor whose last axis holds the ``c_in`` channels."""
        # Conv1d / BatchNorm1d / MaxPool1d operate on the last axis, so move channels to dim 1 first.
        channels_first = x.permute(0, 2, 1)
        activated = self.activation(self.norm(self.downConv(channels_first)))
        pooled = self.maxPool(activated)
        # Restore the input's axis order before returning.
        return pooled.transpose(1, 2)
26 |
27 |
class EncoderLayer(nn.Module):
    """Self-attention followed by a kernel-size-1 convolutional feed-forward, each with residual + LayerNorm."""

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        """
        Args:
            attention: Module called as ``attention(q, k, v, attn_mask=...)`` returning ``(output, attn)``.
            d_model: Feature width of the layer input and output.
            d_ff: Hidden width of the feed-forward; falls back to ``4 * d_model`` when falsy.
            dropout: Dropout probability shared by every dropout call in the layer.
            activation: ``"relu"`` selects ``F.relu``; any other value selects ``F.gelu``.
        """
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        """Return ``(output, attn)`` where ``attn`` is whatever the attention module returned."""
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attended))

        # Conv1d expects channels on dim 1, hence the transposes around the two convolutions.
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        feed_forward = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + feed_forward), attn
52 |
53 |
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with convolution layers, plus an optional final norm."""

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Run the stack and return ``(output, attns)`` with one attention entry per applied attention layer."""
        # x [B, L, D]
        attns = []
        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, attn = layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            # zip stops at the shorter list; each paired attention layer is followed by its conv layer.
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, attn = layer(x, attn_mask=attn_mask)
                x = conv(x)
                attns.append(attn)
            # The last attention layer is applied once more after the loop, without a mask.
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
80 |
81 |
class DecoderLayer(nn.Module):
    """Self-attention, cross-attention and a kernel-size-1 convolutional feed-forward, each with residual + LayerNorm."""

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        """
        Args:
            self_attention: Module called as ``m(x, x, x, attn_mask=...)``; only element ``[0]`` of its result is used.
            cross_attention: Module called as ``m(x, cross, cross, attn_mask=...)``; only element ``[0]`` is used.
            d_model: Feature width of the layer input and output.
            d_ff: Hidden width of the feed-forward; falls back to ``4 * d_model`` when falsy.
            dropout: Dropout probability shared by every dropout call in the layer.
            activation: ``"relu"`` selects ``F.relu``; any other value selects ``F.gelu``.
        """
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Return the decoded tensor; attention weights from the sub-modules are discarded."""
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x = self.norm1(x + self.dropout(self_out))

        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x = self.norm2(x + self.dropout(cross_out))

        # Conv1d expects channels on dim 1, hence the transposes around the two convolutions.
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        feed_forward = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm3(x + feed_forward)
114 |
115 |
class Decoder(nn.Module):
    """Sequential stack of decoder layers with an optional final norm and an optional output projection."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Feed ``x`` through every layer (each also receives ``cross`` and both masks), then norm and project."""
        out = x
        for decoder_layer in self.layers:
            out = decoder_layer(out, cross, x_mask=x_mask, cross_mask=cross_mask)

        # Both post-processing steps are skipped when the corresponding module was not supplied.
        for post in (self.norm, self.projection):
            if post is not None:
                out = post(out)
        return out
--------------------------------------------------------------------------------
/ltsm/layers/PatchTST_layers.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/yuqinie98/PatchTST, with minor modifications
2 | __all__ = ['Transpose', 'get_activation_fn', 'moving_avg', 'series_decomp', 'PositionalEncoding', 'SinCosPosEncoding', 'Coord2dPosEncoding', 'Coord1dPosEncoding', 'positional_encoding']
3 |
4 | import torch
5 | from torch import nn
6 | import math
7 |
class Transpose(nn.Module):
    """Module wrapper around ``Tensor.transpose`` so a dimension swap can be placed inside a module pipeline."""

    def __init__(self, *dims, contiguous=False):
        super().__init__()
        self.dims = dims
        self.contiguous = contiguous

    def forward(self, x):
        """Swap the configured dimensions; optionally return a contiguous copy."""
        swapped = x.transpose(*self.dims)
        return swapped.contiguous() if self.contiguous else swapped
15 |
16 |
def get_activation_fn(activation):
    """Resolve ``activation`` to an activation instance.

    A callable is invoked with no arguments and its result returned; the strings
    ``"relu"`` and ``"gelu"`` (case-insensitive) map to ``nn.ReLU()`` / ``nn.GELU()``.

    Raises:
        ValueError: if a non-callable value names neither "relu" nor "gelu".
    """
    if callable(activation):
        return activation()
    name = activation.lower()
    if name == "relu":
        return nn.ReLU()
    if name == "gelu":
        return nn.GELU()
    raise ValueError(f'{activation} is not available. You can use "relu", "gelu", or a callable')
22 |
23 |
24 | # decomposition
25 |
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """
    def __init__(self, kernel_size, stride):
        """
        Args:
            kernel_size: Window length of the average pool.
            stride: Stride of the average pool.
        """
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Smooth a 3-D tensor along dim 1 after replicating its first and last steps as padding.

        A total of ``kernel_size - 1`` padded steps is added, so with ``stride=1``
        the output has the same length as the input for odd AND even kernel sizes.
        Previously both sides were padded with ``(kernel_size - 1) // 2`` steps,
        which made the output one step short for even kernels.
        """
        pad_total = self.kernel_size - 1
        pad_front = pad_total // 2
        # For odd kernels pad_end == pad_front; even kernels get the extra step at the end.
        pad_end = pad_total - pad_front
        # padding on the both ends of time series
        front = x[:, 0:1, :].repeat(1, pad_front, 1)
        end = x[:, -1:, :].repeat(1, pad_end, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last axis, so move dim 1 there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x
43 |
44 |
class series_decomp(nn.Module):
    """
    Series decomposition block: splits the input into a moving-average part and the remainder.
    """
    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(residual, moving_mean)`` where ``residual = x - moving_mean``."""
        trend = self.moving_avg(x)
        return x - trend, trend
57 |
58 |
59 |
60 | # pos_encoding
61 |
def PositionalEncoding(q_len, d_model, normalize=True):
    """Build a fixed sine/cosine positional encoding of shape ``(q_len, d_model)``.

    Even columns hold sines and odd columns hold cosines of ``position * div_term``.
    Odd ``d_model`` is supported: there is then one fewer cosine column than sine
    columns, so the cosine table is truncated to fit (this used to raise a shape
    mismatch error).

    Args:
        q_len: Number of positions (rows).
        d_model: Encoding width (columns).
        normalize: When true, subtract the global mean and divide by ``10 * std``.

    Returns:
        A ``torch.Tensor`` of shape ``(q_len, d_model)``.
    """
    pe = torch.zeros(q_len, d_model)
    position = torch.arange(0, q_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
    angles = position * div_term
    pe[:, 0::2] = torch.sin(angles)
    # pe[:, 1::2] has d_model // 2 columns, which is one less than angles when d_model is odd.
    pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
    if normalize:
        pe = pe - pe.mean()
        pe = pe / (pe.std() * 10)
    return pe

SinCosPosEncoding = PositionalEncoding
74 |
def Coord2dPosEncoding(q_len, d_model, exponential=False, normalize=True, eps=1e-3, verbose=False):
    """Build a ``(q_len, d_model)`` coordinate encoding from the product of two ``linspace(0, 1)`` ramps.

    The exponent applied to both ramps is adjusted in steps of 0.001, for at
    most 100 iterations, until the mean of the table is within ``eps`` of zero.

    Args:
        q_len: Number of rows.
        d_model: Number of columns.
        exponential: Start the search from exponent 0.5 instead of 1.
        normalize: When true, subtract the mean and divide by ``10 * std``.
        eps: Tolerance on the absolute mean that stops the search.
        verbose: Print the iteration index, exponent and mean on every iteration.

    Returns:
        A ``torch.Tensor`` of shape ``(q_len, d_model)``.
    """
    x = .5 if exponential else 1
    rows = torch.linspace(0, 1, q_len).reshape(-1, 1)
    cols = torch.linspace(0, 1, d_model).reshape(1, -1)
    for i in range(100):
        cpe = 2 * (rows ** x) * (cols ** x) - 1
        mean = cpe.mean().item()
        # Plain print replaces the former call to `pv`, which is not defined in this module.
        if verbose:
            print(f'{i:4.0f} {x:5.3f} {mean:+6.3f}')
        if abs(mean) <= eps:
            break
        elif mean > eps:
            x += .001
        else:
            x -= .001
    if normalize:
        cpe = cpe - cpe.mean()
        cpe = cpe / (cpe.std() * 10)
    return cpe
89 |
def Coord1dPosEncoding(q_len, exponential=False, normalize=True):
    """Build a ``(q_len, 1)`` ramp from -1 to 1 (square-root shaped when ``exponential``), optionally normalized."""
    power = .5 if exponential else 1
    ramp = torch.linspace(0, 1, q_len).reshape(-1, 1)
    cpe = 2 * (ramp ** power) - 1
    if not normalize:
        return cpe
    centered = cpe - cpe.mean()
    return centered / (centered.std() * 10)
96 |
def positional_encoding(pe, learn_pe, q_len, d_model):
    """Create the positional-encoding parameter selected by ``pe``.

    Args:
        pe: ``None`` or one of ``'zero'``, ``'zeros'``, ``'normal'``/``'gauss'``,
            ``'uniform'``, ``'lin1d'``, ``'exp1d'``, ``'lin2d'``, ``'exp2d'``, ``'sincos'``.
        learn_pe: Whether the returned parameter requires gradients. Forced to
            ``False`` when ``pe`` is ``None``.
        q_len: Number of positions (rows).
        d_model: Encoding width; the ``'zero'``, ``'normal'``/``'gauss'`` and
            ``'uniform'`` branches create a single column instead.

    Returns:
        An ``nn.Parameter`` wrapping the initialized table.

    Raises:
        ValueError: if ``pe`` is not one of the supported values.
    """
    # Positional encoding
    if pe is None:
        W_pos = torch.empty((q_len, d_model))  # pe = None and learn_pe = False can be used to measure impact of pe
        nn.init.uniform_(W_pos, -0.02, 0.02)
        learn_pe = False
    elif pe == 'zero':
        W_pos = torch.empty((q_len, 1))
        nn.init.uniform_(W_pos, -0.02, 0.02)
    elif pe == 'zeros':
        W_pos = torch.empty((q_len, d_model))
        nn.init.uniform_(W_pos, -0.02, 0.02)
    elif pe == 'normal' or pe == 'gauss':
        W_pos = torch.zeros((q_len, 1))
        torch.nn.init.normal_(W_pos, mean=0.0, std=0.1)
    elif pe == 'uniform':
        W_pos = torch.zeros((q_len, 1))
        nn.init.uniform_(W_pos, a=0.0, b=0.1)
    elif pe == 'lin1d':
        W_pos = Coord1dPosEncoding(q_len, exponential=False, normalize=True)
    elif pe == 'exp1d':
        W_pos = Coord1dPosEncoding(q_len, exponential=True, normalize=True)
    elif pe == 'lin2d':
        W_pos = Coord2dPosEncoding(q_len, d_model, exponential=False, normalize=True)
    elif pe == 'exp2d':
        W_pos = Coord2dPosEncoding(q_len, d_model, exponential=True, normalize=True)
    elif pe == 'sincos':
        W_pos = PositionalEncoding(q_len, d_model, normalize=True)
    else:
        # Adjacent literals avoid the stray whitespace a backslash continuation embeds in the message.
        raise ValueError(
            f"{pe} is not a valid pe (positional encoder). Available types: 'gauss'=='normal', "
            "'zeros', 'zero', 'uniform', 'lin1d', 'exp1d', 'lin2d', 'exp2d', 'sincos', None."
        )
    return nn.Parameter(W_pos, requires_grad=learn_pe)
--------------------------------------------------------------------------------
/ltsm/models/PatchTST.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/yuqinie98/PatchTST, with minor modifications
2 | import torch
3 | from torch import Tensor
4 |
5 | from .base_config import PatchTSTConfig
6 | from ltsm.layers.PatchTST_backbone import PatchTST_backbone
7 | from ltsm.layers.PatchTST_layers import series_decomp
8 | from transformers import PreTrainedModel
9 |
class PatchTST(PreTrainedModel):
    """PatchTST forecaster wrapped as a Hugging Face ``PreTrainedModel``.

    With ``config.decomposition`` enabled the input is split by ``series_decomp``
    into a residual and a trend part, each handled by its own backbone, and the
    two outputs are summed. Otherwise a single backbone handles the input.
    """
    config_class = PatchTSTConfig

    def __init__(self, config: PatchTSTConfig, **kwargs):
        """
        Args:
            config: Model hyper-parameters; every backbone argument is read from it.
            **kwargs: Accepted for signature compatibility; not used here.
        """
        super().__init__(config)

        self.decomposition = config.decomposition
        if self.decomposition:
            # NOTE(review): `kernel_size` is read from the config here, so it must be
            # present on the config object when decomposition is enabled - confirm callers set it.
            self.decomp_module = series_decomp(config.kernel_size)
            # Trend backbone is built before the residual one, matching the original construction order.
            self.model_trend = self._build_backbone(config)
            self.model_res = self._build_backbone(config)
        else:
            self.model = self._build_backbone(config)

    @staticmethod
    def _build_backbone(config: PatchTSTConfig):
        """Instantiate one ``PatchTST_backbone`` from ``config`` (positional argument order is significant)."""
        return PatchTST_backbone(
            config.enc_in,
            config.seq_len,
            config.pred_len,
            config.patch_len,
            config.stride,
            config.max_seq_len,
            config.n_layers,
            config.d_model,
            config.n_heads,
            config.d_k,
            config.d_v,
            config.d_ff,
            config.norm,
            config.attn_dropout,
            config.dropout,
            config.activation,
            config.key_padding_mask,
            config.padding_var,
            config.attn_mask,
            config.res_attention,
            config.pre_norm,
            config.store_attn,
            config.pe,
            config.learn_pe,
            config.fc_dropout,
            config.head_dropout,
            config.padding_patch,
            config.pretrain_head,
            config.head_type,
            config.individual,
            config.revin,
            config.affine,
            config.subtract_last,
            config.verbose
        )

    def forward(self, x: Tensor):
        """Run the model on ``x``; dims 1 and 2 are swapped before the backbone and swapped back afterwards."""
        if self.decomposition:
            res_init, trend_init = self.decomp_module(x)
            res_init, trend_init = res_init.permute(0, 2, 1), trend_init.permute(0, 2, 1)  # [Batch, Channel, Input length]
            res = self.model_res(res_init)
            trend = self.model_trend(trend_init)
            x = res + trend
            x = x.permute(0, 2, 1)  # [Batch, Input length, Channel]
        else:
            x = x.permute(0, 2, 1)  # [Batch, Channel, Input length]
            x = self.model(x)
            x = x.permute(0, 2, 1)  # [Batch, Input length, Channel]
        return x
--------------------------------------------------------------------------------
/tests/models/Informer_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ltsm.models import get_model
3 | from ltsm.models.base_config import InformerConfig
4 | from ltsm.common.base_training_pipeline import TrainingConfig
5 | from transformers import PreTrainedModel
6 | import torch
7 | import numpy as np
8 |
@pytest.fixture
def config(tmp_path):
    """Build a ``TrainingConfig`` for the Informer model whose paths all live under ``tmp_path``."""
    data_path = tmp_path / "test.csv"
    prompt_data_path = tmp_path / "prompt_normalize_split"
    prompt_data_path.mkdir()
    # The output directory sits next to the data file; it used to be nested
    # under the CSV file path itself (`test.csv/output`).
    OUTPUT_PATH = tmp_path / "output"

    train_params = {
        "data_path": str(data_path),
        "model": "Informer",
        "model_name_or_path": "gpt2-medium",
        "gradient_accumulation_steps": 64,
        "test_data_path_list": [str(data_path)],
        "prompt_data_path": str(prompt_data_path),
        "train_epochs": 1000,
        "patience": 10,
        "lradj": 'TST',
        "pct_start": 0.2,
        "freeze": 0,
        "itr": 1,
        "batch_size": 32,
        "learning_rate": 1e-3,
        "downsample_rate": 20,
        "output_dir": str(OUTPUT_PATH),
        "eval": 0,
        "des": 'Exp',
        "padding_patch": 'end',
        "local_pretrain": "None"
    }

    config = {
        "pred_len": 96,
        "enc_in": 1,
        "e_layers": 3,
        "d_layers": 1,
        "n_heads": 16,
        "d_model": 128,
        "d_ff": 256,
        "dropout": 0.2,
        "fc_dropout": 0.2,
        "head_dropout": 0,
        "seq_len": 336,
        "output_attention": 0,
        "freq": "h",
        "embed": "timeF",
        "factor": 1,
        "c_out": 862,
        "distil": True,
        "embed_type": 0,
        "dec_in": 7,
        "activation": "gelu"
    }
    informer_config = InformerConfig(**config)
    return TrainingConfig(model_config=informer_config, **train_params)
63 |
def test_model_initialization(config):
    """``get_model`` should return a ``PreTrainedModel`` instance for the Informer configuration."""
    params = config.train_params
    model = get_model(
        config.model_config,
        model_name=params["model"],
        local_pretrain=params["local_pretrain"],
    )
    assert model is not None
    assert isinstance(model, PreTrainedModel)
68 |
def test_parameter_count(config):
    """The number of trainable parameters should match a count derived by hand from the configuration."""
    params = config.train_params
    model = get_model(
        config.model_config,
        model_name=params["model"],
        local_pretrain=params["local_pretrain"],
    )
    param_count = sum(p.numel() for p in model.parameters() if p.requires_grad)

    cfg = config.model_config
    d_model, d_ff = cfg.d_model, cfg.d_ff

    # Terms shared by the encoder and decoder layer counts.
    conv_pair = 2 * d_model * d_ff + d_model + d_ff
    attention_proj = d_model * d_model + d_model

    # Encoder and decoder embedding parameter counts
    enc_embedding = d_model * cfg.enc_in * 3 + 4 * d_model
    dec_embedding = d_model * cfg.dec_in * 3 + 4 * d_model

    # Per encoder layer: conv pair, layer norms, attention layer
    encoder_layer = conv_pair + 4 * d_model + 4 * attention_proj
    encoder_param_count = encoder_layer * cfg.e_layers
    # Conv layers between encoder layers
    encoder_param_count += (cfg.e_layers - 1) * (d_model * d_model * 3 + 3 * d_model)
    # Final encoder layer norm
    encoder_param_count += 2 * d_model

    # Per decoder layer: conv pair, layer norms, attention layers
    decoder_layer = conv_pair + 6 * d_model + 8 * attention_proj
    decoder_param_count = decoder_layer * cfg.d_layers
    # Final decoder layer norm
    decoder_param_count += 2 * d_model
    # Projection layer
    decoder_param_count += d_model * cfg.c_out + cfg.c_out

    expected_param_count = enc_embedding + dec_embedding + encoder_param_count + decoder_param_count

    assert param_count == expected_param_count
115 |
116 |
def test_forward_output_shape(config):
    """A forward pass on zero inputs should return a tensor of shape ``(batch, pred_len, c_out)``."""
    # The default dtype is process-wide state: remember it and restore it afterwards
    # so float64 does not leak into tests that run later in the same session.
    previous_dtype = torch.get_default_dtype()
    torch.set_default_dtype(torch.float64)
    try:
        model = get_model(config.model_config, model_name=config.train_params["model"], local_pretrain=config.train_params["local_pretrain"])
        batch_size = 32
        input_length = config.model_config.seq_len
        enc_inp = torch.tensor(np.zeros((batch_size, input_length, config.model_config.enc_in)))
        enc_mark = torch.tensor(np.zeros((batch_size, input_length, 4)))
        dec_inp = torch.tensor(np.zeros((batch_size, input_length, config.model_config.dec_in)))
        dec_mark = torch.tensor(np.zeros((batch_size, input_length, 4)))
        output = model(enc_inp, enc_mark, dec_inp, dec_mark)
        assert output.size() == torch.Size([batch_size, config.model_config.pred_len, config.model_config.c_out])
    finally:
        torch.set_default_dtype(previous_dtype)
--------------------------------------------------------------------------------
/ltsm/models/base_config.py:
--------------------------------------------------------------------------------
1 | from transformers import PretrainedConfig
2 | from dataclasses import dataclass
3 | from typing import Optional
4 | from torch import Tensor
5 |
@dataclass
class LTSMConfig(PretrainedConfig):
    """
    Configuration for the LTSM model.

    Every constructor argument is stored unchanged as an attribute of the same
    name; any additional keyword arguments are forwarded to ``PretrainedConfig``.
    """

    def __init__(
        self,
        seq_len: int=336,
        pred_len: int=96,
        patch_size: int=16,
        pretrain: bool=True,
        stride: int=8,
        prompt_len: int=133,
        gpt_layers: int=3,
        model_name_or_path: str="gpt2-medium",
        d_ff: int=512,
        d_model: int=1024,
        enc_in: int=1,
        dropout: float=0.2,
        n_heads: int=16,
        prompt_data_path: str=None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Sequence / patch geometry
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.patch_size = patch_size
        self.stride = stride
        self.prompt_len = prompt_len
        self.enc_in = enc_in

        # Backbone selection and size
        self.pretrain = pretrain
        self.model_name_or_path = model_name_or_path
        self.gpt_layers = gpt_layers
        self.d_model = d_model
        self.d_ff = d_ff
        self.n_heads = n_heads
        self.dropout = dropout

        # Prompt data location
        self.prompt_data_path = prompt_data_path
32 |
33 |
@dataclass
class DLinearConfig(PretrainedConfig):
    """
    Configuration for the DLinear model.

    Stores ``seq_len``, ``pred_len``, ``individual`` and ``enc_in`` as attributes;
    any additional keyword arguments are forwarded to ``PretrainedConfig``.
    """

    def __init__(
        self,
        seq_len: int=336,
        pred_len: int=96,
        individual: bool=0,
        enc_in: int=1,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.enc_in = enc_in
        self.individual = individual
        self.seq_len = seq_len
        self.pred_len = pred_len
47 |
@dataclass
class InformerConfig(PretrainedConfig):
    """
    InformerConfig is a configuration class for the Informer model.
    It contains all the necessary parameters to initialize the model.

    Every constructor argument is stored unchanged as an attribute of the same
    name; any additional keyword arguments are forwarded to ``PretrainedConfig``.

    ``d_layers`` (number of decoder layers) is an explicit argument with a
    default of 1 so that a default-constructed config carries every attribute
    the Informer model reads; previously it was only present when the caller
    happened to pass it as an extra keyword.
    """

    def __init__(self, seq_len=336, pred_len=96, enc_in=1, dec_in=7, d_model=1024, n_heads=16, e_layers=2, d_ff=512,
                 dropout=0.2, activation='gelu', output_attention=False, embed_type=0, freq='h', factor=1,
                 distil=True, c_out=862, embed='timeF', d_layers=1, **kwargs):
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.enc_in = enc_in
        self.dec_in = dec_in
        self.d_model = d_model
        self.n_heads = n_heads
        self.e_layers = e_layers
        # Appended at the end of the signature so existing positional calls are unaffected.
        self.d_layers = d_layers
        self.d_ff = d_ff
        self.dropout = dropout
        self.activation = activation
        self.output_attention = output_attention
        self.embed_type = embed_type
        self.factor = factor
        self.freq = freq
        self.distil = distil
        self.c_out = c_out
        self.embed = embed
76 |
77 |
@dataclass
class PatchTSTConfig(PretrainedConfig):
    """
    PatchTSTConfig is a configuration class for the PatchTST model.
    It contains all the necessary parameters to initialize the model.

    Every constructor argument is stored unchanged as an attribute of the same
    name, except ``act`` which is stored as ``activation``. Any additional
    keyword arguments are forwarded to ``PretrainedConfig``.
    """

    def __init__(self, seq_len=336, pred_len=96, enc_in=1, patch_len=16, stride=8, decomposition=False, max_seq_len:Optional[int]=1024,
                 n_layers:int=3, d_model=128, n_heads=16, d_k:Optional[int]=None, d_v:Optional[int]=None,
                 d_ff:int=256, norm:str='BatchNorm', attn_dropout:float=0., dropout:float=0., act:str="gelu", key_padding_mask:bool='auto',
                 padding_var:Optional[int]=None, attn_mask:Optional[Tensor]=None, res_attention:bool=True, pre_norm:bool=False, store_attn:bool=False,
                 pe:str='zeros', learn_pe:bool=True, fc_dropout:float=0., head_dropout = 0, padding_patch = None,
                 pretrain_head:bool=False, head_type = 'flatten', individual = False, revin = True, affine = True, subtract_last = False,
                 verbose:bool=False, embed='timeF', **kwargs):
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.enc_in = enc_in
        self.patch_len = patch_len
        self.stride = stride
        self.decomposition = decomposition
        self.max_seq_len = max_seq_len
        self.n_layers = n_layers
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_k = d_k
        self.d_v = d_v
        self.d_ff = d_ff
        self.norm = norm
        self.attn_dropout = attn_dropout
        self.dropout = dropout
        # The constructor argument is named `act`; the attribute is `activation`.
        self.activation = act
        self.key_padding_mask = key_padding_mask
        self.padding_var = padding_var
        self.attn_mask = attn_mask
        self.res_attention = res_attention
        self.pre_norm = pre_norm
        self.store_attn = store_attn
        self.pe = pe
        self.learn_pe = learn_pe
        self.fc_dropout = fc_dropout
        self.head_dropout = head_dropout
        self.padding_patch = padding_patch
        self.pretrain_head = pretrain_head
        self.head_type = head_type
        self.individual = individual
        self.revin = revin
        self.affine = affine
        self.subtract_last = subtract_last
        # No trailing comma: a stray one used to store the 1-tuple `(verbose,)`,
        # which is truthy even when verbose is False.
        self.verbose = verbose
        self.embed = embed
129 |
--------------------------------------------------------------------------------
/ltsm/models/Informer.py:
--------------------------------------------------------------------------------
1 | # code from https://github.com/yuqinie98/PatchTST, with minor modifications
2 | import torch
3 | from torch import Tensor
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from ltsm.utils.masking import TriangularCausalMask, ProbMask
7 | from ltsm.layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
8 | from ltsm.layers.SelfAttention_Family import FullAttention, ProbAttention, AttentionLayer
9 | from ltsm.layers.Embed import DataEmbedding,DataEmbedding_wo_pos,DataEmbedding_wo_temp,DataEmbedding_wo_pos_temp
10 | import numpy as np
11 | from transformers import PreTrainedModel
12 | from .base_config import InformerConfig
13 |
14 | class Informer(PreTrainedModel):
15 | """
16 | Informer with Propspare attention in O(LlogL) complexity
17 | """
18 | config_class = InformerConfig
19 |
20 | def __init__(self, config: InformerConfig, **kwargs):
21 | super().__init__(config)
22 | self.pred_len = config.pred_len
23 | self.output_attention = config.output_attention
24 |
25 | # Embedding
26 | if config.embed_type == 0:
27 | self.enc_embedding = DataEmbedding(config.enc_in, config.d_model, config.embed, config.freq,
28 | config.dropout)
29 | self.dec_embedding = DataEmbedding(config.dec_in, config.d_model, config.embed, config.freq,
30 | config.dropout)
31 | elif config.embed_type == 1:
32 | self.enc_embedding = DataEmbedding(config.enc_in, config.d_model, config.embed, config.freq,
33 | config.dropout)
34 | self.dec_embedding = DataEmbedding(config.dec_in, config.d_model, config.embed, config.freq,
35 | config.dropout)
36 | elif config.embed_type == 2:
37 | self.enc_embedding = DataEmbedding_wo_pos(config.enc_in, config.d_model, config.embed, config.freq,
38 | config.dropout)
39 | self.dec_embedding = DataEmbedding_wo_pos(config.dec_in, config.d_model, config.embed, config.freq,
40 | config.dropout)
41 |
42 | elif config.embed_type == 3:
43 | self.enc_embedding = DataEmbedding_wo_temp(config.enc_in, config.d_model, config.embed, config.freq,
44 | config.dropout)
45 | self.dec_embedding = DataEmbedding_wo_temp(config.dec_in, config.d_model, config.embed, config.freq,
46 | config.dropout)
47 | elif config.embed_type == 4:
48 | self.enc_embedding = DataEmbedding_wo_pos_temp(config.enc_in, config.d_model, config.embed, config.freq,
49 | config.dropout)
50 | self.dec_embedding = DataEmbedding_wo_pos_temp(config.dec_in, config.d_model, config.embed, config.freq,
51 | config.dropout)
52 | # Encoder
53 | self.encoder = Encoder(
54 | [
55 | EncoderLayer(
56 | AttentionLayer(
57 | ProbAttention(False, config.factor, attention_dropout=config.dropout,
58 | output_attention=config.output_attention),
59 | config.d_model, config.n_heads),
60 | config.d_model,
61 | config.d_ff,
62 | dropout=config.dropout,
63 | activation=config.activation
64 | ) for l in range(config.e_layers)
65 | ],
66 | [
67 | ConvLayer(
68 | config.d_model
69 | ) for l in range(config.e_layers - 1)
70 | ] if config.distil else None,
71 | norm_layer=torch.nn.LayerNorm(config.d_model)
72 | )
73 | # Decoder
74 | self.decoder = Decoder(
75 | [
76 | DecoderLayer(
77 | AttentionLayer(
78 | ProbAttention(True, config.factor, attention_dropout=config.dropout, output_attention=False),
79 | config.d_model, config.n_heads),
80 | AttentionLayer(
81 | ProbAttention(False, config.factor, attention_dropout=config.dropout, output_attention=False),
82 | config.d_model, config.n_heads),
83 | config.d_model,
84 | config.d_ff,
85 | dropout=config.dropout,
86 | activation=config.activation,
87 | )
88 | for l in range(config.d_layers)
89 | ],
90 | norm_layer=torch.nn.LayerNorm(config.d_model),
91 | projection=nn.Linear(config.d_model, config.c_out, bias=True)
92 | )
93 |
def forward(self, x_enc: Tensor, x_mark_enc: Tensor, x_dec: Tensor, x_mark_dec: Tensor,
            enc_self_mask: Tensor=None, dec_self_mask: Tensor=None, dec_enc_mask: Tensor=None):
    """Embed, encode and decode the inputs, keeping the last ``pred_len`` steps.

    Args:
        x_enc: Encoder input, embedded together with ``x_mark_enc``.
        x_mark_enc: Second argument of ``self.enc_embedding`` (presumably time
            features; confirm against the embedding class).
        x_dec: Decoder input, embedded together with ``x_mark_dec``.
        x_mark_dec: Second argument of ``self.dec_embedding``.
        enc_self_mask: Forwarded to the encoder as ``attn_mask``.
        dec_self_mask: Forwarded to the decoder as ``x_mask``.
        dec_enc_mask: Forwarded to the decoder as ``cross_mask``.

    Returns:
        The decoder output sliced to its final ``self.pred_len`` positions
        along dimension 1. When ``self.output_attention`` is truthy, a tuple
        of that slice and the attentions returned by the encoder.
    """
    # Encoder path first, then decoder path, so module calls happen in the same order as before.
    encoded, attns = self.encoder(self.enc_embedding(x_enc, x_mark_enc), attn_mask=enc_self_mask)
    decoded = self.decoder(self.dec_embedding(x_dec, x_mark_dec), encoded,
                           x_mask=dec_self_mask, cross_mask=dec_enc_mask)

    forecast = decoded[:, -self.pred_len:, :]  # [B, L, D]
    return (forecast, attns) if self.output_attention else forecast
--------------------------------------------------------------------------------
/ltsm/prompt_reader/stat_prompt/prompt_tsne.py:
--------------------------------------------------------------------------------
1 | # from ltsm.data_provider.data_factory import get_data_loader, get_data_loaders, get_dataset
2 | import argparse
3 | import ipdb
4 | import pandas as pd
5 | import numpy as np
6 | # import tsfel
7 | from pandas import read_csv, read_feather
8 | import matplotlib.pyplot as plt
9 | import sys, os
10 | import torch
11 | from sklearn.preprocessing import StandardScaler
12 | from sklearn import manifold
13 |
14 |
def get_args():
    """Parse the command-line options of the stat-prompt t-SNE script.

    Every option has a default, so an empty command line is valid.

    Returns:
        argparse.Namespace: The parsed options.
    """
    # (flag, type, default) for each supported option, in registration order.
    option_specs = (
        ('--data_path', str, 'dataset/weather.csv'),
        ('--data', str, 'custom'),
        ('--freq', str, "h"),
        ('--target', str, 'OT'),
        ('--embed', str, 'timeF'),
        ('--percent', int, 10),
        ('--batch_size', int, 512),
        ('--max_len', int, -1),
        ('--seq_len', int, 512),
        ('--pred_len', int, 96),
        ('--label_len', int, 48),
        ('--features', str, 'M'),
    )

    parser = argparse.ArgumentParser(description='LTSM')
    for flag, value_type, default in option_specs:
        parser.add_argument(flag, type=value_type, default=default)

    return parser.parse_args()
34 |
35 |
def prompt_generation(ts):
    """Extract TSFEL features from a time series to use as its statistical prompt.

    Args:
        ts: Time series passed unchanged to
            ``tsfel.time_series_features_extractor``.

    Returns:
        The result of ``tsfel.time_series_features_extractor`` for the
        configuration returned by ``tsfel.get_features_by_domain()`` called
        with no arguments.

    Raises:
        ImportError: If the ``tsfel`` package cannot be imported.
    """
    # The module-level ``import tsfel`` is commented out, so this function used to
    # fail with NameError on every call. Importing here fixes that while keeping
    # the script importable when tsfel is not needed.
    import tsfel

    cfg = tsfel.get_features_by_domain()
    return tsfel.time_series_features_extractor(cfg, ts)
40 |
41 |
def prompt_prune(pt):
    """Remove every "0_FFT mean coefficient*" entry from a prompt, in place.

    Args:
        pt: Object exposing ``to_dict()`` and supporting ``del pt[key]``
            (presumably a pandas DataFrame or Series; confirm with callers).

    Returns:
        The same ``pt`` object, after the matching keys have been deleted.
    """
    fft_prefix = "0_FFT mean coefficient"
    # Snapshot the keys first so deleting from ``pt`` cannot disturb iteration.
    for name in tuple(pt.to_dict().keys()):
        if not name.startswith(fft_prefix):
            continue
        del pt[name]

    return pt
50 |
51 |
if __name__ == "__main__":
    # Visualise the stored statistical prompts with t-SNE: sample up to K prompt
    # files per (split, dataset) directory, embed them in 2-D and save a scatter
    # plot coloured by dataset and marked by split.

    root_path = "./prompt_bank/stat-prompt/prompt_data_split/"
    figure_path = "./figures/stat_prompt_tsne.png"

    dataset_name = [
        "electricity",
        "ETT-small",
        "exchange_rate",
        "illness",
        "traffic",
        "weather",
    ]
    split_buf = ["train", "val", "test"]
    K = 100  # maximum number of prompt files sampled from each directory

    data_path_buf = []      # sampled prompt file paths
    dataset_dir_buf = []    # per-file index into dataset_name
    dataset_split_buf = []  # per-file index into split_buf

    # Same visiting order as before (all train dirs, then val, then test). The
    # labels now come from the loop indices rather than from substring matching
    # on the directory path, which would mislabel files if root_path or a
    # dataset name ever contained "train", "val" or "test".
    for split_index, split in enumerate(split_buf):
        for dataset_index, name in enumerate(dataset_name):
            dataset_dir = os.path.join(root_path, split, name)
            files = [os.path.join(dataset_dir, path) for path in os.listdir(dataset_dir)]

            # NOTE(review): the sample is drawn from NumPy's global, unseeded RNG,
            # so the plotted subset differs between runs.
            sample_idx = np.random.permutation(len(files))[:K]
            sampled = [files[i] for i in sample_idx]

            data_path_buf.extend(sampled)
            dataset_dir_buf.extend(len(sampled) * [dataset_index])
            dataset_split_buf.extend(len(sampled) * [split_index])

    prompt_data_buf = []
    for dataset_path in data_path_buf:
        # NOTE(review): torch.load unpickles the file; only point root_path at
        # trusted prompt files. Recent PyTorch releases default to
        # weights_only=True, which may reject non-tensor payloads; confirm
        # against the installed version.
        prompt_data_buf.append(torch.load(dataset_path))
        print("Import from {}".format(dataset_path))

    prompt_data_all = pd.concat(prompt_data_buf, axis=0).values
    print(prompt_data_all.shape)  # original note recorded (3166, 133) here

    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    prompt_data_tsne = tsne.fit_transform(prompt_data_all)

    color_buf = ["red", "blue", "black", "green", "pink", "brown"]  # one per dataset
    marker_buf = [".", "^", "x"]  # one per split

    # Convert the label lists once instead of on every plot iteration.
    dataset_labels = np.array(dataset_dir_buf)
    split_labels = np.array(dataset_split_buf)
    for index, _ in enumerate(dataset_name):
        for sindex, split_fold in enumerate(split_buf):
            plot_index = (dataset_labels == index) & (split_labels == sindex)
            plt.plot(
                prompt_data_tsne[plot_index, 0],
                prompt_data_tsne[plot_index, 1],
                linewidth=0,
                marker=marker_buf[sindex],
                label=str(dataset_name[index][0:8] + "-" + split_fold),
                color=color_buf[index],
            )

    plt.legend(loc="right")
    # Create the output directory so the t-SNE result is not lost to a
    # FileNotFoundError when ./figures does not exist yet.
    os.makedirs(os.path.dirname(figure_path), exist_ok=True)
    plt.savefig(figure_path)
    plt.close()
129 |
130 | # ipdb.set_trace()
131 | # plt.xticks([])
132 | # plt.yticks([])
133 |
134 | # print(prompt_data_all)
135 | # , color = plt.cm.Set1(dataset_dir_buf[index])
136 | # print(prompt_data_transform)
137 | # print(prompt_data_transform_array.mean(axis=0))
138 | # print(prompt_data_transform_array.std(axis=0))
139 | # print(prompt_data_transform.loc[5])
140 |
141 |
142 |
143 |
144 |
145 |
146 |
--------------------------------------------------------------------------------
/multi_agents_pipeline/agents/Planning_Agent.py:
--------------------------------------------------------------------------------
1 | from .custom_messages import TextMessage, TSTaskMessage, TSMessage
2 | from typing import Optional, List
3 | from autogen_core import RoutedAgent, default_subscription, message_handler, MessageContext, TopicId
4 | from autogen_core.models import ChatCompletionClient, SystemMessage
5 | from pydantic import ValidationError
6 | from pydantic import BaseModel
7 |
8 |
@default_subscription
class PlanningAgent(RoutedAgent):
    """A planning agent that turns a user request into tasks for the Time Series Agent and the QA Agent.

    For every incoming ``TSTaskMessage`` the agent asks the model client for a
    ``TSMessage`` and a ``TextMessage`` and publishes them on the
    "Planner-TS" and "Planner-QA" topics respectively.

    Args:
        name (str): The name of the agent, used in log output.
        model_client (ChatCompletionClient): The ChatCompletion client.
    """
    def __init__(self, name: str, model_client: ChatCompletionClient) -> None:
        super().__init__("planning_agent")
        self.name = name
        self._model_client = model_client
        self._system_messages = [SystemMessage(content="You are a helpful AI assistant.")]

    async def send_message_to_openai(self, messages: List[SystemMessage], ctx: MessageContext, json_output: Optional[bool | type[BaseModel]] = False) -> str:
        """Sends messages to the model client and returns the response content.

        Args:
            messages (List[SystemMessage]): Messages appended after the agent's system messages.
            ctx (MessageContext): Context whose cancellation token is forwarded to the client.
            json_output (Optional[bool | type[BaseModel]]): Forwarded unchanged to
                ``ChatCompletionClient.create``; callers in this class pass a pydantic
                model class to request structured output.

        Returns:
            str: The response content.

        Raises:
            ValueError: If the response content is not a string.
        """
        response = await self._model_client.create(
            messages=self._system_messages + messages,
            cancellation_token=ctx.cancellation_token,
            json_output=json_output)
        if isinstance(response.content, str):
            return response.content
        raise ValueError("Response content is not a valid JSON string")

    async def generate_ts_task(self, original_message: TSTaskMessage, ctx: MessageContext) -> TSMessage:
        """Generates a time series task message based on the original message.

        Args:
            original_message (TSTaskMessage): The original message containing the task description and filepath.
            ctx (MessageContext): Context of the message being handled.

        Returns:
            TSMessage: The validated model response with ``source`` set to "planner"
            and ``filepath`` copied from ``original_message``.

        Raises:
            ValueError: If the response cannot be validated as a TSMessage.
        """
        # NOTE(review): the prompt is wrapped in a SystemMessage with source="user";
        # confirm whether a user message type was intended.
        ts_message = SystemMessage(
            source="user",
            content=f"""The task for the time series analysis is: {original_message.description}.
The time-series data is stored at {original_message.filepath}. Provide a detailed description of the data
based on the task description. Also, provide what type of analysis would be required to complete the task among
the following types: ["statistical forecasting", "anomaly detection"].
"""
        )

        response_content = await self.send_message_to_openai([ts_message], ctx, json_output=TSMessage)

        try:
            ts_task = TSMessage.model_validate_json(response_content)
        except ValidationError as e:
            # Name the model that actually failed; this used to say "TextMessage".
            raise ValueError(f"Response content is not a valid TSMessage: {e}") from e

        ts_task.source = "planner"  # Set the source to the Planning Agent
        ts_task.filepath = original_message.filepath  # Ensure the filepath is preserved
        return ts_task

    async def generate_qa_task(self, original_message: TSTaskMessage, ctx: MessageContext) -> TextMessage:
        """Generates a QA task message based on the original message.

        Args:
            original_message (TSTaskMessage): The original message containing the task description and filepath.
            ctx (MessageContext): Context of the message being handled.

        Returns:
            TextMessage: The validated model response with ``source`` set to "planner".

        Raises:
            ValueError: If the response cannot be validated as a TextMessage.
        """
        task_message = SystemMessage(
            source="user",
            content=f"""Write a descriptive task for the following prompt: {original_message.description}.
The time-series data is stored at {original_message.filepath}.
"""
        )

        response_content = await self.send_message_to_openai([task_message], ctx, json_output=TextMessage)

        try:
            qa_task = TextMessage.model_validate_json(response_content)
        except ValidationError as e:
            raise ValueError(f"Response content is not a valid TextMessage: {e}") from e

        qa_task.source = "planner"  # Set the source to the Planning Agent
        return qa_task

    @message_handler
    async def handle_ts_task_message(self, message: TSTaskMessage, ctx: MessageContext) -> None:
        """Handles an incoming time series task by publishing a TS task and a QA task.

        The TS task is generated and published on the "Planner-TS" topic first;
        the QA task is then generated and published on "Planner-QA". Both topics
        use this agent's id key as the topic source.

        Args:
            message (TSTaskMessage): The incoming message containing the user's query.
            ctx (MessageContext): Context of the message being handled.
        """
        ts_task = await self.generate_ts_task(message, ctx)
        print(f"[{self.name}] Sending TS task to TS Agent...")
        await self.publish_message(
            ts_task,
            TopicId(type="Planner-TS", source=self.id.key)
        )

        qa_task = await self.generate_qa_task(message, ctx)
        print(f"[{self.name}] Sending QA task to QA Agent...")
        await self.publish_message(
            qa_task,
            TopicId(type="Planner-QA", source=self.id.key)
        )
--------------------------------------------------------------------------------
/ltsm/models/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from math import sqrt
5 | from transformers.modeling_utils import PreTrainedModel, PretrainedConfig
6 |
class Normalize(nn.Module):
    """Normalise a tensor with its own statistics and undo it later.

    ``forward(x, 'norm')`` records the statistics of ``x`` on the module and
    returns the normalised tensor; ``forward(x, 'denorm')`` applies the inverse
    transform using the statistics recorded by the most recent 'norm' call.
    """

    def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        :param subtract_last: if True, centre on the last step ``x[:, -1, :]``
            instead of the mean
        :param non_norm: if True, both modes return their input unchanged
            (statistics are still recorded in 'norm' mode)
        """
        super().__init__()
        self.num_features, self.eps = num_features, eps
        self.affine = affine
        self.subtract_last = subtract_last
        self.non_norm = non_norm
        if affine:
            self._init_params()

    def forward(self, x, mode: str):
        """Apply the 'norm' or 'denorm' transform; other modes raise NotImplementedError."""
        if mode == 'norm':
            self._get_statistics(x)
            return self._normalize(x)
        if mode == 'denorm':
            return self._denormalize(x)
        raise NotImplementedError

    def _init_params(self):
        # initialize RevIN params: (C,)
        features = self.num_features
        self.affine_weight = nn.Parameter(torch.ones(features))
        self.affine_bias = nn.Parameter(torch.zeros(features))

    def _get_statistics(self, x):
        """Record the centring term and the standard deviation of ``x``."""
        # Reduce over every dimension except the first and the last.
        reduce_dims = tuple(range(1, x.ndim - 1))
        if self.subtract_last:
            self.last = x[:, -1, :].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=reduce_dims, keepdim=True).detach()
        variance = torch.var(x, dim=reduce_dims, keepdim=True, unbiased=False)
        self.stdev = torch.sqrt(variance + self.eps).detach()

    def _normalize(self, x):
        """Centre and scale ``x``, then apply the affine transform if enabled."""
        if self.non_norm:
            return x
        centre = self.last if self.subtract_last else self.mean
        out = (x - centre) / self.stdev
        if self.affine:
            out = out * self.affine_weight + self.affine_bias
        return out

    def _denormalize(self, x):
        """Invert ``_normalize`` using the recorded statistics."""
        if self.non_norm:
            return x
        out = x
        if self.affine:
            # eps * eps keeps the division finite when a learned weight reaches zero.
            out = (out - self.affine_bias) / (self.affine_weight + self.eps * self.eps)
        out = out * self.stdev
        return out + (self.last if self.subtract_last else self.mean)
71 |
72 |
class FlattenHead(nn.Module):
    """Prediction head: flatten the last two dimensions, project, apply dropout.

    Args:
        n_vars: Stored on the module as ``n_vars``; not used by ``forward``.
        nf: Input size of the linear layer, i.e. the size of the flattened
            last two dimensions.
        target_window: Output size of the linear layer.
        head_dropout: Dropout probability applied after the projection.
    """

    def __init__(self, n_vars, nf, target_window, head_dropout=0):
        super().__init__()
        self.n_vars = n_vars
        self.flatten = nn.Flatten(start_dim=-2)
        self.linear = nn.Linear(nf, target_window)
        self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):
        """Return ``dropout(linear(flatten(x)))``."""
        return self.dropout(self.linear(self.flatten(x)))
86 |
class ReprogrammingLayer(nn.Module):
    """Multi-head cross-attention from target embeddings onto source embeddings.

    Queries are projected from ``d_model``; keys and values are projected from
    ``d_llm``; the attended result is projected back to ``d_llm``.

    Args:
        d_model: Feature size of the target (query) embeddings.
        n_heads: Number of attention heads.
        d_keys: Per-head size; defaults to ``d_model // n_heads`` when falsy.
        d_llm: Feature size of the source/value embeddings and of the output.
        attention_dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, d_model, n_heads, d_keys=None, d_llm=None, attention_dropout=0.1):
        super().__init__()

        d_keys = d_keys or (d_model // n_heads)
        inner_dim = d_keys * n_heads

        self.query_projection = nn.Linear(d_model, inner_dim)
        self.key_projection = nn.Linear(d_llm, inner_dim)
        self.value_projection = nn.Linear(d_llm, inner_dim)
        self.out_projection = nn.Linear(inner_dim, d_llm)
        self.n_heads = n_heads
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, target_embedding, source_embedding, value_embedding):
        """Attend from a 3-D target (B, L, d_model) onto a 2-D source (S, d_llm).

        Returns:
            Tensor of shape (B, L, d_llm).
        """
        batch, length, _ = target_embedding.shape
        n_source, _ = source_embedding.shape
        heads = self.n_heads

        # Split each projection into heads.
        queries = self.query_projection(target_embedding).view(batch, length, heads, -1)
        keys = self.key_projection(source_embedding).view(n_source, heads, -1)
        values = self.value_projection(value_embedding).view(n_source, heads, -1)

        attended = self.reprogramming(queries, keys, values)
        return self.out_projection(attended.reshape(batch, length, -1))

    def reprogramming(self, target_embedding, source_embedding, value_embedding):
        """Scaled dot-product attention over per-head tensors.

        Takes queries (B, L, H, E) and keys/values (S, H, E); returns (B, L, H, E).
        """
        _, _, _, head_dim = target_embedding.shape
        scale = 1. / sqrt(head_dim)

        scores = torch.einsum("blhe,she->bhls", target_embedding, source_embedding)
        weights = self.dropout(torch.softmax(scale * scores, dim=-1))

        return torch.einsum("bhls,she->blhe", weights, value_embedding)
126 |
def freeze_parameters(model: PreTrainedModel):
    """
    Freeze parameters whose name contains "gpt2", then re-enable gradients for
    any parameter whose name contains one of the trainable markers ("ln", "wpe",
    "in_layer", "out_layer", "lora"). The trainable markers take precedence, and
    a line is printed for every parameter matched by the freeze marker.
    """
    frozen_markers = ("gpt2",)
    trainable_markers = ("ln", "wpe", "in_layer", "out_layer", "lora")

    for name, param in model.named_parameters():
        if any(marker in name for marker in frozen_markers):
            param.requires_grad = False
            print(f"{name} has been freeezed")
        # Checked after the freeze so that trainable markers win.
        if any(marker in name for marker in trainable_markers):
            param.requires_grad = True
142 |
def print_trainable_parameters(model):
    """
    Print one line for every named parameter of the model that requires gradients.
    """
    trainable_names = (name for name, param in model.named_parameters() if param.requires_grad)
    for name in trainable_names:
        print(f"{name} is trainable...")
--------------------------------------------------------------------------------
/ltsm/data_reader/database_reader.py:
--------------------------------------------------------------------------------
1 | import taosws
2 | import pandas as pd
3 | import os
4 |
# Settings used by the example workflow at the bottom of this file; change to your own.
datapath = "original_upload"  # directory scanned for *.csv files to upload
output_folder = 'original_download'  # directory the retrieved tables are written to as CSV
database = "time_series_demo"  # database the example creates and uses
user = "root"  # account passed to taosws.connect in create_connection()
password = "taosdata"  # NOTE(review): credential is hard-coded in source; consider loading it from the environment
11 |
def create_connection(host='35.153.211.255', port=6041):
    """Connect to the database server. (Change host and port to your own.)

    The module-level ``user`` and ``password`` are used as credentials.

    Args:
        host: Server address passed to ``taosws.connect``.
        port: Server port passed to ``taosws.connect``.

    Returns:
        The connection object returned by ``taosws.connect``.

    Raises:
        Exception: Any connection error is printed and re-raised unchanged.
    """
    try:
        connection = taosws.connect(user=user, password=password, host=host, port=port)
        print(f"Connected to {host}:{port} successfully.")
        return connection
    except Exception as err:
        print(f"Failed to connect to {host}:{port}, ErrMessage: {err}")
        raise err
27 |
28 |
def setup_database(conn, database):
    """Create ``database`` on the server if it does not already exist.

    Args:
        conn: Connection object exposing ``cursor()``.
        database: Name of the database. It is interpolated directly into the
            SQL statement, so it must be a trusted identifier.

    Raises:
        Exception: Any error raised by the cursor is printed and re-raised unchanged.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
        # Report the database actually requested; the message used to hard-code
        # "time_series_demo" whatever name was passed in.
        print(f"Database {database} set up successfully.")
    except Exception as err:
        print(f"Error setting up database: {err}")
        raise err
38 |
39 |
def setup_tables(conn, database, table_name, df):
    """Drop and recreate ``table_name`` with a schema derived from ``df``.

    The first DataFrame column always becomes ``ts TIMESTAMP``. Every other
    column keeps its name (spaces replaced by underscores) and is typed from
    its pandas dtype: float -> FLOAT, integer -> INT, bool -> BOOL,
    datetime -> TIMESTAMP, anything else -> STRING.

    Args:
        conn: Connection object exposing ``cursor()``.
        database: Database selected with ``USE`` before the table is touched.
        table_name: Table to drop (if present) and create.
        df: DataFrame whose columns define the schema.

    Raises:
        Exception: Any error is printed and re-raised unchanged.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(f"USE {database}")
        cursor.execute(f"DROP TABLE IF EXISTS {table_name}")

        # Ordered (dtype predicate, SQL type) rules; the first match wins.
        type_rules = (
            (pd.api.types.is_float_dtype, "FLOAT"),
            (pd.api.types.is_integer_dtype, "INT"),
            (pd.api.types.is_bool_dtype, "BOOL"),
            (pd.api.types.is_datetime64_any_dtype, "TIMESTAMP"),
        )

        schema_columns = ["ts TIMESTAMP"]
        for column in df.columns[1:]:
            dtype = df[column].dtype
            # Other dtypes, such as object (text), fall back to STRING.
            sql_type = next((sql for matches, sql in type_rules if matches(dtype)), "STRING")
            schema_columns.append(f"{column.replace(' ', '_')} {sql_type}")

        schema = f"({', '.join(schema_columns)})"
        cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} {schema}")
        print(f"Table {table_name} set up successfully with schema: {schema}")
    except Exception as err:
        print(f"Error setting up database or table {table_name}: {err}")
        raise err
69 |
70 |
def insert_data_from_csv(conn, database, csv_file, table_name):
    """Load a CSV file into ``table_name``, recreating the table first.

    The first CSV column is parsed as timestamps with ``errors='coerce'``. The
    table is rebuilt with ``setup_tables`` and the rows are then inserted one
    ``INSERT`` statement at a time; every statement is printed before it runs.

    NOTE(review): string values are quoted but not escaped, so a value
    containing a single quote will break the generated statement.

    Args:
        conn: Connection object exposing ``cursor()``.
        database: Database selected with ``USE`` before inserting.
        csv_file: Path of the CSV file to read.
        table_name: Destination table.

    Raises:
        Exception: Any error is printed and re-raised unchanged.
    """
    def _sql_literal(value):
        # Render one cell as SQL text: NULL, quoted string, true/false, or str(value).
        if pd.isna(value):
            return "NULL"
        if isinstance(value, str):
            return f"'{value}'"
        if isinstance(value, bool):
            return "true" if value else "false"
        return str(value)

    try:
        cursor = conn.cursor()
        df = pd.read_csv(csv_file)

        # Ensure the first column is a timestamp
        ts_column = df.columns[0]
        df[ts_column] = pd.to_datetime(df[ts_column], errors='coerce')

        setup_tables(conn, database, table_name, df)
        cursor.execute(f"USE {database}")

        value_columns = df.columns[1:]
        for _, row in df.iterrows():
            values = [f"'{row[ts_column]}'"]  # Timestamp value
            values.extend(_sql_literal(row[col]) for col in value_columns)

            insert_query = f"INSERT INTO {table_name} VALUES({', '.join(values)})"
            print(insert_query)
            cursor.execute(insert_query)

        print(f"Data from {csv_file} inserted into table {table_name} successfully.")
    except Exception as err:
        print(f"Error inserting data from {csv_file} into {table_name}: {err}")
        raise err
104 |
105 |
def retrieve_data_to_csv(conn, database, table_name, output_file):
    """Dump every row of ``table_name`` to a CSV file.

    Column names are taken from the first field of each row returned by
    ``DESCRIBE table_name``.

    Args:
        conn: Connection object exposing ``cursor()``.
        database: Database selected with ``USE`` before querying.
        table_name: Table to read.
        output_file: Path of the CSV file to write (without the index column).

    Raises:
        Exception: Any error is printed and re-raised unchanged.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(f"USE {database}")

        # Fetch the rows before issuing DESCRIBE on the same cursor.
        cursor.execute(f"SELECT * FROM {table_name}")
        rows = cursor.fetchall()

        cursor.execute(f"DESCRIBE {table_name}")
        column_names = [field[0] for field in cursor.fetchall()]

        pd.DataFrame(rows, columns=column_names).to_csv(output_file, index=False)
        print(f"Data from {table_name} saved to {output_file}.")
    except Exception as err:
        print(f"Error retrieving data from {table_name}: {err}")
        raise err
122 |
123 |
# Example usage: upload every CSV in `datapath`, then download each table again.
if __name__ == "__main__":
    conn = create_connection()
    if conn:
        try:
            setup_database(conn, database)

            # One table per CSV file, named after the file without its extension.
            csv_names = [f for f in os.listdir(datapath) if f.endswith('.csv')]
            uploads = [(os.path.join(datapath, f), os.path.splitext(f)[0]) for f in csv_names]
            for csv_file, table_name in uploads:
                insert_data_from_csv(conn, database, csv_file, table_name)

            if not os.path.exists(output_folder):
                os.makedirs(output_folder)
            for _, table_name in uploads:
                output_file = os.path.join(output_folder, f"{table_name}.csv")
                retrieve_data_to_csv(conn, database, table_name, output_file)

        finally:
            # Always release the connection, even if an upload or download failed.
            conn.close()
142 |
--------------------------------------------------------------------------------