├── .gitignore
├── Dockerfile
├── Makefile
├── README.md
├── autoformer_multivariate.py
├── environment.yml
├── evaluation.py
├── images
│   ├── nhits-arch.png
│   └── results.png
├── long_horizon_baselines.ipynb
├── nhits_intuition.ipynb
├── nhits_multivariate.py
├── residuals.py
├── rnn_multivariate.py
└── src
    ├── __init__.py
    ├── data
    │   ├── __init__.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── ecl.py
    │   │   ├── epf.py
    │   │   ├── ett.py
    │   │   ├── favorita.py
    │   │   ├── gefcom2012.py
    │   │   ├── gefcom2014.py
    │   │   ├── m3.py
    │   │   ├── m4.py
    │   │   ├── m5.py
    │   │   ├── tourism.py
    │   │   ├── utils.py
    │   │   └── wth.py
    │   ├── scalers.py
    │   ├── tsdataset.py
    │   ├── tsloader.py
    │   └── utils.py
    ├── experiments
    │   ├── __init__.py
    │   └── utils.py
    ├── losses
    │   ├── __init__.py
    │   ├── numpy.py
    │   ├── pytorch.py
    │   └── utils.py
    └── models
        ├── __init__.py
        ├── components
        │   ├── __init__.py
        │   ├── autocorrelation.py
        │   ├── autoformer.py
        │   ├── common.py
        │   ├── drnn.py
        │   ├── embed.py
        │   ├── selfattention.py
        │   ├── tcn.py
        │   └── transformer.py
        ├── esrnn
        │   ├── __init__.py
        │   └── esrnn.py
        ├── nbeats
        │   ├── __init__.py
        │   ├── ensemble.py
        │   └── nbeats.py
        ├── nhits
        │   ├── __init__.py
        │   └── nhits.py
        └── transformer
            ├── __init__.py
            ├── autoformer.py
            ├── informer.py
            └── transformer.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 | *.DS_Store
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
132 | # data
133 | results/
134 | data/
135 | *.csv
136 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:11.2.0-devel-ubuntu18.04
2 |
3 | ENV DEBIAN_FRONTEND=noninteractive
4 |
5 | RUN apt-get update -y --fix-missing && \
6 | apt-get install -y --no-install-recommends \
7 | software-properties-common \
8 | wget \
9 | curl \
10 | unrar \
11 | unzip \
12 | git && \
13 | apt-get upgrade -y libstdc++6 && \
14 | apt-get clean -y
15 |
16 | RUN add-apt-repository ppa:ubuntu-toolchain-r/test && \
17 | apt-get update && \
18 | apt-get install -y gcc-9 && \
19 | apt-get upgrade -y libstdc++6
20 |
21 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
22 | bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b && \
23 | rm -rf Miniconda3-latest-Linux-x86_64.sh
24 |
25 | ENV PATH=/miniconda/bin:${PATH}
26 | RUN conda update -y conda
27 |
28 | RUN conda install -n base -c conda-forge mamba
29 |
30 | ADD ./environment.yml ./environment.yml
31 | RUN mamba env update -n base -f ./environment.yml && \
32 | conda clean -afy
33 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | IMAGE := nhits
2 | ROOT := $(shell dirname $(realpath $(firstword ${MAKEFILE_LIST})))
3 | PARENT_ROOT := $(shell dirname ${ROOT})
4 | PORT := 8888
5 |
6 | DOCKER_PARAMETERS := \
7 | --user $(shell id -u) \
8 | -v ${ROOT}:/app \
9 | -w /app \
10 | -e HOME=/tmp
11 |
12 | ifdef gpu
13 | DOCKER_PARAMETERS += --gpus all
14 | endif
15 |
16 | init:
17 | docker build -t ${IMAGE} .
18 |
19 | get_dataset:
20 | $(MAKE) run_module module="mkdir -p data/"
21 | $(MAKE) run_module module="wget -O data/datasets.zip https://nhits-experiments.s3.amazonaws.com/datasets.zip"
22 | $(MAKE) run_module module="unzip data/datasets.zip -d data/"
23 |
24 | jupyter:
25 | docker run -d --rm ${DOCKER_PARAMETERS} -e HOME=/tmp -p ${PORT}:8888 ${IMAGE} \
26 | bash -c "jupyter lab --ip=0.0.0.0 --no-browser --NotebookApp.token=''"
27 |
28 | run_module: .require-module
29 | docker run -i --rm ${DOCKER_PARAMETERS} \
30 | ${IMAGE} ${module}
31 |
32 | bash_docker:
33 | docker run -it --rm ${DOCKER_PARAMETERS} ${IMAGE}
34 |
35 | .require-module:
36 | ifndef module
37 | $(error module is required)
38 | endif
39 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting
2 |
3 | Recent progress in neural forecasting has driven significant improvements in the accuracy of large-scale forecasting systems. Yet, extremely long-horizon forecasting remains a very difficult task. Two common challenges afflicting long-horizon forecasting are the volatility of the predictions and their computational complexity. In this paper we introduce `N-HiTS`, which addresses both challenges by incorporating novel hierarchical interpolation and multi-rate data sampling techniques. These techniques enable our method to assemble its predictions sequentially, selectively emphasizing components with different frequencies and scales while decomposing the input signal and synthesizing the forecast. We conduct an extensive empirical evaluation demonstrating the advantages of `N-HiTS` over state-of-the-art long-horizon forecasting methods. On an array of multivariate forecasting tasks, our method provides an average accuracy improvement of 25% over the latest Transformer architectures while reducing the computational time by orders of magnitude.
4 |
5 |
6 |
![N-HiTS architecture](./images/nhits-arch.png)
7 |
8 |
9 | `N-HiTS` architecture. The model is composed of several `MLPs` with `ReLU` nonlinearities. Blocks are connected via the doubly residual stacking principle, using the backcast `y[t-L:t, l]` and forecast `y[t+1:t+H, l]` outputs of the `l`-th block.
10 | Multi-rate input pooling, hierarchical interpolation and backcast residual connections together induce the specialization of the additive predictions in different signal bands, reducing memory footprint and compute time and improving architecture parsimony and accuracy.
11 |
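The mechanism can be sketched in a few lines (a hedged illustration, not the repository implementation; the full model lives in `src/models/nhits/nhits.py`, and the helper name and arguments below are hypothetical): each block max-pools its input window at its own rate, an `MLP` predicts a small set of interpolation knots, and the knots are interpolated back to the full horizon before the block outputs are combined through the residual stacking.

```python
import torch.nn.functional as F

def block_forecast(y_insample, pool_kernel_size, freq_downsample, horizon, mlp):
    """Hypothetical sketch of one block: multi-rate pooling + hierarchical interpolation."""
    # Multi-rate data sampling: subsample the input window with max pooling.
    x = F.max_pool1d(y_insample.unsqueeze(1), kernel_size=pool_kernel_size,
                     stride=pool_kernel_size, ceil_mode=True).squeeze(1)
    # The block MLP predicts a low-dimensional forecast (interpolation knots).
    n_knots = max(horizon // freq_downsample, 1)
    theta = mlp(x)[:, :n_knots]
    # Hierarchical interpolation: upsample the knots back to the full horizon.
    return F.interpolate(theta.unsqueeze(1), size=horizon,
                         mode='linear', align_corners=True).squeeze(1)
```

Stacks with a large `n_freq_downsample` predict very few knots and therefore specialize in the low-frequency part of the signal, while stacks with `n_freq_downsample = 1` refine the high-frequency details; the per-stack values used in the experiments are listed in `nhits_multivariate.py`.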
12 | ## Long Horizon Datasets Results
13 |
14 |
15 |
![Long horizon datasets results](./images/results.png)
16 |
17 |
18 | ### Run N-HiTS experiment from console
19 |
20 | To replicate the results of the paper, in particular to produce the forecasts for N-HiTS, run the following:
21 |
22 |
23 | 1. `make init`
24 | 2. `make get_dataset` to download data.
25 | 3. Run the `N-HiTS` multivariate experiments:
26 | ```console
27 | make run_module module="python -m nhits_multivariate --hyperopt_max_evals 10 --experiment_id run_1"
28 | ```
29 |
30 | If you want to use a GPU, simply add `gpu=0` to the last line.
31 | ```console
32 | make run_module module="python -m nhits_multivariate --hyperopt_max_evals 10 --experiment_id run_1" gpu=0
33 | ```
34 | 4. Evaluate results for a dataset using:
35 |
36 | ```console
37 | make run_module module="python -m evaluation --dataset ETTm2 --horizon -1 --model NHITS --experiment run_1"
38 | ```
39 |
40 | Alternatively, run all evaluations at once:
41 |
42 | ```console
43 | for dataset in ETTm2 ECL Exchange traffic weather ili;
44 | do make run_module module="python -m evaluation --dataset $dataset --horizon -1 --model NHITS --experiment run_1";
45 | done
46 | ```
47 |
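Each run stores its hyperopt `Trials` object as a pickle, which is what `evaluation.py` later reads. Following the paths hard-coded in `nhits_multivariate.py` and `evaluation.py`, the example above would produce files laid out roughly as follows (one file per dataset and horizon):

```console
results/
└── multivariate/
    └── ETTm2_96/
        └── NHITS/
            └── hyperopt_run_1.p
```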
--------------------------------------------------------------------------------
/autoformer_multivariate.py:
--------------------------------------------------------------------------------
1 | from math import ceil
2 | import os
3 | import pickle
4 | import glob
5 | import time
6 | import numpy as np
7 | import pandas as pd
8 | import argparse
9 | import platform
10 |
11 | from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
12 |
13 | from src.losses.numpy import mae, mse
14 | from src.experiments.utils import hyperopt_tunning
15 |
16 |
17 | def get_experiment_space(args):
18 | space= {# Architecture parameters
19 | 'model':'autoformer',
20 | 'mode': 'iterate_windows',
21 | 'seq_len': hp.choice('seq_len', [args.seq_len]),
22 | 'label_len': hp.choice('label_len', [args.label_len]),
23 | 'pred_len': hp.choice('pred_len', [args.horizon]),
24 | 'output_attention': hp.choice('output_attention', [False]),
25 | 'enc_in': hp.choice('enc_in', [args.n_series]),
26 | 'dec_in': hp.choice('dec_in', [args.n_series]),
27 | 'c_out': hp.choice('c_out', [args.n_series]),
28 | 'e_layers': hp.choice('e_layers', [args.e_layers]),
29 | 'd_layers': hp.choice('d_layers', [args.d_layers]),
30 | 'd_model': hp.choice('d_model', [512]),
31 | 'embed': hp.choice('embed', ['timeF']),
32 | 'freq': hp.choice('freq', ['h']),
33 | 'dropout': hp.choice('dropout', [0.05]),
34 | 'factor': hp.choice('factor', [args.factor]),
35 | 'n_heads': hp.choice('n_heads', [8]),
36 | 'd_ff': hp.choice('d_ff', [2_048]),
37 | 'moving_avg': hp.choice('moving_avg', [25]),
38 | 'activation': hp.choice('activation', ['gelu']),
39 | # Regularization and optimization parameters
40 | 'learning_rate': hp.choice('learning_rate', [1e-4]),
41 | 'lr_decay': hp.choice('lr_decay', [0.5]),
42 | 'n_lr_decays': hp.choice('n_lr_decays', [ceil(args.max_epochs / 2)]),
43 | 'weight_decay': hp.choice('weight_decay', [0]),
44 | 'max_epochs': hp.choice('max_epochs', [args.max_epochs]),
45 | 'max_steps': hp.choice('max_steps', [None]),
46 | 'early_stop_patience': hp.choice('early_stop_patience', [3]),
47 | 'eval_freq': hp.choice('eval_freq', [1]),
48 | 'loss_train': hp.choice('loss', ['MSE']),
49 | 'loss_hypar': hp.choice('loss_hypar', [0.5]),
50 | 'loss_valid': hp.choice('loss_valid', ['MSE']),
51 | # Data parameters
52 | 'n_time_in': hp.choice('n_time_in', [args.seq_len]),
53 | 'n_time_out': hp.choice('n_time_out', [args.horizon]),
54 | 'normalizer_y': hp.choice('normalizer_y', [None]),
55 | 'normalizer_x': hp.choice('normalizer_x', [None]),
56 | 'val_idx_to_sample_freq': hp.choice('val_idx_to_sample_freq', [1]),
57 | 'batch_size': hp.choice('batch_size', [32]),
58 | 'random_seed': hp.choice('random_seed', [1])}
59 |
60 | return space
61 |
62 | def main(args):
63 |
64 | #----------------------------------------------- Load Data -----------------------------------------------#
65 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv')
66 | X_df = pd.read_csv(f'./data/{args.dataset}/M/df_x.csv')
67 | print(Y_df)
68 | print(X_df)
69 |
70 | #raise Exception
71 |
72 | X_df = X_df.drop_duplicates(subset=['ds'])
73 |
74 | X_df = Y_df[['unique_id', 'ds']].merge(X_df, how='left', on=['ds'])
75 |
76 | S_df = None
77 | print('Y_df: ', Y_df.head())
78 | #arguments
79 | args.e_layers = 2
80 | args.d_layers = 1
81 | args.max_epochs = 10
82 | args.seq_len = 36 if args.dataset == 'ili' else 96
83 | args.label_len = 18 if args.dataset == 'ili' else 48
84 |
85 | if args.dataset == 'ETTm2':
86 | len_val = 11520
87 | len_test = 11520
88 | args.factor = 1
89 | args.n_series = 7
90 | if args.dataset == 'Exchange':
91 | len_val = 760
92 | len_test = 1517
93 | args.factor = 3
94 | args.n_series = 8
95 | args.max_epochs = 1 if args.horizon in [192, 336] else args.max_epochs
96 | if args.dataset == 'ECL':
97 | len_val = 2632
98 | len_test = 5260
99 | args.factor = 3
100 | args.n_series = 321
101 | if args.dataset == 'traffic':
102 | len_val = 1756
103 | len_test = 3508
104 | args.factor = 3
105 | args.max_epochs = 3
106 | args.n_series = 862
107 | if args.dataset == 'weather':
108 | len_val = 5270
109 | len_test = 10539
110 | args.factor = 3
111 | args.n_series = 21
112 | args.max_epochs = 2 if args.horizon in [96] else args.max_epochs
113 | if args.dataset == 'ili':
114 | len_val = 97
115 | len_test = 193
116 | args.factor = 3
117 | args.n_series = 7
118 |
119 | space = get_experiment_space(args)
120 |
121 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/autoformer/'
122 |
123 | os.makedirs(output_dir, exist_ok = True)
124 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist'
125 |
126 | hyperopt_file = output_dir + f'hyperopt_{args.experiment_id}.p'
127 |
128 | if not os.path.isfile(hyperopt_file):
129 | print('Hyperparameter optimization')
130 | #----------------------------------------------- Hyperopt -----------------------------------------------#
131 | trials = hyperopt_tunning(space=space, hyperopt_max_evals=args.hyperopt_max_evals, loss_function_val=mae,
132 | loss_functions_test={'mae':mae, 'mse': mse},
133 | Y_df=Y_df, X_df=X_df, S_df=S_df, f_cols=[],
134 | ds_in_val=len_val, ds_in_test=len_test,
135 | return_forecasts=False,
136 | results_file = hyperopt_file,
137 | save_progress=True,
138 | loss_kwargs={})
139 |
140 | with open(hyperopt_file, "wb") as f:
141 | pickle.dump(trials, f)
142 | else:
143 | print('Hyperparameter optimization already done!')
144 |
145 | def parse_args():
146 | desc = "Example of hyperparameter tuning"
147 | parser = argparse.ArgumentParser(description=desc)
148 |
149 | parser.add_argument('--hyperopt_max_evals', type=int, help='hyperopt_max_evals', default=1)
150 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment')
151 |
152 | return parser.parse_args()
153 |
154 |
155 | if __name__ == '__main__':
156 |
157 | # parse arguments
158 | args = parse_args()
159 | if args is None:
160 | exit()
161 |
162 | horizons = [96, 192, 336, 720]
163 | ILI_horizons = [24, 36, 48, 60]
164 | datasets = ['ili', 'Exchange', 'weather', 'ETTm2', 'ECL', 'traffic']
165 |
166 | for dataset in datasets:
167 | # Horizon
168 | if dataset == 'ili':
169 | horizons_dataset = ILI_horizons
170 | else:
171 | horizons_dataset = horizons
172 | for horizon in horizons_dataset:
173 | print(50*'-', dataset, 50*'-')
174 | print(50*'-', horizon, 50*'-')
175 | start = time.time()
176 | args.dataset = dataset
177 | args.horizon = horizon
178 | main(args)
179 | print('Time: ', time.time() - start)
180 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | - defaults
4 | dependencies:
5 | - python=3.7
6 | - pip
7 | - matplotlib
8 | - numpy
9 | - pandas
10 | - r-base
11 | - r-forecast
12 | - r-data.table
13 | - r-tidyverse
14 | - r-furrr
15 | - scikit-learn
16 | - statsmodels
17 | - setuptools=58.2.0
18 | - pip:
19 | - fastcore
20 | - hyperopt
21 | - tqdm
22 | - pytorch-lightning>=1.3.0
23 | - torch==1.9.0
24 | - jupyterlab
25 | - parse
26 |
--------------------------------------------------------------------------------
/evaluation.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pickle
4 | import argparse
5 | import numpy as np
6 |
7 | from src.losses.numpy import mae, mse
8 |
9 |
10 | def get_score_min_val(dir):
11 | print(dir)
12 | result = pickle.load(open(dir, 'rb'))
13 | min_mae = float('inf')
14 | mae_best, mse_best, mc = None, None, {}
15 | for i in range(len(result)):
16 | val_mae = result.trials[i]['result']['loss']
17 | if val_mae < min_mae:
18 | mae_best = result.trials[i]['result']['test_losses']['mae']
19 | mse_best = result.trials[i]['result']['test_losses']['mse']
20 | min_mae = val_mae
21 | mc = result.trials[i]['result']['mc']
22 | return mae_best, mse_best, mc
23 |
24 | def main(args):
25 |
26 | if args.horizon<0:
27 | if args.dataset == 'ili':
28 | horizons = [24, 36, 48, 60]
29 | else:
30 | horizons = [96, 192, 336, 720]
31 | else:
32 | horizons = [args.horizon]
33 |
34 | for horizon in horizons:
35 | result_dir = f'./results/{args.setting}/{args.dataset}_{horizon}/{args.model}/'
36 | result_dir = Path(result_dir)
37 | files = list(result_dir.glob(f'hyperopt_{args.experiment}*.p'))
38 | maes = []
39 | mses = []
40 | for file_ in files:
41 | mae_data, mse_data, _ = get_score_min_val(file_)
42 | maes.append(mae_data)
43 | mses.append(mse_data)
44 |
45 | print(f'Horizon {horizon}')
46 | print(f'MSE: {np.mean(mses)}')
47 | print(f'MAE: {np.mean(maes)}')
48 |
49 | def parse_args():
50 | desc = "Example of hyperparameter tuning"
51 | parser = argparse.ArgumentParser(description=desc)
52 |
53 | parser.add_argument('--dataset', type=str, help='Name of the dataset')
54 | parser.add_argument('--setting', type=str, help='Multivariate or univariate', default='multivariate')
55 | parser.add_argument('--horizon', type=int, help='Horizon')
56 | parser.add_argument('--model', type=str, help='Model name')
57 | parser.add_argument('--experiment', type=str, help='string to identify experiment')
58 | return parser.parse_args()
59 |
60 | if __name__ == '__main__':
61 |
62 | # parse arguments
63 | args = parse_args()
64 | if args is None:
65 | exit()
66 |
67 | main(args)
68 |
--------------------------------------------------------------------------------
/images/nhits-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/images/nhits-arch.png
--------------------------------------------------------------------------------
/images/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/images/results.png
--------------------------------------------------------------------------------
/nhits_multivariate.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import time
4 | import argparse
5 | import pandas as pd
6 |
7 | from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
8 |
9 | from src.losses.numpy import mae, mse
10 | from src.experiments.utils import hyperopt_tunning
11 |
12 | def get_experiment_space(args):
13 | space= {# Architecture parameters
14 | 'model':'nhits',
15 | 'mode': 'simple',
16 | 'n_time_in': hp.choice('n_time_in', [5*args.horizon]),
17 | 'n_time_out': hp.choice('n_time_out', [args.horizon]),
18 | 'n_x_hidden': hp.choice('n_x_hidden', [0]),
19 | 'n_s_hidden': hp.choice('n_s_hidden', [0]),
20 | 'shared_weights': hp.choice('shared_weights', [False]),
21 | 'activation': hp.choice('activation', ['ReLU']),
22 | 'initialization': hp.choice('initialization', ['lecun_normal']),
23 | 'stack_types': hp.choice('stack_types', [ 3*['identity'] ]),
24 | 'n_blocks': hp.choice('n_blocks', [ 3*[1]]),
25 | 'n_layers': hp.choice('n_layers', [ 9*[2] ]),
26 | 'n_hidden': hp.choice('n_hidden', [ 512 ]),
27 | 'n_pool_kernel_size': hp.choice('n_pool_kernel_size', [ 3*[1], 3*[2], 3*[4], 3*[8], [8, 4, 1], [16, 8, 1] ]),
28 | 'n_freq_downsample': hp.choice('n_freq_downsample', [ [168, 24, 1], [24, 12, 1],
29 | [180, 60, 1], [60, 8, 1],
30 | [40, 20, 1]
31 | ]),
32 | 'pooling_mode': hp.choice('pooling_mode', [ 'max' ]),
33 | 'interpolation_mode': hp.choice('interpolation_mode', ['linear']),
34 | # Regularization and optimization parameters
35 | 'batch_normalization': hp.choice('batch_normalization', [False]),
36 | 'dropout_prob_theta': hp.choice('dropout_prob_theta', [ 0 ]),
37 | 'dropout_prob_exogenous': hp.choice('dropout_prob_exogenous', [0]),
38 | 'learning_rate': hp.choice('learning_rate', [0.001]),
39 | 'lr_decay': hp.choice('lr_decay', [0.5] ),
40 | 'n_lr_decays': hp.choice('n_lr_decays', [3]),
41 | 'weight_decay': hp.choice('weight_decay', [0] ),
42 | 'max_epochs': hp.choice('max_epochs', [None]),
43 | 'max_steps': hp.choice('max_steps', [1_000]),
44 | 'early_stop_patience': hp.choice('early_stop_patience', [10]),
45 | 'eval_freq': hp.choice('eval_freq', [50]),
46 | 'loss_train': hp.choice('loss', ['MAE']),
47 | 'loss_hypar': hp.choice('loss_hypar', [0.5]),
48 | 'loss_valid': hp.choice('loss_valid', ['MAE']),
49 | 'l1_theta': hp.choice('l1_theta', [0]),
50 | # Data parameters
51 | 'normalizer_y': hp.choice('normalizer_y', [None]),
52 | 'normalizer_x': hp.choice('normalizer_x', [None]),
53 | 'complete_windows': hp.choice('complete_windows', [True]),
54 | 'frequency': hp.choice('frequency', ['H']),
55 | 'seasonality': hp.choice('seasonality', [24]),
56 | 'idx_to_sample_freq': hp.choice('idx_to_sample_freq', [1]),
57 | 'val_idx_to_sample_freq': hp.choice('val_idx_to_sample_freq', [1]),
58 | 'batch_size': hp.choice('batch_size', [1]),
59 | 'n_windows': hp.choice('n_windows', [256]),
60 | 'random_seed': hp.quniform('random_seed', 1, 10, 1)}
61 | return space
62 |
63 | def main(args):
64 |
65 | #----------------------------------------------- Load Data -----------------------------------------------#
66 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv')
67 |
68 | X_df = None
69 | S_df = None
70 |
71 | print('Y_df: ', Y_df.head())
72 | if args.dataset == 'ETTm2':
73 | len_val = 11520
74 | len_test = 11520
75 | if args.dataset == 'Exchange':
76 | len_val = 760
77 | len_test = 1517
78 | if args.dataset == 'ECL':
79 | len_val = 2632
80 | len_test = 5260
81 | if args.dataset == 'traffic':
82 | len_val = 1756
83 | len_test = 3508
84 | if args.dataset == 'weather':
85 | len_val = 5270
86 | len_test = 10539
87 | if args.dataset == 'ili':
88 | len_val = 97
89 | len_test = 193
90 |
91 | space = get_experiment_space(args)
92 |
93 | #---------------------------------------------- Directories ----------------------------------------------#
94 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/NHITS/'
95 |
96 | os.makedirs(output_dir, exist_ok = True)
97 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist'
98 |
99 | hyperopt_file = output_dir + f'hyperopt_{args.experiment_id}.p'
100 |
101 | if not os.path.isfile(hyperopt_file):
102 | print('Hyperparameter optimization')
103 | #----------------------------------------------- Hyperopt -----------------------------------------------#
104 | trials = hyperopt_tunning(space=space, hyperopt_max_evals=args.hyperopt_max_evals, loss_function_val=mae,
105 | loss_functions_test={'mae':mae, 'mse': mse},
106 | Y_df=Y_df, X_df=X_df, S_df=S_df, f_cols=[],
107 | evaluate_train=True,
108 | ds_in_val=len_val, ds_in_test=len_test,
109 | return_forecasts=False,
110 | results_file = hyperopt_file,
111 | save_progress=True,
112 | loss_kwargs={})
113 |
114 | with open(hyperopt_file, "wb") as f:
115 | pickle.dump(trials, f)
116 | else:
117 | print('Hyperparameter optimization already done!')
118 |
119 | def parse_args():
120 | desc = "Example of hyperparameter tuning"
121 | parser = argparse.ArgumentParser(description=desc)
122 | parser.add_argument('--hyperopt_max_evals', type=int, help='hyperopt_max_evals')
123 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment')
124 | return parser.parse_args()
125 |
126 | if __name__ == '__main__':
127 |
128 | # parse arguments
129 | args = parse_args()
130 | if args is None:
131 | exit()
132 |
133 | horizons = [96, 192, 336, 720]
134 | ILI_horizons = [24, 36, 48, 60]
135 | datasets = ['ETTm2', 'Exchange', 'weather', 'ili', 'ECL', 'traffic']
136 |
137 | for dataset in datasets:
138 | # Horizon
139 | if dataset == 'ili':
140 | horizons_dataset = ILI_horizons
141 | else:
142 | horizons_dataset = horizons
143 | for horizon in horizons_dataset:
144 | print(50*'-', dataset, 50*'-')
145 | print(50*'-', horizon, 50*'-')
146 | start = time.time()
147 | args.dataset = dataset
148 | args.horizon = horizon
149 | main(args)
150 | print('Time: ', time.time() - start)
151 |
152 |
153 |
154 | # source ~/anaconda3/etc/profile.d/conda.sh
155 | # conda activate nixtla
156 | # CUDA_VISIBLE_DEVICES=0 python nhits_multivariate.py --hyperopt_max_evals 10 --experiment_id "eval_train"
157 |
--------------------------------------------------------------------------------
/residuals.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import numpy as np
5 | import pandas as pd
6 |
7 | from evaluation import get_score_min_val
8 | from src.experiments.utils import model_fit_predict
9 |
10 | def main(args):
11 |
12 | #----------------------------------------------- Load Data -----------------------------------------------#
13 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv')
14 |
15 | X_df = None
16 | S_df = None
17 |
18 | print('Y_df: ', Y_df.head())
19 | if args.dataset == 'ETTm2':
20 | len_val = 11520
21 | len_test = 11520
22 | if args.dataset == 'Exchange':
23 | len_val = 760
24 | len_test = 1517
25 | if args.dataset == 'ECL':
26 | len_val = 2632
27 | len_test = 5260
28 | if args.dataset == 'traffic':
29 | len_val = 1756
30 | len_test = 3508
31 | if args.dataset == 'weather':
32 | len_val = 5270
33 | len_test = 10539
34 | if args.dataset == 'ili':
35 | len_val = 97
36 | len_test = 193
37 |
38 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/NHITS/'
39 |
40 | os.makedirs(output_dir, exist_ok = True)
41 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist'
42 |
43 | hyperopt_file = output_dir + f'hyperopt_{args.experiment_id}.p'
44 | *_, mc = get_score_min_val(hyperopt_file)
45 | results = model_fit_predict(mc=mc, S_df=S_df,
46 | Y_df=Y_df, X_df=X_df,
47 | f_cols=[], ds_in_val=len_val,
48 | ds_in_test=len_test,
49 | insample=True)
50 |
51 | n_series = Y_df['unique_id'].nunique()
52 | for data_kind in ['insample', 'val', 'test']:
53 | for y_kind in ['true', 'hat']:
54 | name = f'{data_kind}_y_{y_kind}'
55 | result_name = results[name].reshape((n_series, -1, mc['n_time_out']))
56 | np.save(output_dir + f'{name}.npy', result_name)
57 |
58 | def parse_args():
59 | desc = "Example of hyperparameter tuning"
60 | parser = argparse.ArgumentParser(description=desc)
61 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment')
62 | return parser.parse_args()
63 |
64 | if __name__ == '__main__':
65 |
66 | # parse arguments
67 | args = parse_args()
68 | if args is None:
69 | exit()
70 |
71 | horizons = [96, 192, 336, 720]
72 | ILI_horizons = [24, 36, 48, 60]
73 | datasets = ['ETTm2', 'weather', 'Exchange']#['ECL', 'Exchange', 'traffic', 'weather', 'ili']
74 |
75 | for dataset in datasets:
76 | # Horizon
77 | if dataset == 'ili':
78 | horizons_dataset = ILI_horizons
79 | else:
80 | horizons_dataset = horizons
81 | for horizon in horizons_dataset:
82 | print(50*'-', dataset, 50*'-')
83 | print(50*'-', horizon, 50*'-')
84 | args.dataset = dataset
85 | args.horizon = horizon
86 | main(args)
87 |
88 |
89 |
90 | # source ~/anaconda3/etc/profile.d/conda.sh
91 | # conda activate nixtla
92 | # CUDA_VISIBLE_DEVICES=0 python residuals.py --experiment_id "test"
93 |
--------------------------------------------------------------------------------
/rnn_multivariate.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import time
4 | import argparse
5 | import pandas as pd
6 |
7 | from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
8 |
9 | from src.losses.numpy import mae, mse
10 | from neuralforecast.experiments.utils import hyperopt_tunning
11 |
12 | def get_experiment_space(args):
13 | space= {# Architecture parameters
14 | 'model':'rnn',
15 | 'mode': 'full',
16 | 'n_time_in': hp.choice('n_time_in', [1*args.horizon]),
17 | 'n_time_out': hp.choice('n_time_out', [args.horizon]),
18 | 'cell_type': hp.choice('cell_type', ['LSTM']),
19 | 'state_hsize': hp.choice('state_hsize', [10, 20, 50]),
20 | 'dilations': hp.choice('dilations', [ [[1, 2]], [[1, 2, 4, 8]], [[1,2],[4,8]] ]),
21 | 'add_nl_layer': hp.choice('add_nl_layer', [ False ]),
22 | 'n_pool_kernel_size': hp.choice('n_pool_kernel_size', [ args.pooling ]),
23 | 'n_freq_downsample': hp.choice('n_freq_downsample', [ args.interpolation ]),
24 | 'sample_freq': hp.choice('sample_freq', [1]),
25 | # Regularization and optimization parameters
26 | 'learning_rate': hp.choice('learning_rate', [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1]),
27 | 'lr_decay': hp.choice('lr_decay', [0.5] ),
28 | 'n_lr_decays': hp.choice('n_lr_decays', [3]),
29 | 'gradient_eps': hp.choice('gradient_eps', [1e-8]),
30 | 'gradient_clipping_threshold': hp.choice('gradient_clipping_threshold', [10]),
31 | 'weight_decay': hp.choice('weight_decay', [0]),
32 | 'noise_std': hp.choice('noise_std', [0.001]),
33 | 'max_epochs': hp.choice('max_epochs', [None]),
34 | 'max_steps': hp.choice('max_steps', [500]),
35 | 'early_stop_patience': hp.choice('early_stop_patience', [10]),
36 | 'eval_freq': hp.choice('eval_freq', [50]),
37 | 'loss_train': hp.choice('loss', ['MAE']),
38 | 'loss_hypar': hp.choice('loss_hypar', [0.5]),
39 | 'loss_valid': hp.choice('loss_valid', ['MAE']),
40 | # Data parameters
41 | 'normalizer_y': hp.choice('normalizer_y', [None]),
42 | 'normalizer_x': hp.choice('normalizer_x', [None]),
43 | 'complete_windows': hp.choice('complete_windows', [True]),
44 | 'idx_to_sample_freq': hp.choice('idx_to_sample_freq', [1]),
45 | 'val_idx_to_sample_freq': hp.choice('val_idx_to_sample_freq', [1]),
46 | 'batch_size': hp.choice('batch_size', [8, 16, 32]),
47 | 'n_windows': hp.choice('n_windows', [None]),
48 | 'frequency': hp.choice('frequency', ['D']),
49 | 'random_seed': hp.quniform('random_seed', 1, 10, 1)}
50 | return space
51 |
52 | def main(args):
53 |
54 | #----------------------------------------------- Load Data -----------------------------------------------#
55 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv')
56 |
57 | X_df = None
58 | S_df = None
59 |
60 | print('Y_df: ', Y_df.head())
61 | if args.dataset == 'ETTm2':
62 | len_val = 11520
63 | len_test = 11520
64 | window_sampling_limit = 4000+len_val+len_test
65 | if args.dataset == 'Exchange':
66 | len_val = 760
67 | len_test = 1517
68 | window_sampling_limit = 1517+len_val+len_test
69 | if args.dataset == 'ECL':
70 | len_val = 2632
71 | len_test = 5260
72 | window_sampling_limit = 4000+len_val+len_test
73 | if args.dataset == 'traffic':
74 | len_val = 1756
75 | len_test = 3508
76 | window_sampling_limit = 3508+len_val+len_test
77 | if args.dataset == 'weather':
78 | len_val = 5270
79 | len_test = 10539
80 | window_sampling_limit = 4000+len_val+len_test
81 | if args.dataset == 'ili':
82 | len_val = 97
83 | len_test = 193
84 | window_sampling_limit = 193+len_val+len_test
85 |
86 | Y_df = Y_df.groupby('unique_id').tail(window_sampling_limit).reset_index(drop=True)
87 |
88 | space = get_experiment_space(args)
89 |
90 | #---------------------------------------------- Directories ----------------------------------------------#
91 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/RNN_{args.pooling}_{args.interpolation}/{args.experiment_id}'
92 |
93 | os.makedirs(output_dir, exist_ok = True)
94 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist'
95 |
96 | #----------------------------------------------- Hyperopt -----------------------------------------------#
97 | hyperopt_tunning(space=space, hyperopt_max_evals=args.hyperopt_max_evals, loss_function_val=mae,
98 | loss_functions_test={'mae':mae, 'mse': mse},
99 | Y_df=Y_df, X_df=X_df, S_df=S_df, f_cols=[],
100 | ds_in_val=len_val, ds_in_test=len_test,
101 | return_forecasts=False,
102 | return_model=False,
103 | save_trials=True,
104 | results_dir=output_dir,
105 | verbose=True)
106 |
107 |
108 | def parse_args():
109 | desc = "Example of hyperparameter tuning"
110 | parser = argparse.ArgumentParser(description=desc)
111 | parser.add_argument('--hyperopt_max_evals', type=int, help='hyperopt_max_evals')
112 | parser.add_argument('--pooling', type=int, help='pooling')
113 | parser.add_argument('--interpolation', type=int, help='interpolation')
114 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment')
115 | return parser.parse_args()
116 |
117 | if __name__ == '__main__':
118 |
119 | # parse arguments
120 | args = parse_args()
121 | if args is None:
122 | exit()
123 |
124 | horizons = [96, 192, 336, 720]
125 | ILI_horizons = [24, 36, 48, 60]
126 | datasets = ['ETTm2', 'Exchange', 'ili', 'ECL', 'traffic', 'weather']
127 |
128 | for dataset in datasets:
129 | # Horizon
130 | if dataset == 'ili':
131 | horizons_dataset = ILI_horizons
132 | else:
133 | horizons_dataset = horizons
134 | for horizon in horizons_dataset:
135 | print(50*'-', dataset, 50*'-')
136 | print(50*'-', horizon, 50*'-')
137 | start = time.time()
138 | args.dataset = dataset
139 | args.horizon = horizon
140 | main(args)
141 | print('Time: ', time.time() - start)
142 |
143 |
144 |
145 | # source ~/anaconda3/etc/profile.d/conda.sh
146 | # conda activate nixtla
147 | # CUDA_VISIBLE_DEVICES=2 python rnn_multivariate.py --pooling 2 --interpolation 2 --hyperopt_max_evals 5 --experiment_id "2022_05_15"
148 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/__init__.py
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/data/__init__.py
--------------------------------------------------------------------------------
/src/data/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/data/datasets/__init__.py
--------------------------------------------------------------------------------
/src/data/datasets/ecl.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__ecl.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['ECL', 'ECLInfo', 'ECL']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import gdown
11 | import numpy as np
12 | import pandas as pd
13 |
14 | from .utils import Info, time_features_from_frequency_str
15 | from .ett import process_multiple_ts
16 |
17 | # Cell
18 | @dataclass
19 | class ECL:
20 | freq: str = 'H'
21 | name: str = 'ECL'
22 | n_ts: int = 321
23 |
24 | # Cell
25 | ECLInfo = Info(groups=('ECL',),
26 | class_groups=(ECL,))
27 |
28 | # Cell
29 | @dataclass
30 | class ECL:
31 |
32 | source_url: str = 'https://drive.google.com/uc?id=1rUPdR7R2iWFW-LMoDdHoO2g4KgnkpFzP'
33 |
34 | @staticmethod
35 | def load(directory: str,
36 | cache: bool = True) -> Tuple[pd.DataFrame,
37 | Optional[pd.DataFrame],
38 | Optional[pd.DataFrame]]:
39 | """Downloads and loads ETT data.
40 |
41 | Parameters
42 | ----------
43 | directory: str
44 | Directory where data will be downloaded.
45 | cache: bool
46 | If `True`, saves and loads the data from a local pickle cache.
47 |
48 | Notes
49 | -----
50 | [1] Returns train+val+test sets.
51 | """
52 | path = f'{directory}/ecl/datasets'
53 | file_cache = f'{path}/ECL.p'
54 |
55 | if os.path.exists(file_cache) and cache:
56 | df, X_df, S_df = pd.read_pickle(file_cache)
57 |
58 | return df, X_df, S_df
59 |
60 |
61 | ECL.download(directory)
62 | path = f'{directory}/ecl/datasets'
63 |
64 | y_df = pd.read_csv(f'{path}/ECL.csv')
65 | y_df, X_df = process_multiple_ts(y_df)
66 |
67 | S_df = None
68 | if cache:
69 | pd.to_pickle((y_df, X_df, S_df), file_cache)
70 |
71 | return y_df, X_df, S_df
72 |
73 | @staticmethod
74 | def download(directory: str) -> None:
75 | """Download ECL Dataset."""
76 | path = f'{directory}/ecl/datasets/'
77 | if not os.path.exists(path):
78 | os.makedirs(path)
79 | gdown.download(ECL.source_url, f'{path}/ECL.csv')
--------------------------------------------------------------------------------
/src/data/datasets/epf.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__epf.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['NP', 'PJM', 'BE', 'FR', 'DE', 'EPFInfo', 'EPF', 'epf_naive_forecast']
4 |
5 | # Cell
6 | import os
7 | from datetime import timedelta
8 | from dataclasses import dataclass
9 | from typing import Dict, List, Optional, Tuple, Union
10 |
11 | import numpy as np
12 | import pandas as pd
13 | from pandas.tseries.frequencies import to_offset
14 |
15 | from .utils import download_file, Info, TimeSeriesDataclass
16 | from ..tsdataset import TimeSeriesDataset
17 |
18 | # Cell
19 | @dataclass
20 | class NP:
21 | test_date: str = '2016-12-27'
22 | name: str = 'NP'
23 |
24 | @dataclass
25 | class PJM:
26 | test_date: str = '2016-12-27'
27 | name: str = 'PJM'
28 |
29 | @dataclass
30 | class BE:
31 | test_date: str = '2015-01-04'
32 | name: str = 'BE'
33 |
34 | @dataclass
35 | class FR:
36 | test_date: str = '2015-01-04'
37 | name: str = 'FR'
38 |
39 | @dataclass
40 | class DE:
41 | test_date: str = '2016-01-04'
42 | name: str = 'DE'
43 |
44 | # Cell
45 | EPFInfo = Info(groups=('NP', 'PJM', 'BE', 'FR', 'DE'),
46 | class_groups=(NP, PJM, BE, FR, DE))
47 |
48 | # Cell
49 | class EPF:
50 |
51 | source_url = 'https://sandbox.zenodo.org/api/files/da5b2c6f-8418-4550-a7d0-7f2497b40f1b/'
52 |
53 | @staticmethod
54 | def load(directory: str,
55 | group: str) -> Tuple[pd.DataFrame,
56 | Optional[pd.DataFrame],
57 | Optional[pd.DataFrame]]:
58 | """
59 | Downloads and loads EPF data.
60 |
61 | Parameters
62 | ----------
63 | directory: str
64 | Directory where data will be downloaded.
65 | group: str
66 | Group name.
67 | Allowed groups: 'NP', 'PJM', 'BE', 'FR', 'DE'.
68 | """
69 | EPF.download(directory)
70 | class_group = EPFInfo.get_group(group)
71 |
72 | path = f'{directory}/epf/datasets'
73 | file = f'{path}/{group}.csv'
74 |
75 | df = pd.read_csv(file)
76 |
77 | df.columns = ['ds', 'y'] + \
78 | [f'Exogenous{i}' for i in range(1, len(df.columns) - 1)]
79 |
80 | df['unique_id'] = group
81 | df['ds'] = pd.to_datetime(df['ds'])
82 | df['week_day'] = df['ds'].dt.dayofweek
83 |
84 | dummies = pd.get_dummies(df['week_day'], prefix='day')
85 | df = pd.concat([df, dummies], axis=1)
86 |
87 | dummies_cols = [col for col in df if col.startswith('day')]
88 |
89 | Y = df.filter(items=['unique_id', 'ds', 'y'])
90 | X = df.filter(items=['unique_id', 'ds', 'Exogenous1', 'Exogenous2', 'week_day'] + \
91 | dummies_cols)
92 |
93 | return Y, X, None
94 |
95 | @staticmethod
96 | def load_groups(directory: str,
97 | groups: List[str]) -> Tuple[pd.DataFrame,
98 | Optional[pd.DataFrame],
99 | Optional[pd.DataFrame]]:
100 | """
101 | Downloads and loads panel of EPF data
102 | according of groups.
103 |
104 | Parameters
105 | ----------
106 | directory: str
107 | Directory where data will be downloaded.
108 | groups: List[str]
109 | Group names.
110 | Allowed groups: 'NP', 'PJM', 'BE', 'FR', 'DE'.
111 | """
112 | Y = []
113 | X = []
114 | for group in groups:
115 | Y_df, X_df, S_df = EPF.load(directory=directory, group=group)
116 | Y.append(Y_df)
117 | X.append(X_df)
118 |
119 | Y = pd.concat(Y).sort_values(['unique_id', 'ds']).reset_index(drop=True)
120 | X = pd.concat(X).sort_values(['unique_id', 'ds']).reset_index(drop=True)
121 |
122 | S = Y[['unique_id']].drop_duplicates().reset_index(drop=True)
123 | dummies = pd.get_dummies(S['unique_id'], prefix='static')
124 | S = pd.concat([S, dummies], axis=1)
125 |
126 | return Y, X, S
127 |
128 | @staticmethod
129 | def download(directory: str) -> None:
130 | """Downloads EPF Dataset."""
131 | path = f'{directory}/epf/datasets'
132 | if not os.path.exists(path):
133 | for group in EPFInfo.groups:
134 | download_file(path, EPF.source_url + f'{group}.csv')
135 |
136 | # Cell
137 | # TODO: extend this to group_by unique_id application
138 | def epf_naive_forecast(Y_df):
139 | """Function to build the naive forecast for electricity price forecasting
140 |
141 | The function is used to compute the accuracy metrics MASE and RMAE. It assumes
142 | 24 prices per day and computes a daily naive forecast for Tuesday-Friday and a
143 | weekly seasonal naive forecast for Monday, Saturday and Sunday.
144 |
145 | Parameters
146 | ----------
147 | Y_df : pandas.DataFrame
148 | Dataframe containing the real prices in long format
149 | that contains variables ['ds', 'unique_id', 'y']
150 |
151 | Returns
152 | -------
153 | Y_hat_df : pandas.DataFrame
154 | Dataframe containing the predictions of the epf naive forecast.
155 | """
156 | assert type(Y_df) == pd.core.frame.DataFrame
157 | assert all([(col in Y_df) for col in ['unique_id', 'ds', 'y']])
158 |
159 | # Init the naive forecast
160 | Y_hat_df = Y_df[24 * 7:].copy()
161 | Y_hat_df['dayofweek'] = Y_df['ds'].dt.dayofweek
162 |
163 | # Monday, Saturday and Sunday
164 | # we have a naive forecast using weekly seasonality
165 | weekend_indicator = Y_hat_df['dayofweek'].isin([0,5,6])
166 |
167 | # Tuesday, Wednesday, Thursday, Friday
168 | # we have a naive forecast using daily seasonality
169 | week_indicator = Y_hat_df['dayofweek'].isin([1,2,3,4])
170 |
171 | naive = Y_df['y'].shift(24).values[24 * 7:]
172 | seasonal_naive = Y_df['y'].shift(24*7).values[24 * 7:]
173 |
174 | Y_hat_df['y_hat'] = naive * week_indicator + seasonal_naive * weekend_indicator
175 | return Y_hat_df
--------------------------------------------------------------------------------
/src/data/datasets/ett.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__ett.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['ETTh1', 'ETTh2', 'ETTm1', 'ETTm2', 'ETTInfo', 'process_multiple_ts', 'ETT']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from .utils import download_file, Info, time_features_from_frequency_str
14 |
15 | # Cell
16 | @dataclass
17 | class ETTh1:
18 | freq: str = 'H'
19 | name: str = 'ETTh1'
20 | n_ts: int = 7
21 |
22 | @dataclass
23 | class ETTh2:
24 | freq: str = 'H'
25 | name: str = 'ETTh2'
26 | n_ts: int = 7
27 |
28 | @dataclass
29 | class ETTm1:
30 | freq: str = '15T'
31 | name: str = 'ETTm1'
32 | n_ts: int = 7
33 |
34 | @dataclass
35 | class ETTm2:
36 | freq: str = '15T'
37 | name: str = 'ETTm2'
38 | n_ts: int = 7
39 |
40 | # Cell
41 | ETTInfo = Info(groups=('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'),
42 | class_groups=(ETTh1, ETTh2, ETTm1, ETTm2))
43 |
44 | # Cell
45 | def process_multiple_ts(y_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
46 | """Transforms multiple timeseries as columns to long format."""
47 | y_df['date'] = pd.to_datetime(y_df['date'])
48 | y_df.rename(columns={'date': 'ds'}, inplace=True)
49 | u_ids = y_df.columns.to_list()
50 | u_ids.remove('ds')
51 |
52 | time_cls = time_features_from_frequency_str('h')
53 | for cls_ in time_cls:
54 | cls_name = cls_.__class__.__name__
55 | y_df[cls_name] = cls_(y_df['ds'].dt)
56 |
57 | X_df = y_df.drop(u_ids, axis=1)
58 | y_df = y_df.filter(items=['ds'] + u_ids)
59 | y_df = y_df.set_index('ds').stack()
60 | y_df = y_df.rename('y').rename_axis(['ds', 'unique_id']).reset_index()
61 | y_df['unique_id'] = pd.Categorical(y_df['unique_id'], u_ids)
62 | y_df = y_df[['unique_id', 'ds', 'y']].sort_values(['unique_id', 'ds'])
63 |
64 | X_df = y_df[['unique_id', 'ds']].merge(X_df, how='left', on=['ds'])
65 |
66 | return y_df, X_df
67 |
68 | # Cell
69 | @dataclass
70 | class ETT:
71 |
72 | source_url: str = 'https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/'
73 |
74 | @staticmethod
75 | def load(directory: str,
76 | group: str,
77 | cache: bool = True) -> Tuple[pd.DataFrame,
78 | Optional[pd.DataFrame],
79 | Optional[pd.DataFrame]]:
80 | """Downloads and loads ETT data.
81 |
82 | Parameters
83 | ----------
84 | directory: str
85 | Directory where data will be downloaded.
86 | group: str
87 | Group name.
88 | Allowed groups: 'ETTh1', 'ETTh2',
89 | 'ETTm1', 'ETTm2'.
90 | cache: bool
91 | If `True`, saves and loads the data from a local pickle cache.
92 |
93 | Notes
94 | -----
95 | [1] Returns train+val+test sets.
96 | """
97 | path = f'{directory}/ett/datasets'
98 | file_cache = f'{path}/{group}.p'
99 |
100 | if os.path.exists(file_cache) and cache:
101 | df, X_df, S_df = pd.read_pickle(file_cache)
102 |
103 | return df, X_df, S_df
104 |
105 |
106 | ETT.download(directory)
107 | path = f'{directory}/ett/datasets'
108 | class_group = ETTInfo[group]
109 |
110 | y_df = pd.read_csv(f'{path}/{group}.csv')
111 |
112 | y_df, X_df = process_multiple_ts(y_df)
113 |
114 | S_df = None
115 | if cache:
116 | pd.to_pickle((y_df, X_df, S_df), file_cache)
117 |
118 | return y_df, X_df, S_df
119 |
120 | @staticmethod
121 | def download(directory: str) -> None:
122 | """Download ETT Dataset."""
123 | path = f'{directory}/ett/datasets/'
124 | if not os.path.exists(path):
125 | for group in ETTInfo.groups:
126 | download_file(path, f'{ETT.source_url}/{group}.csv')
--------------------------------------------------------------------------------
/src/data/datasets/gefcom2012.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__gefcom2012.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['logger', 'GEFCom2012', 'GEFCom2012_L', 'GEFCom2012_W']
4 |
5 | # Cell
6 | import os
7 | import re
8 | import logging
9 | import zipfile
10 |
11 | from dataclasses import dataclass
12 | from typing import Dict, List, Optional, Tuple, Union
13 | from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
14 |
15 | import numpy as np
16 | import pandas as pd
17 |
18 | from .utils import (
19 | download_file,
20 | Info,
21 | TimeSeriesDataclass,
22 | create_calendar_variables,
23 | create_us_holiday_distance_variables,
24 | )
25 | from ..tsdataset import TimeSeriesDataset
26 |
27 | logging.basicConfig(level=logging.INFO)
28 | logger = logging.getLogger(__name__)
29 |
30 | # Cell
31 | class GEFCom2012:
32 |
33 | source_url = 'https://www.dropbox.com/s/epj9b57eivn79j7/GEFCom2012.zip?dl=1'
34 |
35 | @staticmethod
36 | def download(directory: str) -> None:
37 | """Downloads GEFCom2012 Dataset."""
38 | path = f'{directory}/gefcom2012'
39 | if not os.path.exists(path):
40 | download_file(directory=path,
41 | source_url=GEFCom2012.source_url,
42 | decompress=True)
43 |
44 | # Cell
45 | class GEFCom2012_L:
46 |
47 | @staticmethod
48 | def load_Y(directory) -> pd.DataFrame:
49 | # Meta data
50 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Load'
51 | filepath = f'{path}/Load_history.csv'
52 | Y_df = pd.read_csv(filepath, sep=',', thousands=',')
53 |
54 | # Parsing load data
55 | Y_df['ds'] = pd.to_datetime(dict(year=Y_df.year,
56 | month=Y_df.month,
57 | day=Y_df.day))
58 | del Y_df['year'], Y_df['month'], Y_df['day']
59 | Y_df = pd.wide_to_long(Y_df, ['h'], i=['zone_id', 'ds'], j="hour")
60 | Y_df.reset_index(inplace=True)
61 | Y_df['tdelta'] = pd.to_timedelta(Y_df.hour, unit="h")
62 | Y_df['ds'] = Y_df['ds'] + Y_df['tdelta']
63 | del Y_df['tdelta'], Y_df['hour']
64 | Y_df.rename(columns={'zone_id': 'unique_id', 'h': 'y'}, inplace=True)
65 | #Y_df['y'] = pd.to_numeric(Y_df['y'], errors='coerce')
66 | return Y_df
67 |
68 | @staticmethod
69 | def load_X(directory) -> pd.DataFrame:
70 | # Meta data
71 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Load'
72 | filepath = f'{path}/temperature_history.csv'
73 | X_df = pd.read_csv(filepath, sep=',')
74 |
75 | # Parsing temperature data
76 | X_df['ds'] = pd.to_datetime(dict(year=X_df.year,
77 | month=X_df.month,
78 | day=X_df.day))
79 | del X_df['year'], X_df['month'], X_df['day']
80 | X_df = pd.wide_to_long(X_df, ['h'], i=['station_id', 'ds'], j="hour")
81 | X_df.reset_index(inplace=True)
82 | X_df['tdelta'] = pd.to_timedelta(X_df.hour, unit="h")
83 | X_df['ds'] = X_df['ds'] + X_df['tdelta']
84 | del X_df['tdelta'], X_df['hour']
85 | X_df['station_id'] = 'station_' + X_df['station_id'].astype(str)
86 |
87 | X_df = X_df.pivot(index='ds', columns='station_id', values='h').reset_index('ds')
88 | X_df.reset_index(drop=True, inplace=True)
89 | X_df = create_calendar_variables(X_df=X_df)
90 | X_df = create_us_holiday_distance_variables(X_df=X_df)
91 | return X_df
92 |
93 | @staticmethod
94 | def load_benchmark(directory) -> pd.DataFrame:
95 | # Meta data
96 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Load'
97 | filepath = f'{path}/Load_benchmark.csv'
98 | benchmark_df = pd.read_csv(filepath, sep=',')
99 |
100 | # Parsing benchmark data
101 | benchmark_df['ds'] = pd.to_datetime(dict(year=benchmark_df.year,
102 | month=benchmark_df.month,
103 | day=benchmark_df.day))
104 | del benchmark_df['year'], benchmark_df['month'], benchmark_df['day'], benchmark_df['id']
105 | benchmark_df.rename(columns={'zone_id': 'unique_id'}, inplace=True)
106 |
107 | benchmark_df = pd.wide_to_long(benchmark_df, ['h'], i=['unique_id', 'ds'], j="hour")
108 | benchmark_df.reset_index(inplace=True)
109 |
110 | benchmark_df['tdelta'] = pd.to_timedelta(benchmark_df.hour, unit="h")
111 | benchmark_df['ds'] = benchmark_df['ds'] + benchmark_df['tdelta']
112 | del benchmark_df['tdelta'], benchmark_df['hour']
113 | benchmark_df.rename(columns={'h': 'y_hat'}, inplace=True)
114 | return benchmark_df
115 |
116 | @staticmethod
117 | def load(directory) -> Tuple[pd.DataFrame,
118 | pd.DataFrame,
119 | pd.DataFrame]:
120 |
121 | GEFCom2012.download(directory)
122 |
123 | Y_df = GEFCom2012_L.load_Y(directory)
124 | X_df = GEFCom2012_L.load_X(directory)
125 | benchmark_df = GEFCom2012_L.load_benchmark(directory)
126 | return Y_df, X_df, benchmark_df
127 |
128 | # Cell
129 | class GEFCom2012_W:
130 |
131 | train_start = '2009-07-01 00:00:00'
132 | train_end = '2010-12-31 23:00:00'
133 |
134 | test_start = '2011-01-01 01:00:00'
135 | test_end = '2012-06-28 12:00:00'
136 |
137 | @staticmethod
138 | def load_benchmark(directory):
139 | # Meta data
140 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Wind'
141 | filepath = f'{path}/benchmark.csv'
142 | benchmark_df = pd.read_csv(filepath, sep=',')
143 |
144 | benchmark_df['ds'] = pd.to_datetime(benchmark_df.date, format='%Y%m%d%H')
145 | del benchmark_df['date']
146 | benchmark_df = pd.wide_to_long(benchmark_df, ['wp'], i='ds', j="unique_id")
147 | benchmark_df.reset_index(inplace=True)
148 | return benchmark_df
149 |
150 | @staticmethod
151 | def load_Y(directory):
152 | # Meta data
153 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Wind'
154 | yfilepath = f'{path}/train.csv'
155 |
156 | # Read and parse Y data
157 | Y_df = pd.read_csv(yfilepath, sep='\t')
158 | Y_df['ds'] = pd.date_range(start=GEFCom2012_W.train_start,
159 | end=GEFCom2012_W.train_end, freq='H')
160 | del Y_df['date']
161 | Y_df = pd.wide_to_long(Y_df, ['wp'], i='ds', j="unique_id")
162 | Y_df.reset_index(inplace=True)
163 | return Y_df
164 |
165 | @staticmethod
166 | def load_X_group(directory, group):
167 | # Meta data
168 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Wind'
169 | xfilepath = f'{path}/windforecasts_wf{group}.csv'
170 | X_df = pd.read_csv(xfilepath, sep=',')
171 |
172 | # Create ds associated to each forecast from forecast creation date
173 | X_df['date'] = X_df.date.astype(str)
174 | X_df['fcd'] = pd.to_datetime(X_df.date, format='%Y%m%d%H')
175 | X_df['tdelta'] = pd.to_timedelta(X_df.hors, unit="h")
176 | X_df['ds'] = X_df['fcd'] + X_df['tdelta']
177 |
178 | # Separate forecasts by lead time
179 | X_lead12_df = X_df[(X_df.hors>0) & (X_df.hors<=12)].reset_index(drop=True)
180 | X_lead24_df = X_df[(X_df.hors>12) & (X_df.hors<=24)].reset_index(drop=True)
181 | X_lead36_df = X_df[(X_df.hors>24) & (X_df.hors<=36)].reset_index(drop=True)
182 | X_lead48_df = X_df[(X_df.hors>36) & (X_df.hors<=48)].reset_index(drop=True)
183 | del X_df
184 |
185 | # Cleaning auxiliary variables and reconstructing X_df
186 | X_df = pd.DataFrame({'ds': pd.date_range(start='2009-07-01 01:00:00',
187 | end=GEFCom2012_W.test_end, freq='H')})
188 | for lead, df in zip(['_lead12', '_lead24', '_lead36', '_lead48'], \
189 | [X_lead12_df, X_lead24_df, X_lead36_df, X_lead48_df]):
190 | df.drop(['fcd', 'tdelta', 'date', 'hors'], axis=1, inplace=True)
191 | df.columns = [f'u{lead}', f'v{lead}', f'ws{lead}', f'wd{lead}', 'ds']
192 | X_df = X_df.merge(df, on='ds', how='left')
193 |
194 | # Removing nans in hierarchical fashion (priority to shorter lead forecasts)
195 | for var in ['u', 'v', 'ws', 'wd']:
196 | X_df[var] = X_df[f'{var}_lead12']
197 | for lead in ['_lead24', '_lead36', '_lead48']:
198 | X_df[var].fillna(X_df[f'{var}{lead}'], inplace=True)
199 |
200 | for var in ['u', 'v', 'ws', 'wd']:
201 | for lead in ['_lead12', '_lead24', '_lead36', '_lead48']:
202 | X_df[f'{var}{lead}'].fillna(X_df[var], inplace=True)
203 | del X_df[var]
204 | del X_lead12_df, X_lead24_df, X_lead36_df, X_lead48_df
205 | X_df['unique_id'] = group
206 | return X_df
207 |
208 | @staticmethod
209 | def load(directory: str) -> Tuple[pd.DataFrame,
210 | pd.DataFrame,
211 | pd.DataFrame]:
212 | GEFCom2012.download(directory)
213 |
214 | Y_df = GEFCom2012_W.load_Y(directory)
215 | X_df_list = [GEFCom2012_W.load_X_group(directory, group) for group in range(1,8)]
216 | X_df = pd.concat(X_df_list)
217 |
218 | benchmark_df = GEFCom2012_W.load_benchmark(directory)
219 | return Y_df, X_df, benchmark_df
--------------------------------------------------------------------------------
/src/data/datasets/m3.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__m3.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['Yearly', 'Quarterly', 'Monthly', 'Other', 'M3Info', 'M3']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from .utils import download_file, Info, TimeSeriesDataclass
14 |
15 | # Cell
16 | @dataclass
17 | class Yearly:
18 | seasonality: int = 1
19 | horizon: int = 6
20 | freq: str = 'Y'
21 | sheet_name: str = 'M3Year'
22 | name: str = 'Yearly'
23 | n_ts: int = 645
24 |
25 | @dataclass
26 | class Quarterly:
27 | seasonality: int = 4
28 | horizon: int = 8
29 | freq: str = 'Q'
30 | sheet_name: str = 'M3Quart'
31 | name: str = 'Quarterly'
32 | n_ts: int = 756
33 |
34 | @dataclass
35 | class Monthly:
36 | seasonality: int = 12
37 | horizon: int = 18
38 | freq: str = 'M'
39 | sheet_name: str = 'M3Month'
40 | name: str = 'Monthly'
41 | n_ts: int = 1428
42 |
43 | @dataclass
44 | class Other:
45 | seasonality: int = 1
46 | horizon: int = 8
47 | freq: str = 'D'
48 | sheet_name: str = 'M3Other'
49 | name: str = 'Other'
50 | n_ts: int = 174
51 |
52 | # Cell
53 | M3Info = Info(groups=('Yearly', 'Quarterly', 'Monthly', 'Other'),
54 | class_groups=(Yearly, Quarterly, Monthly, Other))
55 |
56 | # Internal Cell
57 | def _return_year(ts):
58 | year = ts.iloc[0]
59 | year = year if year != 0 else 1970
60 |
61 | return year
62 |
63 | # Cell
64 | @dataclass
65 | class M3(TimeSeriesDataclass):
66 |
67 | source_url = 'https://forecasters.org/data/m3comp/M3C.xls'
68 |
69 | @staticmethod
70 | def load(directory: str,
71 | group: str) -> Tuple[pd.DataFrame,
72 | Optional[pd.DataFrame],
73 | Optional[pd.DataFrame]]:
74 | """
75 | Downloads and loads M3 data.
76 |
77 | Parameters
78 | ----------
79 | directory: str
80 | Directory where data will be downloaded.
81 | group: str
82 | Group name.
83 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly', 'Other'.
84 |
85 | Notes
86 | -----
87 | [1] Returns train+test sets.
88 | [2] Some monthly time series have no start year.
89 | These series are assigned a start year of 1970.
90 | [3] The 'Other' series have no start date.
91 | These series are assigned a start year of 1970.
92 | """
93 | M3.download(directory)
94 |
95 | path = f'{directory}/m3/datasets/'
96 | file = f'{path}/M3C.xls'
97 |
98 | class_group = M3Info.get_group(group)
99 |
100 | df = pd.read_excel(file, sheet_name=class_group.sheet_name)
101 | df = df.rename(columns={'Series': 'unique_id',
102 | 'Category': 'category',
103 | 'Starting Year': 'year',
104 | 'Starting Month': 'month'})
105 | df['unique_id'] = [class_group.name[0] + str(i + 1) for i in range(len(df))]
106 | S = df.filter(items=['unique_id', 'category'])
107 |
108 | id_vars = list(df.columns[:6])
109 | df = pd.melt(df, id_vars=id_vars, var_name='ds', value_name='y')
110 | df = df.dropna().sort_values(['unique_id', 'ds']).reset_index(drop=True)
111 |
112 | freq = pd.tseries.frequencies.to_offset(class_group.freq)
113 |
114 | if group == 'Other':
115 | df['year'] = 1970
116 |
117 | df['ds'] = df.groupby('unique_id')['year'] \
118 | .transform(lambda df: pd.date_range(f'{_return_year(df)}-01-01',
119 | periods=df.shape[0],
120 | freq=freq))
121 | df = df.filter(items=['unique_id', 'ds', 'y'])
122 |
123 | return df, None, None
124 |
125 | @staticmethod
126 | def download(directory: str) -> None:
127 | """Download M3 Dataset."""
128 | path = f'{directory}/m3/datasets/'
129 | if not os.path.exists(path):
130 | download_file(path, M3.source_url)
--------------------------------------------------------------------------------
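The M3 loader returns a long-format frame with one row per (series, timestamp); the group dataclasses above carry the matching seasonality, horizon, and series counts. A minimal sketch, assuming `./data` as the download directory:

# Hypothetical usage of the M3 loader; the directory is illustrative.
from src.data.datasets.m3 import M3, M3Info

Y_df, _, _ = M3.load(directory='./data', group='Monthly')
info = M3Info['Monthly']          # seasonality=12, horizon=18, n_ts=1428
assert Y_df['unique_id'].nunique() == info.n_ts
print(Y_df.groupby('unique_id').size().describe())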
/src/data/datasets/m4.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__m4.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly', 'Other', 'M4Info', 'M4', 'M4Evaluation']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from .utils import download_file, Info
14 | from ...losses.numpy import smape, mase
15 |
16 | # Cell
17 | @dataclass
18 | class Yearly:
19 | seasonality: int = 1
20 | horizon: int = 6
21 | freq: str = 'Y'
22 | name: str = 'Yearly'
23 | n_ts: int = 23_000
24 |
25 | @dataclass
26 | class Quarterly:
27 | seasonality: int = 4
28 | horizon: int = 8
29 | freq: str = 'Q'
30 | name: str = 'Quarterly'
31 | n_ts: int = 24_000
32 |
33 | @dataclass
34 | class Monthly:
35 | seasonality: int = 12
36 | horizon: int = 18
37 | freq: str = 'M'
38 | name: str = 'Monthly'
39 | n_ts: int = 48_000
40 |
41 | @dataclass
42 | class Weekly:
43 | seasonality: int = 1
44 | horizon: int = 13
45 | freq: str = 'W'
46 | name: str = 'Weekly'
47 | n_ts: int = 359
48 |
49 | @dataclass
50 | class Daily:
51 | seasonality: int = 1
52 | horizon: int = 14
53 | freq: str = 'D'
54 | name: str = 'Daily'
55 | n_ts: int = 4_227
56 |
57 | @dataclass
58 | class Hourly:
59 | seasonality: int = 24
60 | horizon: int = 48
61 | freq: str = 'H'
62 | name: str = 'Hourly'
63 | n_ts: int = 414
64 |
65 |
66 | @dataclass
67 | class Other:
68 | seasonality: int = 1
69 | horizon: int = 8
70 | freq: str = 'D'
71 | name: str = 'Other'
72 | n_ts: int = 5_000
73 | included_groups: Tuple = ('Weekly', 'Daily', 'Hourly')
74 |
75 | # Cell
76 | M4Info = Info(groups=('Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly', 'Other'),
77 | class_groups=(Yearly, Quarterly, Monthly, Weekly, Daily, Hourly, Other))
78 |
79 | # Cell
80 | @dataclass
81 | class M4:
82 |
83 | source_url: str = 'https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/'
84 | naive2_forecast_url: str = 'https://github.com/Nixtla/m4-forecasts/raw/master/forecasts/submission-Naive2.zip'
85 |
86 | @staticmethod
87 | def load(directory: str,
88 | group: str,
89 | cache: bool = True) -> Tuple[pd.DataFrame,
90 | Optional[pd.DataFrame],
91 | Optional[pd.DataFrame]]:
92 | """Downloads and loads M4 data.
93 |
94 | Parameters
95 | ----------
96 | directory: str
97 | Directory where data will be downloaded.
98 | group: str
99 | Group name.
100 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly',
101 | 'Weekly', 'Daily', 'Hourly'.
102 | cache: bool
103 | If `True`, saves the processed data to a local pickle and loads it on subsequent calls.
104 |
105 | Notes
106 | -----
107 | [1] Returns train+test sets.
108 | """
109 | path = f'{directory}/m4/datasets'
110 | file_cache = f'{path}/{group}.p'
111 |
112 | if os.path.exists(file_cache) and cache:
113 | df, X_df, S_df = pd.read_pickle(file_cache)
114 |
115 | return df, X_df, S_df
116 |
117 | if group == 'Other':
118 | #Special case.
119 | included_dfs = [M4.load(directory, gr) \
120 | for gr in M4Info['Other'].included_groups]
121 | df, *_ = zip(*included_dfs)
122 | df = pd.concat(df)
123 | else:
124 | M4.download(directory)
125 | path = f'{directory}/m4/datasets'
126 | class_group = M4Info[group]
127 | S_df = pd.read_csv(f'{directory}/m4/datasets/M4-info.csv',
128 | usecols=['M4id','category'])
129 | S_df['category'] = S_df['category'].astype('category').cat.codes
130 | S_df.rename({'M4id': 'unique_id'}, axis=1, inplace=True)
131 | S_df = S_df[S_df['unique_id'].str.startswith(class_group.name[0])]
132 |
133 | def read_and_melt(file):
134 | df = pd.read_csv(file)
135 | df.columns = ['unique_id'] + list(range(1, df.shape[1]))
136 | df = pd.melt(df, id_vars=['unique_id'], var_name='ds', value_name='y')
137 | df = df.dropna()
138 |
139 | return df
140 |
141 | df_train = read_and_melt(file=f'{path}/{group}-train.csv')
142 | df_test = read_and_melt(file=f'{path}/{group}-test.csv')
143 |
144 | len_train = df_train.groupby('unique_id').agg({'ds': 'max'}).reset_index()
145 | len_train.columns = ['unique_id', 'len_serie']
146 | df_test = df_test.merge(len_train, on=['unique_id'])
147 | df_test['ds'] = df_test['ds'] + df_test['len_serie']
148 | df_test.drop('len_serie', axis=1, inplace=True)
149 |
150 | df = pd.concat([df_train, df_test])
151 | df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)
152 |
153 | S_df = S_df.sort_values('unique_id').reset_index(drop=True)
154 |
155 | X_df = None
156 | if cache:
157 | pd.to_pickle((df, X_df, S_df), file_cache)
158 |
159 | return df, None, S_df
160 |
161 | @staticmethod
162 | def download(directory: str) -> None:
163 | """Download M4 Dataset."""
164 | path = f'{directory}/m4/datasets/'
165 | if not os.path.exists(path):
166 | for group in M4Info.groups:
167 | download_file(path, f'{M4.source_url}/Train/{group}-train.csv')
168 | download_file(path, f'{M4.source_url}/Test/{group}-test.csv')
169 | download_file(path, f'{M4.source_url}/M4-info.csv')
170 | download_file(path, M4.naive2_forecast_url, decompress=True)
171 |
172 | # Cell
173 | class M4Evaluation:
174 |
175 | @staticmethod
176 | def load_benchmark(directory: str, group: str,
177 | source_url: Optional[str] = None) -> np.ndarray:
178 | """Downloads and loads a bechmark forecasts.
179 |
180 | Parameters
181 | ----------
182 | directory: str
183 | Directory where data will be downloaded.
184 | group: str
185 | Group name.
186 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly',
187 | 'Weekly', 'Daily', 'Hourly'.
188 | source_url: str, optional
189 | Optional benchmark url obtained from
190 | https://github.com/Nixtla/m4-forecasts/tree/master/forecasts.
191 | If `None` returns Naive2.
192 |
193 | Returns
194 | -------
195 | benchmark: numpy array
196 | Numpy array of shape (n_series, horizon).
197 | """
198 | path = f'{directory}/m4/datasets'
199 | initial = group[0]
200 | if source_url is not None:
201 | filename = source_url.split('/')[-1].replace('.rar', '.csv')
202 | filepath = f'{path}/{filename}'
203 | if not os.path.exists(filepath):
204 | download_file(path, source_url, decompress=True)
205 |
206 | else:
207 | filepath = f'{path}/submission-Naive2.csv'
208 |
209 | benchmark = pd.read_csv(filepath)
210 | benchmark = benchmark[benchmark['id'].str.startswith(initial)]
211 | benchmark = benchmark.set_index('id').dropna(axis=1)
212 | benchmark = benchmark.sort_values('id').values
213 |
214 | return benchmark
215 |
216 | @staticmethod
217 | def evaluate(directory: str, group: str,
218 | y_hat: Union[np.ndarray, str]) -> pd.DataFrame:
219 | """Evaluates y_hat according to M4 methodology.
220 |
221 | Parameters
222 | ----------
223 | directory: str
224 | Directory where data will be downloaded.
225 | group: str
226 | Group name.
227 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly',
228 | 'Weekly', 'Daily', 'Hourly'.
229 | y_hat: numpy array, str
230 | Group forecasts as numpy array or
231 | benchmark url from
232 | https://github.com/Nixtla/m4-forecasts/tree/master/forecasts.
233 |
234 | Returns
235 | -------
236 | evaluation: pandas dataframe
237 | DataFrame with columns OWA, SMAPE, MASE
238 | and group as index.
239 | """
240 | if isinstance(y_hat, str):
241 | y_hat = M4Evaluation.load_benchmark(directory, group, y_hat)
242 |
243 | initial = group[0]
244 | class_group = M4Info[group]
245 | horizon = class_group.horizon
246 | seasonality = class_group.seasonality
247 | path = f'{directory}/m4/datasets'
248 | y_df, *_ = M4.load(directory, group)
249 |
250 | y_train = y_df.groupby('unique_id')['y']
251 | y_train = y_train.apply(lambda x: x.head(-horizon).values)
252 | y_train = y_train.values
253 |
254 | y_test = y_df.groupby('unique_id')['y']
255 | y_test = y_test.tail(horizon)
256 | y_test = y_test.values.reshape(-1, horizon)
257 |
258 | naive2 = M4Evaluation.load_benchmark(directory, group)
259 | smape_y_hat = smape(y_test, y_hat)
260 | smape_naive2 = smape(y_test, naive2)
261 |
262 | mase_y_hat = np.mean([mase(y_test[i], y_hat[i], y_train[i], seasonality)
263 | for i in range(class_group.n_ts)])
264 | mase_naive2 = np.mean([mase(y_test[i], naive2[i], y_train[i], seasonality)
265 | for i in range(class_group.n_ts)])
266 |
267 | owa = .5 * (mase_y_hat / mase_naive2 + smape_y_hat / smape_naive2)
268 |
269 | evaluation = pd.DataFrame({'SMAPE': smape_y_hat,
270 | 'MASE': mase_y_hat,
271 | 'OWA': owa},
272 | index=[group])
273 |
274 | return evaluation
--------------------------------------------------------------------------------
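M4Evaluation.evaluate scores a forecast with sMAPE, MASE, and OWA, where OWA = 0.5 * (MASE / MASE_Naive2 + sMAPE / sMAPE_Naive2); evaluating the Naive2 submission against itself therefore yields OWA = 1. A sketch, assuming `./data` as the download directory:

# Hypothetical usage: score the published Naive2 benchmark (OWA should be ~1).
from src.data.datasets.m4 import M4Evaluation

naive2_url = ('https://github.com/Nixtla/m4-forecasts/raw/master/'
              'forecasts/submission-Naive2.zip')
evaluation = M4Evaluation.evaluate(directory='./data', group='Hourly',
                                   y_hat=naive2_url)
print(evaluation)   # one row indexed by 'Hourly' with SMAPE, MASE, OWA columns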
/src/data/datasets/m5.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__m5.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['M5', 'M5Evaluation']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import numpy as np
11 | import pandas as pd
12 |
13 | from .utils import download_file, Info
14 |
15 | # Cell
16 | @dataclass
17 | class M5:
18 |
19 | # original data available from Kaggle directly
20 | # pip install kaggle --upgrade
21 | # kaggle competitions download -c m5-forecasting-accuracy
22 | source_url: str = 'https://github.com/Nixtla/m5-forecasts/raw/main/datasets/m5.zip'
23 |
24 | @staticmethod
25 | def download(directory: str) -> None:
26 | """Downloads M5 Competition Dataset."""
27 | path = f'{directory}/m5/datasets'
28 | if not os.path.exists(path):
29 | download_file(directory=path,
30 | source_url=M5.source_url,
31 | decompress=True)
32 |
33 | @staticmethod
34 | def load(directory: str, cache: bool = True) -> Tuple[pd.DataFrame,
35 | pd.DataFrame,
36 | pd.DataFrame]:
37 | """Downloads and loads M5 data.
38 |
39 | Parameters
40 | ----------
41 | directory: str
42 | Directory where data will be downloaded.
43 | cache: bool
44 | If `True`, saves the processed data to a local pickle and loads it on subsequent calls.
45 |
46 | Notes
47 | -----
48 | [1] Returns train+test sets.
49 | [2] Based on https://www.kaggle.com/lemuz90/m5-preprocess.
50 | """
51 | path = f'{directory}/m5/datasets'
52 | file_cache = f'{path}/m5.p'
53 |
54 | if os.path.exists(file_cache) and cache:
55 | Y_df, X_df, S_df = pd.read_pickle(file_cache)
56 |
57 | return Y_df, X_df, S_df
58 |
59 | M5.download(directory)
60 | # Calendar data
61 | cal_dtypes = {
62 | 'wm_yr_wk': np.uint16,
63 | 'event_name_1': 'category',
64 | 'event_type_1': 'category',
65 | 'event_name_2': 'category',
66 | 'event_type_2': 'category',
67 | 'snap_CA': np.uint8,
68 | 'snap_TX': np.uint8,
69 | 'snap_WI': np.uint8,
70 | }
71 | cal = pd.read_csv(f'{path}/calendar.csv',
72 | dtype=cal_dtypes,
73 | usecols=list(cal_dtypes.keys()) + ['date'],
74 | parse_dates=['date'])
75 | cal['d'] = np.arange(cal.shape[0]) + 1
76 | cal['d'] = 'd_' + cal['d'].astype('str')
77 | cal['d'] = cal['d'].astype('category')
78 |
79 | event_cols = [k for k in cal_dtypes if k.startswith('event')]
80 | for col in event_cols:
81 | cal[col] = cal[col].cat.add_categories('nan').fillna('nan')
82 |
83 | # Prices
84 | prices_dtypes = {
85 | 'store_id': 'category',
86 | 'item_id': 'category',
87 | 'wm_yr_wk': np.uint16,
88 | 'sell_price': np.float32
89 | }
90 |
91 | prices = pd.read_csv(f'{path}/sell_prices.csv',
92 | dtype=prices_dtypes)
93 |
94 | # Sales
95 | sales_dtypes = {
96 | 'item_id': prices.item_id.dtype,
97 | 'dept_id': 'category',
98 | 'cat_id': 'category',
99 | 'store_id': 'category',
100 | 'state_id': 'category',
101 | **{f'd_{i+1}': np.float32 for i in range(1969)}
102 | }
103 | # Reading train and test sets
104 | sales_train = pd.read_csv(f'{path}/sales_train_evaluation.csv',
105 | dtype=sales_dtypes)
106 | sales_test = pd.read_csv(f'{path}/sales_test_evaluation.csv',
107 | dtype=sales_dtypes)
108 | sales = sales_train.merge(sales_test, how='left',
109 | on=['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'])
110 | sales['id'] = sales[['item_id', 'store_id']].astype(str).agg('_'.join, axis=1).astype('category')
111 | # Long format
112 | long = sales.melt(id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'],
113 | var_name='d', value_name='y')
114 | long['d'] = long['d'].astype(cal.d.dtype)
115 | long = long.merge(cal, on=['d'])
116 | long = long.merge(prices, on=['store_id', 'item_id', 'wm_yr_wk'])
117 | long = long.drop(columns=['d', 'wm_yr_wk'])
118 |
119 | def first_nz_mask(values, index):
120 | """Return a boolean mask where the True starts at the first non-zero value."""
121 | mask = np.full(values.size, True)
122 | for idx, value in enumerate(values):
123 | if value == 0:
124 | mask[idx] = False
125 | else:
126 | break
127 | return mask
128 |
129 | long = long.sort_values(['id', 'date'], ignore_index=True)
130 | keep_mask = long.groupby('id')['y'].transform(first_nz_mask, engine='numba')
131 | long = long[keep_mask.astype(bool)]
132 | long.rename(columns={'id': 'unique_id', 'date': 'ds'}, inplace=True)
133 | Y_df = long.filter(items=['unique_id', 'ds', 'y'])
134 | cats = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
135 | S_df = long.filter(items=['unique_id'] + cats)
136 | S_df = S_df.drop_duplicates(ignore_index=True)
137 | X_df = long.drop(columns=['y'] + cats)
138 |
139 | if cache:
140 | pd.to_pickle((Y_df, X_df, S_df), file_cache)
141 |
142 | return Y_df, X_df, S_df
143 |
144 | # Cell
145 | class M5Evaluation:
146 |
147 | levels: dict = dict(
148 | Level1=['total'],
149 | Level2=['state_id'],
150 | Level3=['store_id'],
151 | Level4=['cat_id'],
152 | Level5=['dept_id'],
153 | Level6=['state_id', 'cat_id'],
154 | Level7=['state_id', 'dept_id'],
155 | Level8=['store_id', 'cat_id'],
156 | Level9=['store_id', 'dept_id'],
157 | Level10=['item_id'],
158 | Level11=['state_id', 'item_id'],
159 | Level12=['item_id', 'store_id']
160 | )
161 |
162 | @staticmethod
163 | def load_benchmark(directory: str,
164 | source_url: Optional[str] = None,
165 | validation: bool = False) -> np.ndarray:
166 | """Downloads and loads a bechmark forecasts.
167 |
168 | Parameters
169 | ----------
170 | directory: str
171 | Directory where data will be downloaded.
172 | source_url: str, optional
173 | Optional benchmark url obtained from
174 | https://github.com/Nixtla/m5-forecasts/tree/master/forecasts.
175 | If `None` returns the M5 winner.
176 | validation: bool
177 | Whether to return validation forecasts.
178 | Default False, returns test forecasts.
179 |
180 | Returns
181 | -------
182 | benchmark: numpy array
183 | Numpy array of shape (n_series, horizon).
184 | """
185 | path = f'{directory}/m5/datasets'
186 | if source_url is not None:
187 | filename = source_url.split('/')[-1].replace('.rar', '.csv')
188 | filepath = f'{path}/{filename}'
189 | if not os.path.exists(filepath):
190 | download_file(path, source_url, decompress=True)
191 |
192 | else:
193 | source_url = 'https://github.com/Nixtla/m5-forecasts/raw/main/forecasts/0001 YJ_STU.zip'
194 | return M5Evaluation.load_benchmark(directory, source_url, validation)
195 |
196 | benchmark = pd.read_csv(filepath)
197 | mask = benchmark['id'].str.endswith('validation')
198 | if validation:
199 | benchmark = benchmark[mask]
200 | benchmark['id'] = benchmark['id'].str.replace('_validation', '')
201 | else:
202 | benchmark = benchmark[~mask]
203 | benchmark['id'] = benchmark['id'].str.replace('_evaluation', '')
204 |
205 | benchmark = benchmark.sort_values('id', ignore_index=True)
206 | benchmark.rename(columns={'id': 'unique_id'}, inplace=True)
207 | *_, s_df = M5.load(directory)
208 | benchmark = benchmark.merge(s_df, how='left',
209 | on=['unique_id'])
210 |
211 | return benchmark
212 |
213 | @staticmethod
214 | def aggregate_levels(y_hat: pd.DataFrame,
215 | categories: pd.DataFrame = None) -> pd.DataFrame:
216 | """Aggregates the 30_480 series to get 42_840."""
217 | y_hat_cat = y_hat.assign(total='Total')
218 |
219 | df_agg = []
220 | for level, agg in M5Evaluation.levels.items():
221 | df = y_hat_cat.groupby(agg).sum().reset_index()
222 | renamer = dict(zip(agg, ['Agg_Level_1', 'Agg_Level_2']))
223 | df.rename(columns=renamer, inplace=True)
224 | df.insert(0, 'Level_id', level)
225 | df_agg.append(df)
226 | df_agg = pd.concat(df_agg)
227 | df_agg = df_agg.fillna('X')
228 | df_agg = df_agg.set_index(['Level_id', 'Agg_Level_1', 'Agg_Level_2'])
229 | df_agg.columns = [f'd_{i+1}' for i in range(df_agg.shape[1])]
230 |
231 | return df_agg
232 |
233 | @staticmethod
234 | def evaluate(directory: str,
235 | y_hat: Union[pd.DataFrame, str],
236 | validation: bool = False) -> pd.DataFrame:
237 | """Evaluates y_hat according to M4 methodology.
238 |
239 | Parameters
240 | ----------
241 | directory: str
242 | Directory where data will be downloaded.
243 | validation: bool
244 | Whether to perform the validation evaluation.
245 | Default False, returns the test evaluation.
246 | y_hat: pandas dataframe, str
247 | Forecasts as wide pandas dataframe with columns
248 | ['unique_id'] and forecasts or
249 | benchmark url from
250 | https://github.com/Nixtla/m5-forecasts/tree/main/forecasts.
251 |
252 | Returns
253 | -------
254 | evaluation: pandas dataframe
255 | DataFrame with the WRMSSE per aggregation level
256 | and the overall mean in the 'Total' row.
257 | """
258 | if isinstance(y_hat, str):
259 | y_hat = M5Evaluation.load_benchmark(directory, y_hat, validation)
260 |
261 | M5.download(directory)
262 | path = f'{directory}/m5/datasets'
263 | if validation:
264 | weights = pd.read_csv(f'{path}/weights_validation.csv')
265 | sales = pd.read_csv(f'{path}/sales_train_validation.csv')
266 | y_test = pd.read_csv(f'{path}/sales_test_validation.csv')
267 | else:
268 | weights = pd.read_csv(f'{path}/weights_evaluation.csv')
269 | sales = pd.read_csv(f'{path}/sales_train_evaluation.csv')
270 | y_test = pd.read_csv(f'{path}/sales_test_evaluation.csv')
271 |
272 | # sales
273 | sales = M5Evaluation.aggregate_levels(sales)
274 | def scale(x):
275 | x = x.values
276 | x = x[np.argmax(x!=0):]
277 | scale = ((x[1:] - x[:-1]) ** 2).mean()
278 | return scale
279 | scales = sales.agg(scale, 1).rename('scale').reset_index()
280 |
281 | # y_test
282 | y_test = M5Evaluation.aggregate_levels(y_test)
283 |
284 | #y_hat
285 | y_hat = M5Evaluation.aggregate_levels(y_hat)
286 |
287 | score = (y_test - y_hat) ** 2
288 | score = score.mean(1)
289 | score = score.rename('rmse').reset_index()
290 | score = score.merge(weights, how='left',
291 | on=['Level_id', 'Agg_Level_1', 'Agg_Level_2'])
292 | score = score.merge(scales, how='left',
293 | on=['Level_id', 'Agg_Level_1', 'Agg_Level_2'])
294 | score['wrmsse'] = (score['rmse'] / score['scale']).pow(1 / 2) * score['weight']
295 | score = score.groupby('Level_id')[['wrmsse']].sum()
296 | score = score.loc[M5Evaluation.levels.keys()]
297 | total = score.mean().rename('Total').to_frame().T
298 | score = pd.concat([total, score])
299 |
300 | return score
--------------------------------------------------------------------------------
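M5Evaluation.evaluate aggregates the bottom-level forecasts to the 12 hierarchy levels listed in `levels`, computes a weighted, scaled RMSE per level (WRMSSE), and averages the levels into a 'Total' row. A sketch that scores the default benchmark (the winning submission), assuming `./data` as the download directory:

# Hypothetical usage: WRMSSE of the M5 winning submission (the default benchmark).
from src.data.datasets.m5 import M5Evaluation

winner_url = ('https://github.com/Nixtla/m5-forecasts/raw/main/'
              'forecasts/0001 YJ_STU.zip')
score = M5Evaluation.evaluate(directory='./data', y_hat=winner_url)
print(score)   # WRMSSE per aggregation level plus the overall 'Total' row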
/src/data/datasets/tourism.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__tourism.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['Yearly', 'Quarterly', 'Monthly', 'TourismInfo', 'Tourism']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import numpy as np
11 | import pandas as pd
12 | from pandas.tseries.frequencies import to_offset
13 |
14 | from .utils import download_file, Info, TimeSeriesDataclass
15 |
16 | # Cell
17 | @dataclass
18 | class Yearly:
19 | seasonality: int = 1
20 | horizon: int = 4
21 | freq: str = 'Y'
22 | rows: int = 2
23 | name: str = 'Yearly'
24 | n_ts: int = 518
25 |
26 | @dataclass
27 | class Quarterly:
28 | seasonality: int = 4
29 | horizon: int = 8
30 | freq: str = 'Q'
31 | rows: int = 3
32 | name: str = 'Quarterly'
33 | n_ts: int = 427
34 |
35 | @dataclass
36 | class Monthly:
37 | seasonality: int = 12
38 | horizon: int = 24
39 | freq: str = 'M'
40 | rows: int = 3
41 | name: str = 'Monthly'
42 | n_ts: int = 366
43 |
44 | # Cell
45 | TourismInfo = Info(groups=('Yearly', 'Quarterly', 'Monthly'),
46 | class_groups=(Yearly, Quarterly, Monthly))
47 |
48 | # Cell
49 | class Tourism(TimeSeriesDataclass):
50 |
51 | source_url = 'https://robjhyndman.com/data/27-3-Athanasopoulos1.zip'
52 |
53 | @staticmethod
54 | def load(directory: str,
55 | group: str) -> Tuple[pd.DataFrame,
56 | Optional[pd.DataFrame],
57 | Optional[pd.DataFrame]]:
58 | """
59 | Downloads and loads Tourism data.
60 |
61 | Parameters
62 | ----------
63 | directory: str
64 | Directory where data will be downloaded.
65 | group: str
66 | Group name.
67 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly'.
68 |
69 | Notes
70 | -----
71 | [1] Returns train+test sets.
72 | """
73 | Tourism.download(directory)
74 |
75 | path = f'{directory}/tourism/datasets'
76 |
77 | class_group = TourismInfo.get_group(group)
78 | train_file = f'{path}/{class_group.name.lower()}_in.csv'
79 | test_file = f'{path}/{class_group.name.lower()}_oos.csv'
80 |
81 | train, test = pd.read_csv(train_file), pd.read_csv(test_file)
82 |
83 | dfs = []
84 | freq = to_offset(class_group.freq)
85 | for col in train.columns:
86 | df_appended = []
87 | for df, training in zip([train, test], [True, False]):
88 | df_col = df[col]
89 | length, year = df_col[:2].astype(int)
90 | skip_rows = class_group.rows
91 | start_date = pd.to_datetime(f'{year}-01-01')
92 | if group != 'Yearly':
93 | n_offsets = df_col[2].astype(int)
94 | start_date += n_offsets * freq
95 | elif col == 'Y18' and not training: # this series is misaligned in the source file
96 | start_date += 2 * freq
97 | df_col = df_col[skip_rows:length + skip_rows]
98 | df_col = df_col.rename('y').to_frame()
99 | df_col['unique_id'] = col
100 | df_col['ds'] = pd.date_range(start_date, periods=length, freq=freq)
101 | df_appended.append(df_col)
102 | df_appended = pd.concat(df_appended)
103 | dfs.append(df_appended)
104 |
105 | df = pd.concat(dfs)
106 |
107 | df = df.reset_index().filter(items=['unique_id', 'ds', 'y'])
108 | df = df.sort_values(['unique_id', 'ds'])
109 |
110 | return df, None, None
111 |
112 | @staticmethod
113 | def download(directory: str) -> None:
114 | """Downloads Tourism Dataset."""
115 | path = f'{directory}/tourism/datasets'
116 |
117 | if not os.path.exists(path):
118 | download_file(path, Tourism.source_url, decompress=True)
--------------------------------------------------------------------------------
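As with M3, the Tourism loader reconstructs the date index of each series from the header rows of the raw csv files and returns a single long-format frame. A minimal sketch, assuming `./data` as the download directory:

# Hypothetical usage of the Tourism loader; the directory is illustrative.
from src.data.datasets.tourism import Tourism, TourismInfo

Y_df, _, _ = Tourism.load(directory='./data', group='Quarterly')
group_info = TourismInfo['Quarterly']     # seasonality=4, horizon=8, n_ts=427
print(Y_df['unique_id'].nunique(), group_info.n_ts)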
/src/data/datasets/utils.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__utils.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['logger', 'download_file', 'Info', 'TimeSeriesDataclass', 'get_holiday_dates', 'holiday_kernel',
4 | 'create_calendar_variables', 'create_us_holiday_distance_variables', 'US_FEDERAL_HOLIDAYS', 'TimeFeature',
5 | 'SecondOfMinute', 'MinuteOfHour', 'HourOfDay', 'DayOfWeek', 'DayOfMonth', 'DayOfYear', 'MonthOfYear',
6 | 'WeekOfYear', 'time_features_from_frequency_str']
7 |
8 | # Cell
9 | import logging
10 | import requests
11 | import subprocess
12 | import zipfile
13 | from pathlib import Path
14 | from dataclasses import dataclass
15 | from typing import Dict, List, Optional, Tuple, Union
16 |
17 | import numpy as np
18 | import pandas as pd
19 | from tqdm import tqdm
20 | import pandas as pd
21 | from pandas.tseries import offsets
22 | from pandas.tseries.frequencies import to_offset
23 |
24 | logging.basicConfig(level=logging.INFO)
25 | logger = logging.getLogger(__name__)
26 |
27 | # Cell
28 | def download_file(directory: str, source_url: str, decompress: bool = False) -> None:
29 | """Download data from source_ulr inside directory.
30 |
31 | Parameters
32 | ----------
33 | directory: str, Path
34 | Custom directory where data will be downloaded.
35 | source_url: str
36 | URL where data is hosted.
37 | decompress: bool
38 | Whether to decompress the downloaded file. Default False.
39 | """
40 | if isinstance(directory, str):
41 | directory = Path(directory)
42 | directory.mkdir(parents=True, exist_ok=True)
43 |
44 | filename = source_url.split('/')[-1]
45 | filepath = Path(f'{directory}/{filename}')
46 |
47 | # Streaming, so we can iterate over the response.
48 | headers = {'User-Agent': 'Mozilla/5.0'}
49 | r = requests.get(source_url, stream=True, headers=headers)
50 | # Total size in bytes.
51 | total_size = int(r.headers.get('content-length', 0))
52 | block_size = 1024 #1 Kibibyte
53 |
54 | t = tqdm(total=total_size, unit='iB', unit_scale=True)
55 | with open(filepath, 'wb') as f:
56 | for data in r.iter_content(block_size):
57 | t.update(len(data))
58 | f.write(data)
59 | f.flush()
60 | t.close()
61 |
62 | if total_size != 0 and t.n != total_size:
63 | logger.error('ERROR, something went wrong downloading data')
64 |
65 | size = filepath.stat().st_size
66 | logger.info(f'Successfully downloaded {filename}, {size} bytes.')
67 |
68 | if decompress:
69 | if '.zip' in filepath.suffix:
70 | logger.info('Decompressing zip file...')
71 | with zipfile.ZipFile(filepath, 'r') as zip_ref:
72 | zip_ref.extractall(directory)
73 | else:
74 | from patoolib import extract_archive
75 | extract_archive(filepath, outdir=directory)
76 | logger.info(f'Successfully decompressed {filepath}')
77 |
78 | # Cell
79 | @dataclass
80 | class Info:
81 | """
82 | Info Dataclass of datasets.
83 | Args:
84 | groups (Tuple): Tuple of str groups
85 | class_groups (Tuple): Tuple of dataclasses.
86 | """
87 | groups: Tuple[str]
88 | class_groups: Tuple[dataclass]
89 |
90 | def get_group(self, group: str):
91 | """Gets dataclass of group."""
92 | if group not in self.groups:
93 | raise Exception(f'Unknown group {group}')
94 |
95 | return self.class_groups[self.groups.index(group)]
96 |
97 | def __getitem__(self, group: str):
98 | """Gets dataclass of group."""
99 | if group not in self.groups:
100 | raise Exception(f'Unknown group {group}')
101 |
102 | return self.class_groups[self.groups.index(group)]
103 |
104 | def __iter__(self):
105 | for group in self.groups:
106 | yield group, self.get_group(group)
107 |
108 |
109 | # Cell
110 | @dataclass
111 | class TimeSeriesDataclass:
112 | """
113 | Args:
114 | S (pd.DataFrame): DataFrame of static features of shape
115 | (n_time_series, n_features).
116 | X (pd.DataFrame): DataFrame of exogenous variables of shape
117 | (sum n_periods_i for i=1..n_time_series, n_exogenous).
118 | Y (pd.DataFrame): DataFrame of target variable of shape
119 | (sum n_periods_i for i=1..n_time_series, 1).
120 | idx_categorical_static (list, optional): List of categorical indexes
121 | of S.
122 | group (str, optional): Group name if applies.
123 | Example: 'Yearly'
124 | """
125 | S: pd.DataFrame
126 | X: pd.DataFrame
127 | Y: pd.DataFrame
128 | idx_categorical_static: Optional[List] = None
129 | group: Union[str, List[str]] = None
130 |
131 | # Cell
132 | import pandas as pd
133 | from pandas.tseries.holiday import (
134 | AbstractHolidayCalendar,
135 | Holiday,
136 | USMartinLutherKingJr,
137 | USPresidentsDay,
138 | USMemorialDay,
139 | USLaborDay,
140 | USColumbusDay,
141 | USThanksgivingDay,
142 | nearest_workday
143 | )
144 |
145 | US_FEDERAL_HOLIDAYS = {'new_year': Holiday("New Years Day", month=1, day=1, observance=nearest_workday),
146 | 'martin_luther_king': USMartinLutherKingJr,
147 | 'presidents': USPresidentsDay,
148 | 'memorial': USMemorialDay,
149 | 'independence': Holiday("July 4th", month=7, day=4, observance=nearest_workday),
150 | 'labor': USLaborDay,
151 | 'columbus': USColumbusDay,
152 | 'veterans': Holiday("Veterans Day", month=11, day=11, observance=nearest_workday),
153 | 'thanksgiving': USThanksgivingDay,
154 | 'christmas': Holiday("Christmas", month=12, day=25, observance=nearest_workday)}
155 |
156 | def get_holiday_dates(holiday, dates):
157 | start_date = min(dates) + pd.DateOffset(days=-366)
158 | end_date = max(dates) + pd.DateOffset(days=366)
159 | holiday_calendar = AbstractHolidayCalendar(rules=[US_FEDERAL_HOLIDAYS[holiday]])
160 | holiday_dates = holiday_calendar.holidays(start=start_date, end=end_date)
161 | return np.array(holiday_dates)
162 |
163 | def holiday_kernel(holiday, dates):
164 | # Get holidays around dates
165 | dates = pd.DatetimeIndex(dates)
166 | dates_np = np.array(dates).astype('datetime64[D]')
167 | holiday_dates = get_holiday_dates(holiday, dates)
168 | holiday_dates_np = np.array(pd.DatetimeIndex(holiday_dates)).astype('datetime64[D]')
169 |
170 | # Compute day distance to holiday
171 | nearest_holiday_idx = np.expand_dims(dates_np, axis=1) - np.expand_dims(holiday_dates_np, axis=0)
172 | nearest_holiday_idx = np.argmin(np.abs(nearest_holiday_idx), axis=1)
173 | nearest_holiday = pd.DatetimeIndex([holiday_dates[idx] for idx in nearest_holiday_idx])
174 | holiday_diff = (dates - nearest_holiday).days.values
175 | return holiday_diff
176 |
177 | def create_calendar_variables(X_df: pd.DataFrame):
178 | X_df['day_of_year'] = X_df.ds.dt.dayofyear
179 | X_df['day_of_week'] = X_df.ds.dt.dayofweek
180 | X_df['hour'] = X_df.ds.dt.hour
181 | return X_df
182 |
183 | def create_us_holiday_distance_variables(X_df: pd.DataFrame):
184 | dates = X_df.ds.dt.date
185 | for holiday in US_FEDERAL_HOLIDAYS.keys():
186 | X_df[f'holiday_dist_{holiday}'] = holiday_kernel(holiday=holiday,
187 | dates=dates)
188 | return X_df
189 |
190 | # Cell
191 | ## This code was taken from:
192 | # https://github.com/zhouhaoyi/Informer2020/blob/429f8ace8dde71655d8f8a5aad1a36303a2b2dfe/utils/timefeatures.py#L114
193 | class TimeFeature:
194 | def __init__(self):
195 | pass
196 |
197 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
198 | pass
199 |
200 | def __repr__(self):
201 | return self.__class__.__name__ + "()"
202 |
203 | class SecondOfMinute(TimeFeature):
204 | """Minute of hour encoded as value between [-0.5, 0.5]"""
205 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
206 | return index.second / 59.0 - 0.5
207 |
208 | class MinuteOfHour(TimeFeature):
209 | """Minute of hour encoded as value between [-0.5, 0.5]"""
210 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
211 | return index.minute / 59.0 - 0.5
212 |
213 | class HourOfDay(TimeFeature):
214 | """Hour of day encoded as value between [-0.5, 0.5]"""
215 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
216 | return index.hour / 23.0 - 0.5
217 |
218 | class DayOfWeek(TimeFeature):
219 | """Hour of day encoded as value between [-0.5, 0.5]"""
220 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
221 | return index.dayofweek / 6.0 - 0.5
222 |
223 | class DayOfMonth(TimeFeature):
224 | """Day of month encoded as value between [-0.5, 0.5]"""
225 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
226 | return (index.day - 1) / 30.0 - 0.5
227 |
228 | class DayOfYear(TimeFeature):
229 | """Day of year encoded as value between [-0.5, 0.5]"""
230 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
231 | return (index.dayofyear - 1) / 365.0 - 0.5
232 |
233 | class MonthOfYear(TimeFeature):
234 | """Month of year encoded as value between [-0.5, 0.5]"""
235 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
236 | return (index.month - 1) / 11.0 - 0.5
237 |
238 | class WeekOfYear(TimeFeature):
239 | """Week of year encoded as value between [-0.5, 0.5]"""
240 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
241 | return (index.isocalendar().week - 1) / 52.0 - 0.5
242 |
243 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
244 | """
245 | Returns a list of time features that will be appropriate for the given frequency string.
246 | Parameters
247 | ----------
248 | freq_str
249 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
250 | """
251 |
252 | features_by_offsets = {
253 | offsets.YearEnd: [],
254 | offsets.QuarterEnd: [MonthOfYear],
255 | offsets.MonthEnd: [MonthOfYear],
256 | offsets.Week: [DayOfMonth, WeekOfYear],
257 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
258 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
259 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
260 | offsets.Minute: [
261 | MinuteOfHour,
262 | HourOfDay,
263 | DayOfWeek,
264 | DayOfMonth,
265 | DayOfYear,
266 | ],
267 | offsets.Second: [
268 | SecondOfMinute,
269 | MinuteOfHour,
270 | HourOfDay,
271 | DayOfWeek,
272 | DayOfMonth,
273 | DayOfYear,
274 | ],
275 | }
276 |
277 | offset = to_offset(freq_str)
278 |
279 | for offset_type, feature_classes in features_by_offsets.items():
280 | if isinstance(offset, offset_type):
281 | return [cls() for cls in feature_classes]
--------------------------------------------------------------------------------
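`time_features_from_frequency_str` maps a pandas frequency string to the subset of encoders that is informative at that granularity, each returning values in [-0.5, 0.5]. A small sketch for an hourly index:

# Sketch: normalized time features for an hourly index.
import pandas as pd
from src.data.datasets.utils import time_features_from_frequency_str

index = pd.date_range('2020-01-01', periods=6, freq='H')
features = time_features_from_frequency_str('H')
# For 'H' this yields HourOfDay, DayOfWeek, DayOfMonth and DayOfYear encoders.
stacked = pd.DataFrame({repr(f): f(index) for f in features}, index=index)
print(stacked)   # every column lies in [-0.5, 0.5]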
/src/data/datasets/wth.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__wth.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['WTH', 'WTHInfo']
4 |
5 | # Cell
6 | import os
7 | from dataclasses import dataclass
8 | from typing import Dict, List, Optional, Tuple, Union
9 |
10 | import gdown
11 | import numpy as np
12 | import pandas as pd
13 |
14 | from .utils import Info, time_features_from_frequency_str
15 | from .ett import process_multiple_ts
16 |
17 | # Cell
18 | @dataclass
19 | class WTH:
20 | freq: str = 'H'
21 | name: str = 'WTH'
22 | n_ts: int = 12
23 |
24 | # Cell
25 | WTHInfo = Info(groups=('WTH',),
26 | class_groups=(WTH,))
27 |
28 | # Cell
29 | @dataclass
30 | class WTH:
31 |
32 | source_url: str = 'https://drive.google.com/uc?id=1UBRz-aM_57i_KCC-iaSWoKDPTGGv6EaG'
33 |
34 | @staticmethod
35 | def load(directory: str,
36 | cache: bool = True) -> Tuple[pd.DataFrame,
37 | Optional[pd.DataFrame],
38 | Optional[pd.DataFrame]]:
39 | """Downloads and loads ETT data.
40 |
41 | Parameters
42 | ----------
43 | directory: str
44 | Directory where data will be downloaded.
45 | cache: bool
46 | If `True`, saves the processed data to a local pickle and loads it on subsequent calls.
47 |
48 | Notes
49 | -----
50 | [1] Returns train+val+test sets.
51 | """
52 | path = f'{directory}/wth/datasets'
53 | file_cache = f'{path}/WTH.p'
54 |
55 | if os.path.exists(file_cache) and cache:
56 | df, X_df, S_df = pd.read_pickle(file_cache)
57 |
58 | return df, X_df, S_df
59 |
60 |
61 | WTH.download(directory)
62 | path = f'{directory}/wth/datasets'
63 |
64 | y_df = pd.read_csv(f'{path}/WTH.csv')
65 | y_df, X_df = process_multiple_ts(y_df)
66 |
67 | S_df = None
68 | if cache:
69 | pd.to_pickle((y_df, X_df, S_df), file_cache)
70 |
71 | return y_df, X_df, S_df
72 |
73 | @staticmethod
74 | def download(directory: str) -> None:
75 | """Download WTH Dataset."""
76 | path = f'{directory}/wth/datasets/'
77 | if not os.path.exists(path):
78 | os.makedirs(path)
79 | gdown.download(WTH.source_url, f'{path}/WTH.csv')
--------------------------------------------------------------------------------
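Like the other loaders, WTH caches the processed frames as a pickle so only the first call hits the Google Drive download. A sketch of the round trip, with `./data` as an assumed directory:

# Hypothetical usage of the WTH loader; the directory is illustrative.
from src.data.datasets.wth import WTH

y_df, X_df, S_df = WTH.load('./data')    # downloads WTH.csv and writes WTH.p
y_df2, X_df2, _ = WTH.load('./data')     # second call reads the cached pickle
assert y_df.equals(y_df2)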
/src/data/scalers.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data__scalers.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['Scaler', 'norm_scaler', 'inv_norm_scaler', 'norm1_scaler', 'inv_norm1_scaler', 'std_scaler',
4 | 'inv_std_scaler', 'median_scaler', 'inv_median_scaler', 'invariant_scaler', 'inv_invariant_scaler']
5 |
6 | # Cell
7 | import numpy as np
8 | import statsmodels.api as sm
9 |
10 | # Cell
11 | import numpy as np
12 | import statsmodels.api as sm
13 |
14 | #TODO: rewrite everything; this is provisional code kept because it runs
15 | #TODO: filter ahead of time using an offset
16 | #TODO: duplicated code across very similar cases
17 | #TODO: use a sklearn scaler?
18 | #TODO: shift/scale notation is abused with min, max
19 | #TODO: add comments
20 | #TODO: add a helper function that returns the scaler-specific pieces
21 | #TODO: one subclass per scaler
22 | #TODO: only works for a single series
23 |
24 | class Scaler(object):
25 | def __init__(self, normalizer):
26 | assert (normalizer in ['std', 'invariant', 'norm', 'norm1', 'median']), 'Normalizer not defined'
27 | self.normalizer = normalizer
28 | self.x_shift = None
29 | self.x_scale = None
30 |
31 | def scale(self, x, mask):
32 | if self.normalizer == 'invariant':
33 | x_scaled, x_shift, x_scale = invariant_scaler(x, mask)
34 | elif self.normalizer == 'median':
35 | x_scaled, x_shift, x_scale = median_scaler(x, mask)
36 | elif self.normalizer == 'std':
37 | x_scaled, x_shift, x_scale = std_scaler(x, mask)
38 | elif self.normalizer == 'norm':
39 | x_scaled, x_shift, x_scale = norm_scaler(x, mask)
40 | elif self.normalizer == 'norm1':
41 | x_scaled, x_shift, x_scale = norm1_scaler(x, mask)
42 |
43 | assert len(x[mask==1]) == np.sum(mask), 'Mask is inconsistent with x'
44 | nan_before_scale = np.sum(np.isnan(x))
45 | nan_after_scale = np.sum(np.isnan(x_scaled))
46 | assert nan_before_scale == nan_after_scale, 'Scaler induced nans'
47 |
48 | self.x_shift = x_shift
49 | self.x_scale = x_scale
50 | return np.array(x_scaled)
51 |
52 | def inv_scale(self, x):
53 | assert self.x_shift is not None
54 | assert self.x_scale is not None
55 |
56 | if self.normalizer == 'invariant':
57 | x_inv_scaled = inv_invariant_scaler(x, self.x_shift, self.x_scale)
58 | elif self.normalizer == 'median':
59 | x_inv_scaled = inv_median_scaler(x, self.x_shift, self.x_scale)
60 | elif self.normalizer == 'std':
61 | x_inv_scaled = inv_std_scaler(x, self.x_shift, self.x_scale)
62 | elif self.normalizer == 'norm':
63 | x_inv_scaled = inv_norm_scaler(x, self.x_shift, self.x_scale)
64 | elif self.normalizer == 'norm1':
65 | x_inv_scaled = inv_norm1_scaler(x, self.x_shift, self.x_scale)
66 |
67 | return np.array(x_inv_scaled)
68 |
69 | # Norm
70 | def norm_scaler(x, mask):
71 | x_max = np.max(x[mask==1])
72 | x_min = np.min(x[mask==1])
73 |
74 | x = (x - x_min) / (x_max - x_min) #TODO: guard against division by zero
75 | return x, x_min, x_max
76 |
77 | def inv_norm_scaler(x, x_min, x_max):
78 | return x * (x_max - x_min) + x_min
79 |
80 | # Norm1
81 | def norm1_scaler(x, mask):
82 | x_max = np.max(x[mask==1])
83 | x_min = np.min(x[mask==1])
84 |
85 | x = (x - x_min) / (x_max - x_min) #TODO: guard against division by zero
86 | x = x * 2 - 1
87 | return x, x_min, x_max
88 |
89 | def inv_norm1_scaler(x, x_min, x_max):
90 | x = (x + 1) / 2
91 | return x * (x_max - x_min) + x_min
92 |
93 | # Std
94 | def std_scaler(x, mask):
95 | x_mean = np.mean(x[mask==1])
96 | x_std = np.std(x[mask==1])
97 |
98 | x = (x - x_mean) / x_std #TODO: guard against division by zero
99 | return x, x_mean, x_std
100 |
101 | def inv_std_scaler(x, x_mean, x_std):
102 | return (x * x_std) + x_mean
103 |
104 | # Median
105 | def median_scaler(x, mask):
106 | x_median = np.median(x[mask==1])
107 | x_mad = sm.robust.scale.mad(x[mask==1])
108 | if x_mad == 0:
109 | x_mad = np.std(x[mask==1], ddof = 1) / 0.6744897501960817
110 | x = (x - x_median) / x_mad
111 | return x, x_median, x_mad
112 |
113 | def inv_median_scaler(x, x_median, x_mad):
114 | return x * x_mad + x_median
115 |
116 | # Invariant
117 | def invariant_scaler(x, mask):
118 | x_median = np.median(x[mask==1])
119 | x_mad = sm.robust.scale.mad(x[mask==1])
120 | if x_mad == 0:
121 | x_mad = np.std(x[mask==1], ddof = 1) / 0.6744897501960817
122 | x = np.arcsinh((x - x_median) / x_mad)
123 | return x, x_median, x_mad
124 |
125 | def inv_invariant_scaler(x, x_median, x_mad):
126 | return np.sinh(x) * x_mad + x_median
127 |
--------------------------------------------------------------------------------
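The `Scaler` wrapper computes its statistics only on the masked-in observations but applies the transform to the full vector, and `inv_scale` undoes it exactly. A small sketch using the 'median' normalizer:

# Sketch: scale with a training mask, then invert the transform.
import numpy as np
from src.data.scalers import Scaler

x = np.array([1.0, 2.0, 3.0, 4.0, 100.0])
mask = np.array([1, 1, 1, 1, 0])    # statistics ignore the last (outlying) point
scaler = Scaler(normalizer='median')
x_scaled = scaler.scale(x, mask)
x_back = scaler.inv_scale(x_scaled)
assert np.allclose(x, x_back)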
/src/data/tsloader.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data__tsloader.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['TimeSeriesLoader', 'FastTimeSeriesLoader']
4 |
5 | # Cell
6 | import warnings
7 | from collections.abc import Mapping
8 | from typing import Dict, List, Optional, Union
9 |
10 | import numpy as np
11 | import torch as t
12 | from fastcore.foundation import patch
13 | from torch.utils.data import DataLoader
14 |
15 | from .tsdataset import TimeSeriesDataset, WindowsDataset
16 |
17 | # Cell
18 | class TimeSeriesLoader(DataLoader):
19 |
20 | def __init__(self, dataset: Union[TimeSeriesDataset, WindowsDataset],
21 | eq_batch_size: bool = False,
22 | n_windows: Optional[int] = None,
23 | **kwargs) -> 'TimeSeriesLoader':
24 | """Wraps the pytorch `DataLoader` with a special collate function
25 | for the `TimeSeriesDataset` outputs.
26 |
27 | The TimeSeriesDataset constructs all the trainable windows
28 | of `batch_size` series. The number of windows can be greater
29 | or smaller than the `batch_size`. For this reason,
30 | an additional boolean parameter, `eq_batch_size`, is included:
31 | if `True`, `batch_size` windows are sampled randomly,
32 | while `False` returns all windows.
33 |
34 | Parameters
35 | ----------
36 | dataset: TimeSeriesDataset
37 | Stored time series.
38 | eq_batch_size: bool
39 | If `True` samples `batch_size` windows randomly,
40 | while `False` or `batch_size=None` returns all windows.
41 | n_windows: int
42 | Number of windows to sample after
43 | batching batch_size series.
44 | """
45 | if 'collate_fn' in kwargs.keys():
46 | warnings.warn(
47 | 'This class wraps the pytorch `DataLoader` with a '
48 | 'special collate function. If you want to use yours '
49 | 'simply use `DataLoader`. Removing collate_fn'
50 | )
51 | kwargs.pop('collate_fn')
52 |
53 | kwargs_ = {**kwargs, **dict(collate_fn=self._collate_fn)}
54 | DataLoader.__init__(self, dataset=dataset, **kwargs_)
55 | self.eq_batch_size = eq_batch_size
56 | self.n_windows = n_windows
57 | self.w_idxs: Optional[np.ndarray] = None
58 |
59 | # Cell
60 | @patch
61 | def _check_batch_size(self: TimeSeriesLoader, batch: t.Tensor):
62 | complete_batch = batch
63 | if self.w_idxs is not None:
64 | complete_batch = batch[self.w_idxs]
65 |
66 | return complete_batch
67 |
68 | # Cell
69 | @patch
70 | def _collate_fn(self: TimeSeriesLoader, batch: Union[List, Dict[str, t.Tensor], t.Tensor]):
71 | """Special collate fn for the `TimeSeriesDataset`.
72 |
73 | Notes
74 | -----
75 | [1] Adapted from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py.
76 | """
77 | elem = batch[0]
78 | # if len(batch) == 1:
79 | # return {key: self._check_batch_size(elem[key]) for key in elem}
80 |
81 | elem_type = type(elem)
82 |
83 | if isinstance(elem, t.Tensor):
84 | out = None
85 | if t.utils.data.get_worker_info() is not None:
86 | # If we're in a background process, concatenate directly into a
87 | # shared memory tensor to avoid an extra copy
88 | numel = sum([x.numel() for x in batch])
89 | storage = elem.storage()._new_shared(numel)
90 | out = elem.new(storage)
91 | complete_batch = t.cat(batch, out=out)
92 | return self._check_batch_size(complete_batch)
93 |
94 | elif isinstance(elem, Mapping):
95 | n_windows = [elem_['Y'].size(0) for elem_ in batch]
96 | n_windows = sum(n_windows)
97 | if self.eq_batch_size and self.batch_size is not None:
98 | self.w_idxs = np.random.choice(n_windows, size=self.batch_size,
99 | replace=(n_windows < self.batch_size))
100 | if not self.eq_batch_size and self.n_windows is not None:
101 | self.w_idxs = np.random.choice(n_windows, size=self.n_windows,
102 | replace=(n_windows < self.n_windows))
103 | return {key: self.collate_fn([d[key] for d in batch]) for key in elem}
104 |
105 | raise TypeError(f'Unknown {elem_type}')
106 |
107 | # Cell
108 | class FastTimeSeriesLoader:
109 | """
110 | A DataLoader-like object for a set of tensors that can be much faster than
111 | TensorDataset + DataLoader, because `DataLoader` grabs individual indices of
112 | the dataset and calls `cat` on them (slow).
113 | Source: https://discuss.pytorch.org/t/dataloader-much-slower-than-manual-batching/27014/6
114 |
115 | Notes
116 | -----
117 | [1] Adapted from https://github.com/hcarlens/pytorch-tabular/blob/master/fast_tensor_data_loader.py.
118 | """
119 | def __init__(self, dataset: TimeSeriesDataset, batch_size: int = 32,
120 | eq_batch_size: bool = False,
121 | n_windows: Optional[int] = None,
122 | shuffle: bool = False) -> 'FastTimeSeriesLoader':
123 | """Initialize a FastTimeSeriesLoader.
124 |
125 | The TimeSeriesDataset constructs all the trainable windows
126 | of `batch_size` series. The number of windows can be greater
127 | or smaller than the `batch_size`. For this reason,
128 | an additional boolean parameter, `eq_batch_size`, is included:
129 | if `True`, `batch_size` windows are sampled randomly,
130 | while `False` returns all windows.
131 |
132 | Parameters
133 | -----------
134 | dataset: TimeSeriesDataset
135 | Stored time series.
136 | batch_size: int
137 | Batch size to load.
138 | n_windows: int
139 | Number of windows to sample after
140 | batching batch_size series.
141 | shuffle: bool
142 | If `True`, shuffle the data *in-place* whenever an
143 | iterator is created out of this object.
144 | """
145 | self.dataset = dataset
146 | self.dataset_len = len(dataset)
147 | self.batch_size = batch_size
148 | self.eq_batch_size = eq_batch_size
149 | self.n_windows = n_windows
150 | self.shuffle = shuffle
151 | self.idxs = np.arange(self.dataset_len)
152 |
153 | # Calculate # batches
154 | n_batches, remainder = divmod(self.dataset_len, self.batch_size)
155 | if remainder > 0:
156 | n_batches += 1
157 | self.n_batches = n_batches
158 | self.w_idxs: Optional[np.ndarray] = None
159 |
160 | # Cell
161 | @patch
162 | def __iter__(self: FastTimeSeriesLoader):
163 | if self.shuffle:
164 | self.idxs = np.random.permutation(self.dataset_len)
165 |
166 | self.i = 0
167 | return self
168 |
169 | # Cell
170 | @patch
171 | def _check_batch_size(self: FastTimeSeriesLoader, batch: t.Tensor):
172 | complete_batch = batch
173 | if self.w_idxs is not None:
174 | complete_batch = batch[self.w_idxs]
175 | return complete_batch
176 |
177 | # Cell
178 | @patch
179 | def __next__(self: FastTimeSeriesLoader):
180 | if self.i >= self.dataset_len:
181 | raise StopIteration
182 | idxs = self.idxs[self.i:(self.i + self.batch_size)].tolist()
183 | batch = self.dataset[idxs]
184 | self.i += self.batch_size
185 |
186 | n_windows = batch['Y'].size(0)
187 | if self.eq_batch_size and self.batch_size is not None:
188 | self.w_idxs = np.random.choice(n_windows, size=self.batch_size,
189 | replace=(n_windows < self.batch_size))
190 |
191 | if not self.eq_batch_size and self.n_windows is not None:
192 | self.w_idxs = np.random.choice(n_windows, size=self.n_windows,
193 | replace=(n_windows < self.n_windows))
194 |
195 | return {key: self._check_batch_size(batch[key]) for key in batch}
196 |
197 | # Cell
198 | @patch
199 | def __len__(self: FastTimeSeriesLoader):
200 | return self.n_batches
--------------------------------------------------------------------------------
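In both loaders the window subsampling reduces to a single `np.random.choice` over the concatenated windows, sampling with replacement only when fewer windows are available than requested. A standalone sketch of that rule (the names here are illustrative, not part of the module):

# Illustrative sketch of the eq_batch_size sampling rule; `windows` stands in
# for the rows of the collated 'Y' tensor.
import numpy as np

windows = np.arange(10)     # pretend the batch produced 10 windows
batch_size = 4
w_idxs = np.random.choice(len(windows), size=batch_size,
                          replace=(len(windows) < batch_size))
print(windows[w_idxs])      # exactly batch_size windows, without replacement here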
/src/data/utils.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data__utils.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['create_synthetic_tsdata']
4 |
5 | # Cell
6 | from typing import Tuple
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 | # Cell
12 | def create_synthetic_tsdata(n_ts: int = 64,
13 | sort: bool = False) -> Tuple[pd.DataFrame,
14 | pd.DataFrame,
15 | pd.DataFrame]:
16 | """Creates synthetic time serie data."""
17 | uids = np.array([f'uid_{i + 1}' for i in range(n_ts)])
18 | dss = pd.date_range(end='2020-12-31', periods=n_ts)
19 |
20 | df = []
21 | for idx in range(n_ts):
22 | ts = pd.DataFrame({'unique_id': np.repeat(uids[idx], idx + 1),
23 | 'ds': dss[-(idx + 1):],
24 | 'y': 1 + np.arange(idx + 1)})
25 | df.append(ts)
26 |
27 | df = pd.concat(df)
28 | df['day_of_week'] = df['ds'].dt.day_of_week
29 | df['future_1'] = df['y'] + 1
30 | df['id_ts'] = df['unique_id'].astype('category').cat.codes
31 | if sort:
32 | df = df.sort_values(['unique_id', 'ds'])
33 |
34 | Y_df = df.filter(items=['unique_id', 'ds', 'y'])
35 | X_df = df.filter(items=['unique_id', 'ds', 'day_of_week', 'future_1'])
36 | S_df = df.filter(items=['unique_id', 'id_ts']).drop_duplicates()
37 |
38 | return Y_df, X_df, S_df
--------------------------------------------------------------------------------
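`create_synthetic_tsdata` is handy for quick tests: series `uid_i` has exactly `i` observations ending on 2020-12-31, with a calendar feature, a known future regressor, and a static id. A quick sketch:

# Sketch: build a tiny synthetic panel and inspect the three frames.
from src.data.utils import create_synthetic_tsdata

Y_df, X_df, S_df = create_synthetic_tsdata(n_ts=5)
print(Y_df.groupby('unique_id').size())   # uid_1..uid_5 have 1..5 observations
print(X_df.columns.tolist())              # ['unique_id', 'ds', 'day_of_week', 'future_1']
print(S_df)                               # one static row per series with its 'id_ts' code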
/src/experiments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/experiments/__init__.py
--------------------------------------------------------------------------------
/src/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/losses/__init__.py
--------------------------------------------------------------------------------
/src/losses/pytorch.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import torch as t
3 | import torch.nn as nn
4 |
5 | # Cell
6 | def divide_no_nan(a, b):
7 | """
8 | Auxiliary function to handle division by 0
9 | """
10 | div = a / b
11 | div[div != div] = 0.0
12 | div[div == float('inf')] = 0.0
13 | return div
14 |
15 | # Cell
16 | def MAPELoss(y, y_hat, mask=None):
17 | """MAPE Loss
18 |
19 | Calculates Mean Absolute Percentage Error between
20 | y and y_hat. MAPE measures the relative prediction
21 | accuracy of a forecasting method by calculating the
22 | percentage deviation of the prediction from the true
23 | value at a given time and averages these deviations
24 | over the length of the series.
25 | As defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
26 |
27 | Parameters
28 | ----------
29 | y: tensor (batch_size, output_size)
30 | actual values in torch tensor.
31 | y_hat: tensor (batch_size, output_size)
32 | predicted values in torch tensor.
33 | mask: tensor (batch_size, output_size)
34 | specifies date stamps per serie
35 | to consider in loss
36 |
37 | Returns
38 | -------
39 | mape:
40 | Mean absolute percentage error.
41 | """
42 | if mask is None: mask = t.ones_like(y_hat)
43 |
44 | mask = divide_no_nan(mask, t.abs(y))
45 | mape = t.abs(y - y_hat) * mask
46 | mape = t.mean(mape)
47 | return mape
48 |
49 | # Cell
50 | def MSELoss(y, y_hat, mask=None):
51 | """MSE Loss
52 |
53 | Calculates Mean Squared Error between
54 | y and y_hat. MSE measures the prediction
55 | accuracy of a forecasting method by calculating the
56 | squared deviation of the prediction from the true
57 | value at a given time and averages these deviations
58 | over the length of the series.
59 |
60 | Parameters
61 | ----------
62 | y: tensor (batch_size, output_size)
63 | actual values in torch tensor.
64 | y_hat: tensor (batch_size, output_size)
65 | predicted values in torch tensor.
66 | mask: tensor (batch_size, output_size)
67 | specifies date stamps per serie
68 | to consider in loss
69 |
70 | Returns
71 | -------
72 | mse:
73 | Mean Squared Error.
74 | """
75 | if mask is None: mask = t.ones_like(y_hat)
76 |
77 | mse = (y - y_hat)**2
78 | mse = mask * mse
79 | mse = t.mean(mse)
80 | return mse
81 |
82 | # Cell
83 | def RMSELoss(y, y_hat, mask=None):
84 | """RMSE Loss
85 |
86 | Calculates Root Mean Squared Error between
87 | y and y_hat. RMSE measures the prediction
88 | accuracy of a forecasting method by calculating the
89 | squared deviation of the prediction from the true
90 | value at a given time, averages these deviations
91 | over the length of the series, and takes the square root.
92 |
93 | Parameters
94 | ----------
95 | y: tensor (batch_size, output_size)
96 | actual values in torch tensor.
97 | y_hat: tensor (batch_size, output_size)
98 | predicted values in torch tensor.
99 | mask: tensor (batch_size, output_size)
100 | specifies date stamps per serie
101 | to consider in loss
102 |
103 | Returns
104 | -------
105 | rmse:
106 | Root Mean Squared Error.
107 | """
108 | if mask is None: mask = t.ones_like(y_hat)
109 |
110 | rmse = (y - y_hat)**2
111 | rmse = mask * rmse
112 | rmse = t.sqrt(t.mean(rmse))
113 | return rmse
114 |
115 | # Cell
116 | def SMAPELoss(y, y_hat, mask=None):
117 | """SMAPE2 Loss
118 |
119 | Calculates Symmetric Mean Absolute Percentage Error.
120 | SMAPE measures the relative prediction accuracy of a
121 | forecasting method by calculating the relative deviation
122 | of the prediction and the true value scaled by the sum of the
123 | absolute values for the prediction and true value at a
124 | given time, then averages these deviations over the length
125 | of the series. This bounds the SMAPE between
126 | 0% and 200%, which is desirable compared to normal MAPE,
127 | which may be undefined.
128 |
129 | Parameters
130 | ----------
131 | y: tensor (batch_size, output_size)
132 | actual values in torch tensor.
133 | y_hat: tensor (batch_size, output_size)
134 | predicted values in torch tensor.
135 |
136 | Returns
137 | -------
138 | smape:
139 | symmetric mean absolute percentage error
140 |
141 | References
142 | ----------
143 | [1] https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
144 | """
145 | if mask is None: mask = t.ones_like(y_hat)
146 |
147 | delta_y = t.abs((y - y_hat))
148 | scale = t.abs(y) + t.abs(y_hat)
149 | smape = divide_no_nan(delta_y, scale)
150 | smape = smape * mask
151 | smape = 2 * t.mean(smape)
152 | return smape
153 |
154 | # Cell
155 | def MASELoss(y, y_hat, y_insample, seasonality, mask=None):
156 | """ Calculates the M4 Mean Absolute Scaled Error.
157 |
158 | MASE measures the relative prediction accuracy of a
159 | forecasting method by comparing the mean absolute errors
160 | of the prediction and the true value against the mean
161 | absolute errors of the seasonal naive model.
162 |
163 | Parameters
164 | ----------
165 | seasonality: int
166 | main frequency of the time series
167 | Hourly 24, Daily 7, Weekly 52,
168 | Monthly 12, Quarterly 4, Yearly 1
169 | y: tensor (batch_size, output_size)
170 | actual test values
171 | y_hat: tensor (batch_size, output_size)
172 | predicted values
173 | y_insample: tensor (batch_size, input_size)
174 | actual insample values for Seasonal Naive predictions
175 |
176 | Returns
177 | -------
178 | mase:
179 | mean absolute scaled error
180 |
181 | References
182 | ----------
183 | [1] https://robjhyndman.com/papers/mase.pdf
184 | """
185 | if mask is None: mask = t.ones_like(y_hat)
186 |
187 | delta_y = t.abs(y - y_hat)
188 | scale = t.mean(t.abs(y_insample[:, seasonality:] - \
189 | y_insample[:, :-seasonality]), axis=1)
190 | mase = divide_no_nan(delta_y, scale[:, None])
191 | mase = mase * mask
192 | mase = t.mean(mase)
193 | return mase
194 |
195 | # Cell
196 | def MAELoss(y, y_hat, mask=None):
197 | """MAE Loss
198 |
199 | Calculates Mean Absolute Error between
200 |     y and y_hat. MAE measures the prediction
201 |     accuracy of a forecasting method by calculating the
202 |     absolute deviation of the prediction and the true
203 |     value at a given time and averages these deviations
204 | over the length of the series.
205 |
206 | Parameters
207 | ----------
208 | y: tensor (batch_size, output_size)
209 | actual values in torch tensor.
210 | y_hat: tensor (batch_size, output_size)
211 | predicted values in torch tensor.
212 | mask: tensor (batch_size, output_size)
213 |         specifies date stamps per series
214 | to consider in loss
215 |
216 | Returns
217 | -------
218 | mae:
219 | Mean absolute error.
220 | """
221 | if mask is None: mask = t.ones_like(y_hat)
222 |
223 | mae = t.abs(y - y_hat) * mask
224 | mae = t.mean(mae)
225 | return mae
226 |
227 | # Cell
228 | def PinballLoss(y, y_hat, mask=None, tau=0.5):
229 | """Pinball Loss
230 | Computes the pinball loss between y and y_hat.
231 |
232 | Parameters
233 | ----------
234 | y: tensor (batch_size, output_size)
235 | actual values in torch tensor.
236 | y_hat: tensor (batch_size, output_size)
237 | predicted values in torch tensor.
238 | tau: float, between 0 and 1
239 | the slope of the pinball loss, in the context of
240 | quantile regression, the value of tau determines the
241 | conditional quantile level.
242 |
243 | Returns
244 | -------
245 | pinball:
246 | average accuracy for the predicted quantile
247 | """
248 | if mask is None: mask = t.ones_like(y_hat)
249 |
250 | delta_y = t.sub(y, y_hat)
251 | pinball = t.max(t.mul(tau, delta_y), t.mul((tau - 1), delta_y))
252 | pinball = pinball * mask
253 | pinball = t.mean(pinball)
254 | return pinball
255 |
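A tiny numeric illustration of the asymmetry controlled by tau (illustrative only): for an under-forecast, a high tau penalizes more than a low one.

import torch as t
from src.losses.pytorch import PinballLoss

y = t.tensor([[10.0]])
y_hat = t.tensor([[8.0]])                  # under-forecast by 2
print(PinballLoss(y, y_hat, tau=0.9))      # ~0.9 * 2 = 1.8
print(PinballLoss(y, y_hat, tau=0.1))      # ~0.1 * 2 = 0.2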
256 | # Cell
257 | def LevelVariabilityLoss(levels, level_variability_penalty):
258 | """ Level Variability Loss
259 | Computes the variability penalty for the level.
260 |
261 | Parameters
262 | ----------
263 | levels: tensor with shape (batch, n_time)
264 | levels obtained from exponential smoothing component of ESRNN
265 | level_variability_penalty: float
266 | this parameter controls the strength of the penalization
267 | to the wigglines of the level vector, induces smoothness
268 | in the output
269 |
270 | Returns
271 | ----------
272 | level_var_loss:
273 | wiggliness loss for the level vector
274 | """
275 | assert levels.shape[1] > 2
276 | level_prev = t.log(levels[:, :-1])
277 | level_next = t.log(levels[:, 1:])
278 | log_diff_of_levels = t.sub(level_prev, level_next)
279 |
280 | log_diff_prev = log_diff_of_levels[:, :-1]
281 | log_diff_next = log_diff_of_levels[:, 1:]
282 | diff = t.sub(log_diff_prev, log_diff_next)
283 | level_var_loss = diff**2
284 | level_var_loss = level_var_loss.mean() * level_variability_penalty
285 |
286 | return level_var_loss
287 |
288 | # Cell
289 | def SmylLoss(y, y_hat, levels, mask, tau, level_variability_penalty=0.0):
290 |     """Computes the Smyl Loss that combines level variability with
291 |     the Pinball loss.
292 |     y: tensor of actual values,
293 |         shape (n_windows, batch_size, window_size).
294 |     y_hat: tensor of predicted values,
295 |         shape (n_windows, batch_size, window_size).
296 | levels: levels obtained from exponential smoothing component of ESRNN.
297 | tensor with shape (batch, n_time).
298 | return: smyl_loss.
299 | """
300 |
301 | if mask is None: mask = t.ones_like(y_hat)
302 |
303 | smyl_loss = PinballLoss(y, y_hat, mask, tau)
304 |
305 | if level_variability_penalty > 0:
306 | log_diff_of_levels = LevelVariabilityLoss(levels, level_variability_penalty)
307 | smyl_loss += log_diff_of_levels
308 |
309 | return smyl_loss
310 |
311 | # Cell
312 | def MQLoss(y, y_hat, quantiles, mask=None):
313 | """MQLoss
314 |
315 | Calculates Average Multi-quantile Loss function, for
316 | a given set of quantiles, based on the absolute
317 | difference between predicted and true values.
318 |
319 | Parameters
320 | ----------
321 | y: tensor (batch_size, output_size) actual values in torch tensor.
322 | y_hat: tensor (batch_size, output_size, n_quantiles) predicted values in torch tensor.
323 |     mask: tensor (batch_size, output_size, n_quantiles) specifies date stamps per series
324 | to consider in loss
325 | quantiles: tensor(n_quantiles) quantiles to estimate from the distribution of y.
326 |
327 | Returns
328 | -------
329 |     lq: average multi-quantile loss (scalar tensor).
330 | """
331 | assert len(quantiles) > 1, f'your quantiles are of len: {len(quantiles)}'
332 |
333 | if mask is None: mask = t.ones_like(y_hat)
334 |
335 | n_q = len(quantiles)
336 |
337 | error = y_hat - y.unsqueeze(-1)
338 | sq = t.maximum(-error, t.zeros_like(error))
339 | s1_q = t.maximum(error, t.zeros_like(error))
340 | loss = (quantiles * sq + (1 - quantiles) * s1_q)
341 |
342 |     return t.mean(t.mean(loss * mask, axis=1))  # apply mask before averaging
343 |
344 | # Cell
345 | def wMQLoss(y, y_hat, quantiles, mask=None):
346 | """wMQLoss
347 |
348 |     Calculates the weighted Average Multi-quantile Loss for
349 |     a given set of quantiles, normalizing the quantile losses
350 |     by the absolute values of the target.
351 |
352 | Parameters
353 | ----------
354 | y: tensor (batch_size, output_size) actual values in torch tensor.
355 | y_hat: tensor (batch_size, output_size, n_quantiles) predicted values in torch tensor.
356 |     mask: tensor (batch_size, output_size, n_quantiles) specifies date stamps per series
357 | to consider in loss
358 | quantiles: tensor(n_quantiles) quantiles to estimate from the distribution of y.
359 |
360 | Returns
361 | -------
362 |     lq: weighted average multi-quantile loss (scalar tensor).
363 | """
364 | assert len(quantiles) > 1, f'your quantiles are of len: {len(quantiles)}'
365 |
366 | if mask is None: mask = t.ones_like(y_hat)
367 |
368 | n_q = len(quantiles)
369 |
370 | error = y_hat - y.unsqueeze(-1)
371 |
372 | sq = t.maximum(-error, t.zeros_like(error))
373 | s1_q = t.maximum(error, t.zeros_like(error))
374 | loss = (quantiles * sq + (1 - quantiles) * s1_q)
375 |
376 | loss = divide_no_nan(t.sum(loss * mask, axis=-2),
377 | t.sum(t.abs(y.unsqueeze(-1)) * mask, axis=-2))
378 |
379 | return t.mean(loss)
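A minimal shape sketch for the two multi-quantile losses above (toy tensors, illustrative only): y carries one target path, y_hat one forecast per quantile.

import torch as t
from src.losses.pytorch import MQLoss, wMQLoss

y = t.rand(8, 12)                          # (batch_size, output_size)
y_hat = t.rand(8, 12, 3)                   # (batch_size, output_size, n_quantiles)
quantiles = t.tensor([0.1, 0.5, 0.9])

print(MQLoss(y, y_hat, quantiles))         # scalar tensor
print(wMQLoss(y, y_hat, quantiles))        # scalar tensor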
--------------------------------------------------------------------------------
/src/losses/utils.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | from typing import Union, List, Optional
3 |
4 | import torch as t
5 | from fastcore.foundation import patch
6 |
7 | from .pytorch import (
8 | MAPELoss, MASELoss, SMAPELoss,
9 | MSELoss, MAELoss, SmylLoss,
10 | PinballLoss, MQLoss, wMQLoss
11 | )
12 |
13 | # Cell
14 | class LossFunction:
15 | def __init__(self, loss_name: str, seasonality: Optional[int] = None,
16 | percentile: Optional[Union[List[int], int]] = None,
17 |                  level_variability_penalty: Optional[int] = None) -> None:
18 | """Instantiates a callable class of the `loss_name` loss.
19 |
20 | Parameters
21 | ----------
22 | loss_name: str
23 | Name of the loss.
24 | seasonality: int
25 | main frequency of the time series
26 | Hourly 24, Daily 7, Weekly 52,
27 |             Monthly 12, Quarterly 4, Yearly 1.
28 | Default `None`.
29 | Mandatory for MASE loss.
30 | percentile: Union[List[int], int]
31 | Target percentile.
32 | For SMYL and PINBALL losses an int
33 | is expected.
34 | For MQ and wMQ losses a list of ints
35 | is expected.
36 | Default `None`.
37 | level_variability_penalty: int
38 | Only used for SMYL loss.
39 | """
40 | if loss_name in ['SMYL', 'PINBALL'] and not isinstance(percentile, int):
41 | raise Exception(f'Percentile should be integer for {loss_name} loss.')
42 | elif loss_name in ['MQ', 'wMQ'] and not isinstance(percentile, list):
43 | raise Exception(f'Percentile should be list for {loss_name} loss')
44 | elif loss_name == 'MASE' and seasonality is None:
45 |             raise Exception(f'Seasonality should be an integer for {loss_name} loss')
46 |
47 |
48 | self.loss_name = loss_name
49 | self.seasonality = seasonality
50 | self.percentile = percentile
51 | self.level_variability_penalty = level_variability_penalty
52 |
53 | self.tau = self.percentile / 100 if isinstance(percentile, int) else None
54 | self.quantiles = [tau / 100 for tau in percentile] if isinstance(percentile, list) else None
55 |
56 | # Cell
57 | @patch
58 | def __call__(self: LossFunction,
59 | y: t.Tensor,
60 | y_hat: t.Tensor,
61 | mask: Optional[t.Tensor] = None,
62 | y_insample: Optional[t.Tensor] = None,
63 | levels: Optional[t.Tensor] = None) -> t.Tensor:
64 | """Returns loss according to `loss_name`."""
65 | if self.loss_name == 'SMYL':
66 | return SmylLoss(y=y, y_hat=y_hat, levels=levels, mask=mask,
67 | tau=self.tau,
68 | level_variability_penalty=self.level_variability_penalty)
69 |
70 | elif self.loss_name == 'PINBALL':
71 | return PinballLoss(y=y, y_hat=y_hat, mask=mask,
72 | tau=self.tau)
73 |
74 | elif self.loss_name == 'MQ':
75 |             quantiles = t.tensor(self.quantiles, device=y.device)
76 | return MQLoss(y=y, y_hat=y_hat, quantiles=quantiles, mask=mask)
77 |
78 | elif self.loss_name == 'wMQ':
79 |             quantiles = t.tensor(self.quantiles, device=y.device)
80 | return wMQLoss(y=y, y_hat=y_hat, quantiles=quantiles, mask=mask)
81 |
82 | elif self.loss_name == 'MAPE':
83 | return MAPELoss(y=y, y_hat=y_hat, mask=mask)
84 |
85 | elif self.loss_name == 'MASE':
86 | return MASELoss(y=y, y_hat=y_hat, y_insample=y_insample,
87 | seasonality=self.seasonality, mask=mask)
88 |
89 | elif self.loss_name == 'SMAPE':
90 | return SMAPELoss(y=y, y_hat=y_hat, mask=mask)
91 |
92 | elif self.loss_name == 'MSE':
93 | return MSELoss(y=y, y_hat=y_hat, mask=mask)
94 |
95 | elif self.loss_name == 'MAE':
96 | return MAELoss(y=y, y_hat=y_hat, mask=mask)
97 |
98 |     raise Exception(f'Unknown loss function: {self.loss_name}')
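A hedged usage sketch for the dispatcher above (toy tensors, illustrative only; percentiles are converted to quantiles internally):

import torch as t
from src.losses.utils import LossFunction

y = t.rand(16, 24)                         # (batch_size, output_size)
y_hat = t.rand(16, 24)                     # point forecasts
y_hat_q = t.rand(16, 24, 3)                # one forecast per requested percentile

pinball = LossFunction(loss_name='PINBALL', percentile=90)
mq = LossFunction(loss_name='MQ', percentile=[10, 50, 90])

print(pinball(y=y, y_hat=y_hat))           # scalar tensor
print(mq(y=y, y_hat=y_hat_q))              # scalar tensor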
--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/__init__.py
--------------------------------------------------------------------------------
/src/models/components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/components/__init__.py
--------------------------------------------------------------------------------
/src/models/components/autocorrelation.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import math
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | # Cell
9 | class AutoCorrelation(nn.Module):
10 | """
11 | AutoCorrelation Mechanism with the following two phases:
12 | (1) period-based dependencies discovery
13 | (2) time delay aggregation
14 | This block can replace the self-attention family mechanism seamlessly.
15 | """
16 | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
17 | super(AutoCorrelation, self).__init__()
18 | self.factor = factor
19 | self.scale = scale
20 | self.mask_flag = mask_flag
21 | self.output_attention = output_attention
22 | self.dropout = nn.Dropout(attention_dropout)
23 |
24 | def time_delay_agg_training(self, values, corr):
25 | """
26 | SpeedUp version of Autocorrelation (a batch-normalization style design)
27 | This is for the training phase.
28 | """
29 | head = values.shape[1]
30 | channel = values.shape[2]
31 | length = values.shape[3]
32 | # find top k
33 | top_k = int(self.factor * math.log(length))
34 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
35 | index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
36 | weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
37 | # update corr
38 | tmp_corr = torch.softmax(weights, dim=-1)
39 | # aggregation
40 | tmp_values = values
41 | delays_agg = torch.zeros_like(values, dtype=torch.float, device=values.device)
42 | for i in range(top_k):
43 | pattern = torch.roll(tmp_values, -int(index[i]), -1)
44 | delays_agg = delays_agg + pattern * \
45 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
46 | return delays_agg
47 |
48 | def time_delay_agg_inference(self, values, corr):
49 | """
50 | SpeedUp version of Autocorrelation (a batch-normalization style design)
51 | This is for the inference phase.
52 | """
53 | batch = values.shape[0]
54 | head = values.shape[1]
55 | channel = values.shape[2]
56 | length = values.shape[3]
57 | # index init
58 | init_index = torch.arange(length, device=values.device).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
59 | # find top k
60 | top_k = int(self.factor * math.log(length))
61 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
62 | weights = torch.topk(mean_value, top_k, dim=-1)[0]
63 | delay = torch.topk(mean_value, top_k, dim=-1)[1]
64 | # update corr
65 | tmp_corr = torch.softmax(weights, dim=-1)
66 | # aggregation
67 | tmp_values = values.repeat(1, 1, 1, 2)
68 | delays_agg = torch.zeros_like(values, dtype=torch.float, device=values.device)
69 | for i in range(top_k):
70 | tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
71 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
72 | delays_agg = delays_agg + pattern * \
73 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
74 | return delays_agg
75 |
76 | def time_delay_agg_full(self, values, corr):
77 | """
78 | Standard version of Autocorrelation
79 | """
80 | batch = values.shape[0]
81 | head = values.shape[1]
82 | channel = values.shape[2]
83 | length = values.shape[3]
84 | # index init
85 | init_index = torch.arange(length, device=values.device).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
86 | # find top k
87 | top_k = int(self.factor * math.log(length))
88 | weights = torch.topk(corr, top_k, dim=-1)[0]
89 | delay = torch.topk(corr, top_k, dim=-1)[1]
90 | # update corr
91 | tmp_corr = torch.softmax(weights, dim=-1)
92 | # aggregation
93 | tmp_values = values.repeat(1, 1, 1, 2)
94 | delays_agg = torch.zeros_like(values, dtype=torch.float, device=values.device)
95 | for i in range(top_k):
96 | tmp_delay = init_index + delay[..., i].unsqueeze(-1)
97 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
98 | delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
99 | return delays_agg
100 |
101 | def forward(self, queries, keys, values, attn_mask):
102 | B, L, H, E = queries.shape
103 | _, S, _, D = values.shape
104 | if L > S:
105 | zeros = torch.zeros_like(queries[:, :(L - S), :], dtype=torch.float, device=queries.device)
106 | values = torch.cat([values, zeros], dim=1)
107 | keys = torch.cat([keys, zeros], dim=1)
108 | else:
109 | values = values[:, :L, :, :]
110 | keys = keys[:, :L, :, :]
111 |
112 | # period-based dependencies
113 | q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
114 | k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
115 | res = q_fft * torch.conj(k_fft)
116 | corr = torch.fft.irfft(res, dim=-1)
117 |
118 | # time delay agg
119 | if self.training:
120 | V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
121 | else:
122 | V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
123 |
124 | if self.output_attention:
125 | return (V.contiguous(), corr.permute(0, 3, 1, 2))
126 | else:
127 | return (V.contiguous(), None)
128 |
129 |
130 | class AutoCorrelationLayer(nn.Module):
131 | def __init__(self, correlation, d_model, n_heads, d_keys=None,
132 | d_values=None):
133 | super(AutoCorrelationLayer, self).__init__()
134 |
135 | d_keys = d_keys or (d_model // n_heads)
136 | d_values = d_values or (d_model // n_heads)
137 |
138 | self.inner_correlation = correlation
139 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
140 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
141 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
142 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
143 | self.n_heads = n_heads
144 |
145 | def forward(self, queries, keys, values, attn_mask):
146 | B, L, _ = queries.shape
147 | _, S, _ = keys.shape
148 | H = self.n_heads
149 |
150 | queries = self.query_projection(queries).view(B, L, H, -1)
151 | keys = self.key_projection(keys).view(B, S, H, -1)
152 | values = self.value_projection(values).view(B, S, H, -1)
153 |
154 | out, attn = self.inner_correlation(
155 | queries,
156 | keys,
157 | values,
158 | attn_mask
159 | )
160 | out = out.view(B, L, -1)
161 |
162 | return self.out_projection(out), attn
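A shape-only sketch of the autocorrelation layer above (toy sizes, illustrative only):

import torch
from src.models.components.autocorrelation import AutoCorrelation, AutoCorrelationLayer

layer = AutoCorrelationLayer(AutoCorrelation(mask_flag=False, factor=1, attention_dropout=0.0),
                             d_model=16, n_heads=4)
x = torch.randn(2, 32, 16)                 # (batch, length, d_model)
out, _ = layer(x, x, x, attn_mask=None)
print(out.shape)                           # torch.Size([2, 32, 16])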
--------------------------------------------------------------------------------
/src/models/components/autoformer.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | # Cell
7 | class my_Layernorm(nn.Module):
8 | """
9 | Special designed layernorm for the seasonal part
10 | """
11 | def __init__(self, channels):
12 | super(my_Layernorm, self).__init__()
13 | self.layernorm = nn.LayerNorm(channels)
14 |
15 | def forward(self, x):
16 | x_hat = self.layernorm(x)
17 | bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
18 | return x_hat - bias
19 |
20 |
21 | class moving_avg(nn.Module):
22 | """
23 | Moving average block to highlight the trend of time series
24 | """
25 | def __init__(self, kernel_size, stride):
26 | super(moving_avg, self).__init__()
27 | self.kernel_size = kernel_size
28 | self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
29 |
30 | def forward(self, x):
31 | # padding on the both ends of time series
32 | front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
33 | end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
34 | x = torch.cat([front, x, end], dim=1)
35 | x = self.avg(x.permute(0, 2, 1))
36 | x = x.permute(0, 2, 1)
37 | return x
38 |
39 |
40 | class series_decomp(nn.Module):
41 | """
42 | Series decomposition block
43 | """
44 | def __init__(self, kernel_size):
45 | super(series_decomp, self).__init__()
46 | self.moving_avg = moving_avg(kernel_size, stride=1)
47 |
48 | def forward(self, x):
49 | moving_mean = self.moving_avg(x)
50 | res = x - moving_mean
51 | return res, moving_mean
52 |
53 |
54 | class EncoderLayer(nn.Module):
55 | """
56 | Autoformer encoder layer with the progressive decomposition architecture
57 | """
58 | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
59 | super(EncoderLayer, self).__init__()
60 | d_ff = d_ff or 4 * d_model
61 | self.attention = attention
62 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
63 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
64 | self.decomp1 = series_decomp(moving_avg)
65 | self.decomp2 = series_decomp(moving_avg)
66 | self.dropout = nn.Dropout(dropout)
67 | self.activation = F.relu if activation == "relu" else F.gelu
68 |
69 | def forward(self, x, attn_mask=None):
70 | new_x, attn = self.attention(
71 | x, x, x,
72 | attn_mask=attn_mask
73 | )
74 | x = x + self.dropout(new_x)
75 | x, _ = self.decomp1(x)
76 | y = x
77 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
78 | y = self.dropout(self.conv2(y).transpose(-1, 1))
79 | res, _ = self.decomp2(x + y)
80 | return res, attn
81 |
82 |
83 | class Encoder(nn.Module):
84 | """
85 | Autoformer encoder
86 | """
87 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
88 | super(Encoder, self).__init__()
89 | self.attn_layers = nn.ModuleList(attn_layers)
90 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
91 | self.norm = norm_layer
92 |
93 | def forward(self, x, attn_mask=None):
94 | attns = []
95 | if self.conv_layers is not None:
96 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
97 | x, attn = attn_layer(x, attn_mask=attn_mask)
98 | x = conv_layer(x)
99 | attns.append(attn)
100 | x, attn = self.attn_layers[-1](x)
101 | attns.append(attn)
102 | else:
103 | for attn_layer in self.attn_layers:
104 | x, attn = attn_layer(x, attn_mask=attn_mask)
105 | attns.append(attn)
106 |
107 | if self.norm is not None:
108 | x = self.norm(x)
109 |
110 | return x, attns
111 |
112 |
113 | class DecoderLayer(nn.Module):
114 | """
115 | Autoformer decoder layer with the progressive decomposition architecture
116 | """
117 | def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
118 | moving_avg=25, dropout=0.1, activation="relu"):
119 | super(DecoderLayer, self).__init__()
120 | d_ff = d_ff or 4 * d_model
121 | self.self_attention = self_attention
122 | self.cross_attention = cross_attention
123 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
124 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
125 | self.decomp1 = series_decomp(moving_avg)
126 | self.decomp2 = series_decomp(moving_avg)
127 | self.decomp3 = series_decomp(moving_avg)
128 | self.dropout = nn.Dropout(dropout)
129 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
130 | padding_mode='circular', bias=False)
131 | self.activation = F.relu if activation == "relu" else F.gelu
132 |
133 | def forward(self, x, cross, x_mask=None, cross_mask=None):
134 | x = x + self.dropout(self.self_attention(
135 | x, x, x,
136 | attn_mask=x_mask
137 | )[0])
138 | x, trend1 = self.decomp1(x)
139 | x = x + self.dropout(self.cross_attention(
140 | x, cross, cross,
141 | attn_mask=cross_mask
142 | )[0])
143 | x, trend2 = self.decomp2(x)
144 | y = x
145 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
146 | y = self.dropout(self.conv2(y).transpose(-1, 1))
147 | x, trend3 = self.decomp3(x + y)
148 |
149 | residual_trend = trend1 + trend2 + trend3
150 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
151 | return x, residual_trend
152 |
153 |
154 | class Decoder(nn.Module):
155 | """
156 |     Autoformer decoder
157 | """
158 | def __init__(self, layers, norm_layer=None, projection=None):
159 | super(Decoder, self).__init__()
160 | self.layers = nn.ModuleList(layers)
161 | self.norm = norm_layer
162 | self.projection = projection
163 |
164 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
165 | for layer in self.layers:
166 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
167 | trend = trend + residual_trend
168 |
169 | if self.norm is not None:
170 | x = self.norm(x)
171 |
172 | if self.projection is not None:
173 | x = self.projection(x)
174 | return x, trend
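A small illustration of the decomposition block used by the encoder and decoder layers above (toy input, illustrative only); forward returns the (residual, moving-average trend) pair.

import torch
from src.models.components.autoformer import series_decomp

x = torch.sin(torch.linspace(0, 4 * 3.1416, 96)).reshape(1, 96, 1)   # (batch, time, channels)
seasonal, trend = series_decomp(kernel_size=25)(x)
print(seasonal.shape, trend.shape)         # torch.Size([1, 96, 1]) torch.Size([1, 96, 1])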
--------------------------------------------------------------------------------
/src/models/components/common.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import torch as t
3 | import torch.nn as nn
4 | from torch.nn.utils import weight_norm
5 | from torch.autograd.function import Function
6 |
7 | # Cell
8 | class Chomp1d(nn.Module):
9 | """
10 | Receives x input of dim [N,C,T], and trims it so that only
11 | 'time available' information is used. Used for one dimensional
12 | causal convolutions.
13 |     : param chomp_size: length of outsample values to skip.
14 | """
15 | def __init__(self, chomp_size):
16 | super(Chomp1d, self).__init__()
17 | self.chomp_size = chomp_size
18 |
19 | def forward(self, x):
20 | return x[:, :, :-self.chomp_size].contiguous()
21 |
22 | # Cell
23 | ACTIVATIONS = ['ReLU',
24 | 'Softplus',
25 | 'Tanh',
26 | 'SELU',
27 | 'LeakyReLU',
28 | 'PReLU',
29 | 'Sigmoid']
30 |
31 | class CausalConv1d(nn.Module):
32 | """
33 | Receives x input of dim [N,C,T], computes a unidimensional
34 | causal convolution.
35 |
36 | Parameters
37 | ----------
38 | in_channels: int
39 | out_channels: int
40 | activation: str
41 | https://discuss.pytorch.org/t/call-activation-function-from-string
42 | padding: int
43 | kernel_size: int
44 | dilation: int
45 |
46 | Returns:
47 |         x: tensor
48 | torch tensor of dim [N,C,T]
49 | activation(conv1d(inputs, kernel) + bias)
50 | """
51 | def __init__(self, in_channels, out_channels, kernel_size,
52 | padding, dilation, activation, stride:int=1, with_weight_norm:bool=False):
53 | super(CausalConv1d, self).__init__()
54 | assert activation in ACTIVATIONS, f'{activation} is not in {ACTIVATIONS}'
55 |
56 | self.conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
57 | kernel_size=kernel_size, stride=stride, padding=padding,
58 | dilation=dilation)
59 | if with_weight_norm: self.conv = weight_norm(self.conv)
60 |
61 | self.chomp = Chomp1d(padding)
62 | self.activation = getattr(nn, activation)()
63 | self.causalconv = nn.Sequential(self.conv, self.chomp, self.activation)
64 |
65 | def forward(self, x):
66 | return self.causalconv(x)
67 |
68 | # Cell
69 | class TimeDistributed2d(nn.Module):
70 | """
71 | Receives x input of dim [N,C,T], reshapes it to [T,N,C]
72 | Collapses input of dim [T,N,C] to [TxN,C] and applies module to C.
73 | Finally reshapes it to [N,C_out,T].
74 | Allows handling of variable sequence lengths and minibatch sizes.
75 | : param module: Module to apply input to.
76 | """
77 | def __init__(self, module):
78 | super(TimeDistributed2d, self).__init__()
79 | self.module = module
80 |
81 | def forward(self, x):
82 | N, C, T = x.size()
83 | x = x.permute(2, 0, 1).contiguous()
84 | x = x.view(T * N, -1)
85 | x = self.module(x)
86 | x = x.view(T, N, -1)
87 | x = x.permute(1, 2, 0).contiguous()
88 | return x
89 |
90 | # Cell
91 | class TimeDistributed3d(nn.Module):
92 | """
93 | Receives x input of dim [N,L,C,T], reshapes it to [T,N,L,C]
94 | Collapses input of dim [T,N,L,C] to [TxNxL,C] and applies module to C.
95 | Finally reshapes it to [N,L,C_out,T].
96 | Allows handling of variable sequence lengths and minibatch sizes.
97 | : param module: Module to apply input to.
98 | """
99 | def __init__(self, module):
100 | super(TimeDistributed3d, self).__init__()
101 | self.module = module
102 |
103 | def forward(self, x):
104 | N, L, C, T = x.size()
105 | x = x.permute(3, 0, 1, 2).contiguous() #[N,L,C,T] --> [T,N,L,C]
106 | x = x.view(T * N * L, -1)
107 | x = self.module(x)
108 | x = x.view(T, N, L, -1)
109 | x = x.permute(1, 2, 3, 0).contiguous() #[T,N,L,C] --> [N,L,C,T]
110 | return x
111 |
112 | # Cell
113 | class RepeatVector(nn.Module):
114 | """
115 | Receives x input of dim [N,C], and repeats the vector
116 | to create tensor of shape [N, C, K]
117 | : repeats: int, the number of repetitions for the vector.
118 | """
119 | def __init__(self, repeats):
120 | super(RepeatVector, self).__init__()
121 | self.repeats = repeats
122 |
123 | def forward(self, x):
124 |         x = x.unsqueeze(-1).repeat(1, 1, self.repeats) # TODO: improve?
125 | return x
126 |
127 | # Cell
128 | class L1Regularizer(nn.Module):
129 | """
130 | Layer meant to apply elementwise L1 regularization to a dimension.
131 | Receives x input of dim [N,C] and returns the input [N,C].
132 | """
133 | def __init__(self, in_features, l1_lambda):
134 | super(L1Regularizer, self).__init__()
135 | self.l1_lambda = l1_lambda
136 | self.weight = t.nn.Parameter(t.rand((in_features), dtype=t.float),
137 | requires_grad=True)
138 |
139 | def forward(self, x):
140 | # channelwise regularization, turns on or off channels
141 | x = t.einsum('bp,p->bp', x, self.weight)
142 | return x
143 |
144 | def regularization(self):
145 | return self.l1_lambda * t.norm(self.weight, 1)
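A minimal causality check for CausalConv1d (assumed toy sizes, illustrative only): with padding = (kernel_size - 1) * dilation, the chomped output keeps the input length.

import torch
from src.models.components.common import CausalConv1d

conv = CausalConv1d(in_channels=1, out_channels=8, kernel_size=3,
                    padding=2, dilation=1, activation='ReLU')
x = torch.randn(4, 1, 50)                  # [N, C, T]
print(conv(x).shape)                       # torch.Size([4, 8, 50])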
--------------------------------------------------------------------------------
/src/models/components/drnn.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import torch
3 | import torch.nn as nn
4 | import torch.autograd as autograd
5 |
6 | # Cell
7 | class LSTMCell(nn.Module):
8 | def __init__(self, input_size, hidden_size, dropout=0.):
9 | super(LSTMCell, self).__init__()
10 | self.input_size = input_size
11 | self.hidden_size = hidden_size
12 | self.weight_ih = nn.Parameter(torch.randn(4 * hidden_size, input_size))
13 | self.weight_hh = nn.Parameter(torch.randn(4 * hidden_size, hidden_size))
14 | self.bias_ih = nn.Parameter(torch.randn(4 * hidden_size))
15 | self.bias_hh = nn.Parameter(torch.randn(4 * hidden_size))
16 | self.dropout = dropout
17 |
18 | def forward(self, inputs, hidden):
19 | hx, cx = hidden[0].squeeze(0), hidden[1].squeeze(0)
20 | gates = (torch.matmul(inputs, self.weight_ih.t()) + self.bias_ih +
21 | torch.matmul(hx, self.weight_hh.t()) + self.bias_hh)
22 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
23 |
24 | ingate = torch.sigmoid(ingate)
25 | forgetgate = torch.sigmoid(forgetgate)
26 | cellgate = torch.tanh(cellgate)
27 | outgate = torch.sigmoid(outgate)
28 |
29 | cy = (forgetgate * cx) + (ingate * cellgate)
30 | hy = outgate * torch.tanh(cy)
31 |
32 | return hy, (hy, cy)
33 |
34 | # Cell
35 | class ResLSTMCell(nn.Module):
36 | def __init__(self, input_size, hidden_size, dropout=0.):
37 | super(ResLSTMCell, self).__init__()
38 | self.register_buffer('input_size', torch.Tensor([input_size]))
39 | self.register_buffer('hidden_size', torch.Tensor([hidden_size]))
40 | self.weight_ii = nn.Parameter(torch.randn(3 * hidden_size, input_size))
41 | self.weight_ic = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
42 | self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
43 | self.bias_ii = nn.Parameter(torch.randn(3 * hidden_size))
44 | self.bias_ic = nn.Parameter(torch.randn(3 * hidden_size))
45 | self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
46 | self.weight_hh = nn.Parameter(torch.randn(1 * hidden_size, hidden_size))
47 | self.bias_hh = nn.Parameter(torch.randn(1 * hidden_size))
48 | self.weight_ir = nn.Parameter(torch.randn(hidden_size, input_size))
49 | self.dropout = dropout
50 |
51 | def forward(self, inputs, hidden):
52 | hx, cx = hidden[0].squeeze(0), hidden[1].squeeze(0)
53 |
54 | ifo_gates = (torch.matmul(inputs, self.weight_ii.t()) + self.bias_ii +
55 | torch.matmul(hx, self.weight_ih.t()) + self.bias_ih +
56 | torch.matmul(cx, self.weight_ic.t()) + self.bias_ic)
57 | ingate, forgetgate, outgate = ifo_gates.chunk(3, 1)
58 |
59 | cellgate = torch.matmul(hx, self.weight_hh.t()) + self.bias_hh
60 |
61 | ingate = torch.sigmoid(ingate)
62 | forgetgate = torch.sigmoid(forgetgate)
63 | cellgate = torch.tanh(cellgate)
64 | outgate = torch.sigmoid(outgate)
65 |
66 | cy = (forgetgate * cx) + (ingate * cellgate)
67 | ry = torch.tanh(cy)
68 |
69 | if self.input_size == self.hidden_size:
70 | hy = outgate * (ry + inputs)
71 | else:
72 | hy = outgate * (ry + torch.matmul(inputs, self.weight_ir.t()))
73 | return hy, (hy, cy)
74 |
75 | # Cell
76 | class ResLSTMLayer(nn.Module):
77 | def __init__(self, input_size, hidden_size, dropout=0.):
78 | super(ResLSTMLayer, self).__init__()
79 | self.input_size = input_size
80 | self.hidden_size = hidden_size
81 | self.cell = ResLSTMCell(input_size, hidden_size, dropout=0.)
82 |
83 | def forward(self, inputs, hidden):
84 | inputs = inputs.unbind(0)
85 | outputs = []
86 | for i in range(len(inputs)):
87 | out, hidden = self.cell(inputs[i], hidden)
88 | outputs += [out]
89 | outputs = torch.stack(outputs)
90 | return outputs, hidden
91 |
92 | # Cell
93 | class AttentiveLSTMLayer(nn.Module):
94 | def __init__(self, input_size, hidden_size, dropout=0.0):
95 | super(AttentiveLSTMLayer, self).__init__()
96 | self.input_size = input_size
97 | self.hidden_size = hidden_size
98 | attention_hsize = hidden_size
99 | self.attention_hsize = attention_hsize
100 |
101 | self.cell = LSTMCell(input_size, hidden_size)
102 | self.attn_layer = nn.Sequential(nn.Linear(2 * hidden_size + input_size, attention_hsize),
103 | nn.Tanh(),
104 | nn.Linear(attention_hsize, 1))
105 | self.softmax = nn.Softmax(dim=0)
106 | self.dropout = dropout
107 |
108 | def forward(self, inputs, hidden):
109 | inputs = inputs.unbind(0)
110 | outputs = []
111 |
112 | for t in range(len(inputs)):
113 | # attention on windows
114 | hx, cx = (tensor.squeeze(0) for tensor in hidden)
115 | hx_rep = hx.repeat(len(inputs), 1, 1)
116 | cx_rep = cx.repeat(len(inputs), 1, 1)
117 | x = torch.cat((inputs, hx_rep, cx_rep), dim=-1)
118 | l = self.attn_layer(x)
119 | beta = self.softmax(l)
120 | context = torch.bmm(beta.permute(1, 2, 0),
121 | inputs.permute(1, 0, 2)).squeeze(1)
122 | out, hidden = self.cell(context, hidden)
123 | outputs += [out]
124 | outputs = torch.stack(outputs)
125 | return outputs, hidden
126 |
127 | # Cell
128 | class DRNN(nn.Module):
129 |
130 | def __init__(self, n_input, n_hidden, n_layers, dilations, dropout=0, cell_type='GRU', batch_first=False):
131 | super(DRNN, self).__init__()
132 |
133 | self.dilations = dilations
134 | self.cell_type = cell_type
135 | self.batch_first = batch_first
136 |
137 | layers = []
138 | if self.cell_type == "GRU":
139 | cell = nn.GRU
140 | elif self.cell_type == "RNN":
141 | cell = nn.RNN
142 | elif self.cell_type == "LSTM":
143 | cell = nn.LSTM
144 | elif self.cell_type == "ResLSTM":
145 | cell = ResLSTMLayer
146 | elif self.cell_type == "AttentiveLSTM":
147 | cell = AttentiveLSTMLayer
148 | else:
149 | raise NotImplementedError
150 |
151 | for i in range(n_layers):
152 | if i == 0:
153 | c = cell(n_input, n_hidden, dropout=dropout)
154 | else:
155 | c = cell(n_hidden, n_hidden, dropout=dropout)
156 | layers.append(c)
157 | self.cells = nn.Sequential(*layers)
158 |
159 | def forward(self, inputs, hidden=None):
160 | if self.batch_first:
161 | inputs = inputs.transpose(0, 1)
162 | outputs = []
163 | for i, (cell, dilation) in enumerate(zip(self.cells, self.dilations)):
164 | if hidden is None:
165 | inputs, _ = self.drnn_layer(cell, inputs, dilation)
166 | else:
167 | inputs, hidden[i] = self.drnn_layer(cell, inputs, dilation, hidden[i])
168 |
169 | outputs.append(inputs[-dilation:])
170 |
171 | if self.batch_first:
172 | inputs = inputs.transpose(0, 1)
173 | return inputs, outputs
174 |
175 | def drnn_layer(self, cell, inputs, rate, hidden=None):
176 | n_steps = len(inputs)
177 | batch_size = inputs[0].size(0)
178 | hidden_size = cell.hidden_size
179 |
180 | inputs, dilated_steps = self._pad_inputs(inputs, n_steps, rate)
181 | dilated_inputs = self._prepare_inputs(inputs, rate)
182 |
183 | if hidden is None:
184 | dilated_outputs, hidden = self._apply_cell(dilated_inputs, cell, batch_size, rate, hidden_size)
185 | else:
186 | hidden = self._prepare_inputs(hidden, rate)
187 | dilated_outputs, hidden = self._apply_cell(dilated_inputs, cell, batch_size, rate, hidden_size,
188 | hidden=hidden)
189 |
190 | splitted_outputs = self._split_outputs(dilated_outputs, rate)
191 | outputs = self._unpad_outputs(splitted_outputs, n_steps)
192 |
193 | return outputs, hidden
194 |
195 | def _apply_cell(self, dilated_inputs, cell, batch_size, rate, hidden_size, hidden=None):
196 | if hidden is None:
197 | hidden = torch.zeros(batch_size * rate, hidden_size,
198 | dtype=dilated_inputs.dtype,
199 | device=dilated_inputs.device)
200 | hidden = hidden.unsqueeze(0)
201 |
202 | if self.cell_type in ['LSTM', 'ResLSTM', 'AttentiveLSTM']:
203 | hidden = (hidden, hidden)
204 |
205 | dilated_outputs, hidden = cell(dilated_inputs, hidden) # compatibility hack
206 |
207 | return dilated_outputs, hidden
208 |
209 | def _unpad_outputs(self, splitted_outputs, n_steps):
210 | return splitted_outputs[:n_steps]
211 |
212 | def _split_outputs(self, dilated_outputs, rate):
213 | batchsize = dilated_outputs.size(1) // rate
214 |
215 | blocks = [dilated_outputs[:, i * batchsize: (i + 1) * batchsize, :] for i in range(rate)]
216 |
217 | interleaved = torch.stack((blocks)).transpose(1, 0).contiguous()
218 | interleaved = interleaved.view(dilated_outputs.size(0) * rate,
219 | batchsize,
220 | dilated_outputs.size(2))
221 | return interleaved
222 |
223 | def _pad_inputs(self, inputs, n_steps, rate):
224 | iseven = (n_steps % rate) == 0
225 |
226 | if not iseven:
227 | dilated_steps = n_steps // rate + 1
228 |
229 | zeros_ = torch.zeros(dilated_steps * rate - inputs.size(0),
230 | inputs.size(1),
231 | inputs.size(2),
232 | dtype=inputs.dtype,
233 | device=inputs.device)
234 | inputs = torch.cat((inputs, zeros_))
235 | else:
236 | dilated_steps = n_steps // rate
237 |
238 | return inputs, dilated_steps
239 |
240 | def _prepare_inputs(self, inputs, rate):
241 | dilated_inputs = torch.cat([inputs[j::rate, :, :] for j in range(rate)], 1)
242 | return dilated_inputs
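A toy configuration of the dilated RNN above (illustrative only); inputs follow the (seq_len, batch, features) layout since batch_first defaults to False.

import torch
from src.models.components.drnn import DRNN

drnn = DRNN(n_input=3, n_hidden=8, n_layers=2, dilations=[1, 2], cell_type='GRU')
x = torch.randn(16, 4, 3)                  # (seq_len, batch, n_input)
out, last_states = drnn(x)
print(out.shape)                           # torch.Size([16, 4, 8])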
--------------------------------------------------------------------------------
/src/models/components/embed.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import math
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.nn.utils import weight_norm
8 |
9 | # Cell
10 | class PositionalEmbedding(nn.Module):
11 | def __init__(self, d_model, max_len=5000):
12 | super(PositionalEmbedding, self).__init__()
13 | # Compute the positional encodings once in log space.
14 | pe = torch.zeros(max_len, d_model).float()
15 |         pe.requires_grad = False
16 |
17 | position = torch.arange(0, max_len).float().unsqueeze(1)
18 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
19 |
20 | pe[:, 0::2] = torch.sin(position * div_term)
21 | pe[:, 1::2] = torch.cos(position * div_term)
22 |
23 | pe = pe.unsqueeze(0)
24 | self.register_buffer('pe', pe)
25 |
26 | def forward(self, x):
27 | return self.pe[:, :x.size(1)]
28 |
29 |
30 | class TokenEmbedding(nn.Module):
31 | def __init__(self, c_in, d_model):
32 | super(TokenEmbedding, self).__init__()
33 |         padding = 1 if tuple(int(v) for v in torch.__version__.split('+')[0].split('.')[:2]) >= (1, 5) else 2
34 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
35 | kernel_size=3, padding=padding, padding_mode='circular', bias=False)
36 | for m in self.modules():
37 | if isinstance(m, nn.Conv1d):
38 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
39 |
40 | def forward(self, x):
41 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
42 | return x
43 |
44 |
45 | class FixedEmbedding(nn.Module):
46 | def __init__(self, c_in, d_model):
47 | super(FixedEmbedding, self).__init__()
48 |
49 | w = torch.zeros(c_in, d_model).float()
50 |         w.requires_grad = False
51 |
52 | position = torch.arange(0, c_in).float().unsqueeze(1)
53 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
54 |
55 | w[:, 0::2] = torch.sin(position * div_term)
56 | w[:, 1::2] = torch.cos(position * div_term)
57 |
58 | self.emb = nn.Embedding(c_in, d_model)
59 | self.emb.weight = nn.Parameter(w, requires_grad=False)
60 |
61 | def forward(self, x):
62 | return self.emb(x).detach()
63 |
64 |
65 | class TemporalEmbedding(nn.Module):
66 | def __init__(self, d_model, embed_type='fixed', freq='h'):
67 | super(TemporalEmbedding, self).__init__()
68 |
69 | minute_size = 4
70 | hour_size = 24
71 | weekday_size = 7
72 | day_size = 32
73 | month_size = 13
74 |
75 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
76 | if freq == 't':
77 | self.minute_embed = Embed(minute_size, d_model)
78 | self.hour_embed = Embed(hour_size, d_model)
79 | self.weekday_embed = Embed(weekday_size, d_model)
80 | self.day_embed = Embed(day_size, d_model)
81 | self.month_embed = Embed(month_size, d_model)
82 |
83 | def forward(self, x):
84 | x = x.long()
85 |
86 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0.
87 | hour_x = self.hour_embed(x[:, :, 3])
88 | weekday_x = self.weekday_embed(x[:, :, 2])
89 | day_x = self.day_embed(x[:, :, 1])
90 | month_x = self.month_embed(x[:, :, 0])
91 |
92 | return hour_x + weekday_x + day_x + month_x + minute_x
93 |
94 |
95 | class TimeFeatureEmbedding(nn.Module):
96 | def __init__(self, d_model, embed_type='timeF', freq='h'):
97 | super(TimeFeatureEmbedding, self).__init__()
98 |
99 | freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
100 | d_inp = freq_map[freq]
101 | self.embed = nn.Linear(d_inp, d_model, bias=False)
102 |
103 | def forward(self, x):
104 | return self.embed(x)
105 |
106 |
107 | class DataEmbedding(nn.Module):
108 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
109 | super(DataEmbedding, self).__init__()
110 |
111 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
112 | self.position_embedding = PositionalEmbedding(d_model=d_model)
113 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
114 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
115 | d_model=d_model, embed_type=embed_type, freq=freq)
116 | self.dropout = nn.Dropout(p=dropout)
117 |
118 | def forward(self, x, x_mark):
119 | x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
120 | return self.dropout(x)
121 |
122 |
123 | class DataEmbedding_wo_pos(nn.Module):
124 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
125 | super(DataEmbedding_wo_pos, self).__init__()
126 |
127 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
128 | self.position_embedding = PositionalEmbedding(d_model=d_model)
129 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
130 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
131 | d_model=d_model, embed_type=embed_type, freq=freq)
132 | self.dropout = nn.Dropout(p=dropout)
133 |
134 | def forward(self, x, x_mark):
135 | x = self.value_embedding(x) + self.temporal_embedding(x_mark)
136 | return self.dropout(x)
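A shape sketch for DataEmbedding with hourly marks (assumed sizes, illustrative only); the x_mark columns follow the (month, day, weekday, hour) order indexed in TemporalEmbedding.

import torch
from src.models.components.embed import DataEmbedding

emb = DataEmbedding(c_in=7, d_model=64, embed_type='fixed', freq='h')
x = torch.randn(2, 96, 7)                          # observed values
x_mark = torch.zeros(2, 96, 4, dtype=torch.long)   # calendar features
print(emb(x, x_mark).shape)                        # torch.Size([2, 96, 64])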
--------------------------------------------------------------------------------
/src/models/components/selfattention.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | from math import sqrt
3 |
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | # Cell
10 | class TriangularCausalMask():
11 | def __init__(self, B, L, device="cpu"):
12 | mask_shape = [B, 1, L, L]
13 | with torch.no_grad():
14 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
15 |
16 | @property
17 | def mask(self):
18 | return self._mask
19 |
20 |
21 | class ProbMask():
22 | def __init__(self, B, H, L, index, scores, device="cpu"):
23 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
24 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
25 | indicator = _mask_ex[torch.arange(B)[:, None, None],
26 | torch.arange(H)[None, :, None],
27 | index, :].to(device)
28 | self._mask = indicator.view(scores.shape).to(device)
29 |
30 | @property
31 | def mask(self):
32 | return self._mask
33 |
34 | # Cell
35 | class FullAttention(nn.Module):
36 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
37 | super(FullAttention, self).__init__()
38 | self.scale = scale
39 | self.mask_flag = mask_flag
40 | self.output_attention = output_attention
41 | self.dropout = nn.Dropout(attention_dropout)
42 |
43 | def forward(self, queries, keys, values, attn_mask):
44 | B, L, H, E = queries.shape
45 | _, S, _, D = values.shape
46 | scale = self.scale or 1. / sqrt(E)
47 |
48 | scores = torch.einsum("blhe,bshe->bhls", queries, keys)
49 |
50 | if self.mask_flag:
51 | if attn_mask is None:
52 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
53 |
54 | scores.masked_fill_(attn_mask.mask, -np.inf)
55 |
56 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
57 | V = torch.einsum("bhls,bshd->blhd", A, values)
58 |
59 | if self.output_attention:
60 | return (V.contiguous(), A)
61 | else:
62 | return (V.contiguous(), None)
63 |
64 |
65 | class ProbAttention(nn.Module):
66 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
67 | super(ProbAttention, self).__init__()
68 | self.factor = factor
69 | self.scale = scale
70 | self.mask_flag = mask_flag
71 | self.output_attention = output_attention
72 | self.dropout = nn.Dropout(attention_dropout)
73 |
74 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
75 | # Q [B, H, L, D]
76 | B, H, L_K, E = K.shape
77 | _, _, L_Q, _ = Q.shape
78 |
79 | # calculate the sampled Q_K
80 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
81 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
82 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
83 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
84 |
85 |         # find the Top_k query with sparsity measurement
86 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
87 | M_top = M.topk(n_top, sorted=False)[1]
88 |
89 | # use the reduced Q to calculate Q_K
90 | Q_reduce = Q[torch.arange(B)[:, None, None],
91 | torch.arange(H)[None, :, None],
92 | M_top, :] # factor*ln(L_q)
93 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
94 |
95 | return Q_K, M_top
96 |
97 | def _get_initial_context(self, V, L_Q):
98 | B, H, L_V, D = V.shape
99 | if not self.mask_flag:
100 | # V_sum = V.sum(dim=-2)
101 | V_sum = V.mean(dim=-2)
102 | contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
103 | else: # use mask
104 | assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
105 | contex = V.cumsum(dim=-2)
106 | return contex
107 |
108 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
109 | B, H, L_V, D = V.shape
110 |
111 | if self.mask_flag:
112 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
113 | scores.masked_fill_(attn_mask.mask, -np.inf)
114 |
115 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
116 |
117 | context_in[torch.arange(B)[:, None, None],
118 | torch.arange(H)[None, :, None],
119 | index, :] = torch.matmul(attn, V).type_as(context_in)
120 | if self.output_attention:
121 | attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
122 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
123 | return (context_in, attns)
124 | else:
125 | return (context_in, None)
126 |
127 | def forward(self, queries, keys, values, attn_mask):
128 | B, L_Q, H, D = queries.shape
129 | _, L_K, _, _ = keys.shape
130 |
131 | queries = queries.transpose(2, 1)
132 | keys = keys.transpose(2, 1)
133 | values = values.transpose(2, 1)
134 |
135 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
136 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
137 |
138 | U_part = U_part if U_part < L_K else L_K
139 | u = u if u < L_Q else L_Q
140 |
141 | scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)
142 |
143 | # add scale factor
144 | scale = self.scale or 1. / sqrt(D)
145 | if scale is not None:
146 | scores_top = scores_top * scale
147 | # get the context
148 | context = self._get_initial_context(values, L_Q)
149 | # update the context with selected top_k queries
150 | context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)
151 |
152 | return context.contiguous(), attn
153 |
154 |
155 | class AttentionLayer(nn.Module):
156 | def __init__(self, attention, d_model, n_heads, d_keys=None,
157 | d_values=None):
158 | super(AttentionLayer, self).__init__()
159 |
160 | d_keys = d_keys or (d_model // n_heads)
161 | d_values = d_values or (d_model // n_heads)
162 |
163 | self.inner_attention = attention
164 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
165 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
166 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
167 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
168 | self.n_heads = n_heads
169 |
170 | def forward(self, queries, keys, values, attn_mask):
171 | B, L, _ = queries.shape
172 | _, S, _ = keys.shape
173 | H = self.n_heads
174 |
175 | queries = self.query_projection(queries).view(B, L, H, -1)
176 | keys = self.key_projection(keys).view(B, S, H, -1)
177 | values = self.value_projection(values).view(B, S, H, -1)
178 |
179 | out, attn = self.inner_attention(
180 | queries,
181 | keys,
182 | values,
183 | attn_mask
184 | )
185 | out = out.view(B, L, -1)
186 |
187 | return self.out_projection(out), attn
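A shape-only sketch of AttentionLayer wrapping FullAttention (toy sizes, illustrative only):

import torch
from src.models.components.selfattention import FullAttention, AttentionLayer

attn = AttentionLayer(FullAttention(mask_flag=False, attention_dropout=0.0),
                      d_model=32, n_heads=4)
x = torch.randn(2, 48, 32)
out, _ = attn(x, x, x, attn_mask=None)
print(out.shape)                           # torch.Size([2, 48, 32])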
--------------------------------------------------------------------------------
/src/models/components/tcn.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import torch
3 | import torch.nn as nn
4 | from torch.nn.utils import weight_norm
5 |
6 | from .common import Chomp1d
7 | from .common import CausalConv1d
8 |
9 | # Cell
10 | # https://github.com/locuslab/TCN
11 | class _TemporalBlock(nn.Module):
12 | def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
13 | super(_TemporalBlock, self).__init__()
14 | self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
15 | stride=stride, padding=padding, dilation=dilation))
16 | self.chomp1 = Chomp1d(padding)
17 | self.relu1 = nn.ReLU()
18 | self.dropout1 = nn.Dropout(dropout)
19 |
20 | self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
21 | stride=stride, padding=padding, dilation=dilation))
22 | self.chomp2 = Chomp1d(padding)
23 | self.relu2 = nn.ReLU()
24 | self.dropout2 = nn.Dropout(dropout)
25 |
26 | self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
27 | self.conv2, self.chomp2, self.relu2, self.dropout2)
28 | self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
29 | self.relu = nn.ReLU()
30 | self.init_weights()
31 |
32 | def init_weights(self):
33 | self.conv1.weight.data.normal_(0, 0.01)
34 | self.conv2.weight.data.normal_(0, 0.01)
35 | if self.downsample is not None:
36 | self.downsample.weight.data.normal_(0, 0.01)
37 |
38 | def forward(self, x):
39 | out = self.net(x)
40 | res = x if self.downsample is None else self.downsample(x)
41 | return self.relu(out + res)
42 |
43 | # Cell
44 | class _TemporalBlock2(nn.Module):
45 | def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
46 | super(_TemporalBlock2, self).__init__()
47 | self.causalconv1 = CausalConv1d(in_channels=n_inputs, out_channels=n_outputs,
48 | kernel_size=kernel_size, stride=stride, padding=padding,
49 | dilation=dilation, activation='ReLU', with_weight_norm=True)
50 |
51 | self.causalconv2 = CausalConv1d(in_channels=n_outputs, out_channels=n_outputs,
52 | kernel_size=kernel_size, stride=stride, padding=padding,
53 | dilation=dilation, activation='ReLU', with_weight_norm=True)
54 |
55 | self.net = nn.Sequential(self.causalconv1, nn.Dropout(dropout),
56 | self.causalconv2, nn.Dropout(dropout))
57 |
58 | self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
59 | self.relu = nn.ReLU()
60 | self.init_weights()
61 |
62 | def init_weights(self):
63 | self.causalconv1.conv.weight.data.normal_(0, 0.01)
64 | self.causalconv2.conv.weight.data.normal_(0, 0.01)
65 | if self.downsample is not None:
66 | self.downsample.weight.data.normal_(0, 0.01)
67 |
68 | def forward(self, x):
69 | out = self.net(x)
70 | res = x if self.downsample is None else self.downsample(x)
71 | return self.relu(out + res)
72 |
73 | # Cell
74 | class _TemporalConvNet(nn.Module):
75 | def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
76 | super(_TemporalConvNet, self).__init__()
77 | layers = []
78 | num_levels = len(num_channels)
79 | for i in range(num_levels):
80 | dilation_size = 2 ** i
81 | in_channels = num_inputs if i == 0 else num_channels[i-1]
82 | out_channels = num_channels[i]
83 | #layers += [_TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
84 | # padding=(kernel_size-1) * dilation_size, dropout=dropout)]
85 | layers += [_TemporalBlock2(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
86 | padding=(kernel_size-1) * dilation_size, dropout=dropout)]
87 |
88 | self.network = nn.Sequential(*layers)
89 |
90 | def forward(self, x):
91 | return self.network(x)
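A toy _TemporalConvNet stack (assumed channel plan, illustrative only); with two causal convolutions per block the receptive field is roughly 1 + 2*(kernel_size - 1)*(2**levels - 1).

import torch
from src.models.components.tcn import _TemporalConvNet

tcn = _TemporalConvNet(num_inputs=1, num_channels=[8, 8, 8], kernel_size=2)
x = torch.randn(4, 1, 100)                 # [N, C, T]
print(tcn(x).shape)                        # torch.Size([4, 8, 100])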
--------------------------------------------------------------------------------
/src/models/components/transformer.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | # Cell
7 | class ConvLayer(nn.Module):
8 | def __init__(self, c_in):
9 | super(ConvLayer, self).__init__()
10 | self.downConv = nn.Conv1d(in_channels=c_in,
11 | out_channels=c_in,
12 | kernel_size=3,
13 | padding=2,
14 | padding_mode='circular')
15 | self.norm = nn.BatchNorm1d(c_in)
16 | self.activation = nn.ELU()
17 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
18 |
19 | def forward(self, x):
20 | x = self.downConv(x.permute(0, 2, 1))
21 | x = self.norm(x)
22 | x = self.activation(x)
23 | x = self.maxPool(x)
24 | x = x.transpose(1, 2)
25 | return x
26 |
27 |
28 | class EncoderLayer(nn.Module):
29 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
30 | super(EncoderLayer, self).__init__()
31 | d_ff = d_ff or 4 * d_model
32 | self.attention = attention
33 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
34 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
35 | self.norm1 = nn.LayerNorm(d_model)
36 | self.norm2 = nn.LayerNorm(d_model)
37 | self.dropout = nn.Dropout(dropout)
38 | self.activation = F.relu if activation == "relu" else F.gelu
39 |
40 | def forward(self, x, attn_mask=None):
41 | new_x, attn = self.attention(
42 | x, x, x,
43 | attn_mask=attn_mask
44 | )
45 | x = x + self.dropout(new_x)
46 |
47 | y = x = self.norm1(x)
48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
49 | y = self.dropout(self.conv2(y).transpose(-1, 1))
50 |
51 | return self.norm2(x + y), attn
52 |
53 |
54 | class Encoder(nn.Module):
55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
56 | super(Encoder, self).__init__()
57 | self.attn_layers = nn.ModuleList(attn_layers)
58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
59 | self.norm = norm_layer
60 |
61 | def forward(self, x, attn_mask=None):
62 | # x [B, L, D]
63 | attns = []
64 | if self.conv_layers is not None:
65 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
66 | x, attn = attn_layer(x, attn_mask=attn_mask)
67 | x = conv_layer(x)
68 | attns.append(attn)
69 | x, attn = self.attn_layers[-1](x)
70 | attns.append(attn)
71 | else:
72 | for attn_layer in self.attn_layers:
73 | x, attn = attn_layer(x, attn_mask=attn_mask)
74 | attns.append(attn)
75 |
76 | if self.norm is not None:
77 | x = self.norm(x)
78 |
79 | return x, attns
80 |
81 |
82 | class DecoderLayer(nn.Module):
83 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
84 | dropout=0.1, activation="relu"):
85 | super(DecoderLayer, self).__init__()
86 | d_ff = d_ff or 4 * d_model
87 | self.self_attention = self_attention
88 | self.cross_attention = cross_attention
89 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
90 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
91 | self.norm1 = nn.LayerNorm(d_model)
92 | self.norm2 = nn.LayerNorm(d_model)
93 | self.norm3 = nn.LayerNorm(d_model)
94 | self.dropout = nn.Dropout(dropout)
95 | self.activation = F.relu if activation == "relu" else F.gelu
96 |
97 | def forward(self, x, cross, x_mask=None, cross_mask=None):
98 | x = x + self.dropout(self.self_attention(
99 | x, x, x,
100 | attn_mask=x_mask
101 | )[0])
102 | x = self.norm1(x)
103 |
104 | x = x + self.dropout(self.cross_attention(
105 | x, cross, cross,
106 | attn_mask=cross_mask
107 | )[0])
108 |
109 | y = x = self.norm2(x)
110 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
111 | y = self.dropout(self.conv2(y).transpose(-1, 1))
112 |
113 | return self.norm3(x + y)
114 |
115 |
116 | class Decoder(nn.Module):
117 | def __init__(self, layers, norm_layer=None, projection=None):
118 | super(Decoder, self).__init__()
119 | self.layers = nn.ModuleList(layers)
120 | self.norm = norm_layer
121 | self.projection = projection
122 |
123 | def forward(self, x, cross, x_mask=None, cross_mask=None):
124 | for layer in self.layers:
125 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
126 |
127 | if self.norm is not None:
128 | x = self.norm(x)
129 |
130 | if self.projection is not None:
131 | x = self.projection(x)
132 | return x
--------------------------------------------------------------------------------
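The blocks above mirror the original Informer encoder/decoder stack: attention, a position-wise Conv1d feed-forward, residual connections, and optional ConvLayer distilling that roughly halves the sequence length between attention layers. As a rough usage sketch (not part of the repo), the encoder can be exercised on its own; ToyAttention below is a hypothetical stand-in for AttentionLayer(FullAttention(...), d_model, n_heads) from src/models/components/selfattention.py, kept only so the snippet is self-contained.

import torch
import torch.nn as nn
# assumes the repo root is on PYTHONPATH
from src.models.components.transformer import Encoder, EncoderLayer, ConvLayer

class ToyAttention(nn.Module):
    """Hypothetical stand-in with the same call signature as the repo's attention layers:
    (queries, keys, values, attn_mask=...) -> (output, attention_weights)."""
    def __init__(self, d_model, n_heads):
        super().__init__()
        self.mha = nn.MultiheadAttention(d_model, n_heads, batch_first=True)

    def forward(self, queries, keys, values, attn_mask=None):
        out, _ = self.mha(queries, keys, values, need_weights=False)
        return out, None

d_model, n_heads, e_layers = 64, 4, 3
encoder = Encoder(
    [EncoderLayer(ToyAttention(d_model, n_heads), d_model, dropout=0.1)
     for _ in range(e_layers)],
    [ConvLayer(d_model) for _ in range(e_layers - 1)],  # distilling between attention layers
    norm_layer=nn.LayerNorm(d_model),
)

x = torch.randn(8, 96, d_model)   # [B, L, D]
out, attns = encoder(x)
print(out.shape)                  # length shrinks roughly 2x per ConvLayer (here about [8, 24, 64])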
/src/models/esrnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/esrnn/__init__.py
--------------------------------------------------------------------------------
/src/models/nbeats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/nbeats/__init__.py
--------------------------------------------------------------------------------
/src/models/nhits/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/nhits/__init__.py
--------------------------------------------------------------------------------
/src/models/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/transformer/__init__.py
--------------------------------------------------------------------------------
/src/models/transformer/autoformer.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import math
3 | import random
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import pytorch_lightning as pl
10 | from torch import optim
11 |
12 | from ..components.embed import DataEmbedding, DataEmbedding_wo_pos
13 | from ..components.autocorrelation import (
14 | AutoCorrelation, AutoCorrelationLayer
15 | )
16 | from ..components.autoformer import (
17 | Encoder, Decoder, EncoderLayer, DecoderLayer,
18 | my_Layernorm, series_decomp
19 | )
20 | from ...losses.utils import LossFunction
21 |
22 | # Cell
23 | class _Autoformer(nn.Module):
24 | """
25 |     Autoformer is the first method to achieve series-wise connections,
26 |     with inherent O(L log L) complexity.
27 | """
28 | def __init__(self, seq_len,
29 | label_len, pred_len, output_attention,
30 | enc_in, dec_in, d_model, c_out, embed, freq, dropout,
31 | factor, n_heads, d_ff, moving_avg, activation, e_layers,
32 | d_layers):
33 | super(_Autoformer, self).__init__()
34 | self.seq_len = seq_len
35 | self.label_len = label_len
36 | self.pred_len = pred_len
37 | self.output_attention = output_attention
38 |
39 | # Decomp
40 | kernel_size = moving_avg
41 | self.decomp = series_decomp(kernel_size)
42 |
43 | # Embedding
44 | # The series-wise connection inherently contains the sequential information.
45 | # Thus, we can discard the position embedding of transformers.
46 | self.enc_embedding = DataEmbedding_wo_pos(enc_in, d_model, embed, freq,
47 | dropout)
48 | self.dec_embedding = DataEmbedding_wo_pos(dec_in, d_model, embed, freq,
49 | dropout)
50 |
51 | # Encoder
52 | self.encoder = Encoder(
53 | [
54 | EncoderLayer(
55 | AutoCorrelationLayer(
56 | AutoCorrelation(False, factor, attention_dropout=dropout,
57 | output_attention=output_attention),
58 | d_model, n_heads),
59 | d_model,
60 | d_ff,
61 | moving_avg=moving_avg,
62 | dropout=dropout,
63 | activation=activation
64 | ) for l in range(e_layers)
65 | ],
66 | norm_layer=my_Layernorm(d_model)
67 | )
68 | # Decoder
69 | self.decoder = Decoder(
70 | [
71 | DecoderLayer(
72 | AutoCorrelationLayer(
73 | AutoCorrelation(True, factor, attention_dropout=dropout,
74 | output_attention=False),
75 | d_model, n_heads),
76 | AutoCorrelationLayer(
77 | AutoCorrelation(False, factor, attention_dropout=dropout,
78 | output_attention=False),
79 | d_model, n_heads),
80 | d_model,
81 | c_out,
82 | d_ff,
83 | moving_avg=moving_avg,
84 | dropout=dropout,
85 | activation=activation,
86 | )
87 | for l in range(d_layers)
88 | ],
89 | norm_layer=my_Layernorm(d_model),
90 | projection=nn.Linear(d_model, c_out, bias=True)
91 | )
92 |
93 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
94 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
95 | # decomp init
96 | mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
97 | zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
98 | seasonal_init, trend_init = self.decomp(x_enc)
99 | # decoder input
100 | trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
101 | seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
102 | # enc
103 | enc_out = self.enc_embedding(x_enc, x_mark_enc)
104 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
105 | # dec
106 | dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
107 | seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
108 | trend=trend_init)
109 | # final
110 | dec_out = trend_part + seasonal_part
111 |
112 | if self.output_attention:
113 | return dec_out[:, -self.pred_len:, :], attns
114 | else:
115 | return dec_out[:, -self.pred_len:, :]
116 |
117 | # Cell
118 | class Autoformer(pl.LightningModule):
119 | def __init__(self, seq_len,
120 | label_len, pred_len, output_attention,
121 | enc_in, dec_in, d_model, c_out, embed, freq, dropout,
122 | factor, n_heads, d_ff, moving_avg, activation, e_layers, d_layers,
123 | loss_train, loss_valid, loss_hypar, learning_rate,
124 | lr_decay, weight_decay, lr_decay_step_size,
125 | random_seed):
126 | super(Autoformer, self).__init__()
127 |
128 | #------------------------ Model Attributes ------------------------#
129 | # Architecture parameters
130 | self.seq_len = seq_len
131 | self.label_len = label_len
132 | self.pred_len = pred_len
133 | self.output_attention = output_attention
134 | self.enc_in = enc_in
135 | self.dec_in = dec_in
136 | self.d_model = d_model
137 | self.c_out = c_out
138 | self.embed = embed
139 | self.freq = freq
140 | self.dropout = dropout
141 | self.factor = factor
142 | self.n_heads = n_heads
143 | self.d_ff = d_ff
144 | self.moving_avg = moving_avg
145 | self.activation = activation
146 | self.e_layers = e_layers
147 | self.d_layers = d_layers
148 |
149 | # Loss functions
150 | self.loss_train = loss_train
151 | self.loss_hypar = loss_hypar
152 | self.loss_valid = loss_valid
153 | self.loss_fn_train = LossFunction(loss_train,
154 | seasonality=self.loss_hypar)
155 | self.loss_fn_valid = LossFunction(loss_valid,
156 | seasonality=self.loss_hypar)
157 |
158 | # Regularization and optimization parameters
159 | self.learning_rate = learning_rate
160 | self.lr_decay = lr_decay
161 | self.weight_decay = weight_decay
162 | self.lr_decay_step_size = lr_decay_step_size
163 | self.random_seed = random_seed
164 |
165 | self.model = _Autoformer(seq_len,
166 | label_len, pred_len, output_attention,
167 | enc_in, dec_in, d_model, c_out,
168 | embed, freq, dropout,
169 | factor, n_heads, d_ff,
170 | moving_avg, activation, e_layers,
171 | d_layers)
172 |
173 | def forward(self, batch):
174 |         """
175 |         Autoformer expects a batch of shape (batch_size, time, series) for y
176 |         and (batch_size, time, exogenous) for x;
177 |         it does not need a separate X for each time series.
178 |         Use the PyTorch DataLoader instead of TimeSeriesLoader.
179 |         """
180 | Y = batch['Y'].permute(0, 2, 1)
181 | X = batch['X'][:, 0, :, :].permute(0, 2, 1)
182 | sample_mask = batch['sample_mask'].permute(0, 2, 1)
183 | available_mask = batch['available_mask']
184 |
185 | s_begin = 0
186 | s_end = s_begin + self.seq_len
187 | r_begin = s_end - self.label_len
188 | r_end = r_begin + self.label_len + self.pred_len
189 |
190 | batch_x = Y[:, s_begin:s_end, :]
191 | batch_y = Y[:, r_begin:r_end, :]
192 | batch_x_mark = X[:, s_begin:s_end, :]
193 | batch_y_mark = X[:, r_begin:r_end, :]
194 | outsample_mask = sample_mask[:, r_begin:r_end, :]
195 |
196 | dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
197 | dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
198 |
199 | if self.output_attention:
200 | forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
201 | else:
202 | forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
203 |
204 | batch_y = batch_y[:, -self.pred_len:, :]
205 | outsample_mask = outsample_mask[:, -self.pred_len:, :]
206 |
207 | return batch_y, forecast, outsample_mask
208 |
209 | def training_step(self, batch, batch_idx):
210 |
211 | outsample_y, forecast, outsample_mask = self(batch)
212 |
213 | loss = self.loss_fn_train(y=outsample_y,
214 | y_hat=forecast,
215 | mask=outsample_mask,
216 |                                   y_insample=batch['Y'].permute(0, 2, 1))
217 |
218 | self.log('train_loss', loss, prog_bar=True, on_epoch=True)
219 |
220 | return loss
221 |
222 | def validation_step(self, batch, idx):
223 |
224 | outsample_y, forecast, outsample_mask = self(batch)
225 |
226 | loss = self.loss_fn_valid(y=outsample_y,
227 | y_hat=forecast,
228 | mask=outsample_mask,
229 |                                   y_insample=batch['Y'].permute(0, 2, 1))
230 |
231 | self.log('val_loss', loss, prog_bar=True)
232 |
233 | return loss
234 |
235 | def on_fit_start(self):
236 | torch.manual_seed(self.random_seed)
237 | np.random.seed(self.random_seed)
238 | random.seed(self.random_seed)
239 |
240 | def configure_optimizers(self):
241 | optimizer = optim.Adam(self.model.parameters(),
242 | lr=self.learning_rate,
243 | weight_decay=self.weight_decay)
244 |
245 | lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
246 | step_size=self.lr_decay_step_size,
247 | gamma=self.lr_decay)
248 |
249 | return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler}
--------------------------------------------------------------------------------
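The Lightning wrapper above only stores hyperparameters and delegates to _Autoformer, so instantiating it is a matter of passing the constructor arguments by keyword. A hedged configuration sketch with illustrative values follows; the embed/freq flags and the loss names are assumptions about what DataEmbedding_wo_pos and LossFunction accept, not values taken from the repo.

from src.models.transformer.autoformer import Autoformer

model = Autoformer(
    seq_len=96, label_len=48, pred_len=96, output_attention=False,
    enc_in=7, dec_in=7, d_model=512, c_out=7,
    embed='timeF', freq='h', dropout=0.05,              # assumed time-feature embedding flags
    factor=3, n_heads=8, d_ff=2048, moving_avg=25,
    activation='gelu', e_layers=2, d_layers=1,
    loss_train='MAE', loss_valid='MAE', loss_hypar=24,  # assumed names understood by LossFunction
    learning_rate=1e-4, lr_decay=0.5, weight_decay=0.0,
    lr_decay_step_size=2, random_seed=1,
)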
/src/models/transformer/informer.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import math
3 | import random
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import pytorch_lightning as pl
10 | from torch import optim
11 |
12 | from ..components.transformer import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
13 | from ..components.selfattention import (
14 | TriangularCausalMask, ProbMask,
15 | FullAttention, ProbAttention, AttentionLayer
16 | )
17 | from ..components.embed import DataEmbedding
18 | from ...losses.utils import LossFunction
19 |
20 | # Cell
21 | class _Informer(nn.Module):
22 | """
23 |     Informer with ProbSparse attention in O(L log L) complexity
24 | """
25 | def __init__(self, pred_len, output_attention,
26 | enc_in, dec_in, d_model, c_out, embed, freq, dropout,
27 | factor, n_heads, d_ff, activation, e_layers,
28 | d_layers, distil):
29 | super(_Informer, self).__init__()
30 | self.pred_len = pred_len
31 | self.output_attention = output_attention
32 |
33 | # Embedding
34 | self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq,
35 | dropout)
36 | self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq,
37 | dropout)
38 |
39 | # Encoder
40 | self.encoder = Encoder(
41 | [
42 | EncoderLayer(
43 | AttentionLayer(
44 | ProbAttention(False, factor, attention_dropout=dropout,
45 | output_attention=output_attention),
46 | d_model, n_heads),
47 | d_model,
48 | d_ff,
49 | dropout=dropout,
50 | activation=activation
51 | ) for l in range(e_layers)
52 | ],
53 | [
54 | ConvLayer(
55 | d_model
56 | ) for l in range(e_layers - 1)
57 | ] if distil else None,
58 | norm_layer=torch.nn.LayerNorm(d_model)
59 | )
60 | # Decoder
61 | self.decoder = Decoder(
62 | [
63 | DecoderLayer(
64 | AttentionLayer(
65 | ProbAttention(True, factor, attention_dropout=dropout, output_attention=False),
66 | d_model, n_heads),
67 | AttentionLayer(
68 | ProbAttention(False, factor, attention_dropout=dropout, output_attention=False),
69 | d_model, n_heads),
70 | d_model,
71 | d_ff,
72 | dropout=dropout,
73 | activation=activation,
74 | )
75 | for l in range(d_layers)
76 | ],
77 | norm_layer=torch.nn.LayerNorm(d_model),
78 | projection=nn.Linear(d_model, c_out, bias=True)
79 | )
80 |
81 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
82 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
83 |
84 | enc_out = self.enc_embedding(x_enc, x_mark_enc)
85 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
86 |
87 | dec_out = self.dec_embedding(x_dec, x_mark_dec)
88 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
89 |
90 | if self.output_attention:
91 | return dec_out[:, -self.pred_len:, :], attns
92 | else:
93 | return dec_out[:, -self.pred_len:, :] # [B, L, D]
94 |
95 | # Cell
96 | class Informer(pl.LightningModule):
97 | def __init__(self, seq_len,
98 | label_len, pred_len, output_attention,
99 | enc_in, dec_in, d_model, c_out, embed, freq, dropout,
100 | factor, n_heads, d_ff, activation, e_layers, d_layers, distil,
101 | loss_train, loss_valid, loss_hypar, learning_rate,
102 | lr_decay, weight_decay, lr_decay_step_size,
103 | random_seed):
104 | super(Informer, self).__init__()
105 |
106 | #------------------------ Model Attributes ------------------------#
107 | # Architecture parameters
108 | self.seq_len = seq_len
109 | self.label_len = label_len
110 | self.pred_len = pred_len
111 | self.output_attention = output_attention
112 | self.enc_in = enc_in
113 | self.dec_in = dec_in
114 | self.d_model = d_model
115 | self.c_out = c_out
116 | self.embed = embed
117 | self.freq = freq
118 | self.dropout = dropout
119 | self.factor = factor
120 | self.n_heads = n_heads
121 | self.d_ff = d_ff
122 | self.activation = activation
123 | self.e_layers = e_layers
124 | self.d_layers = d_layers
125 | self.distil = distil
126 |
127 | # Loss functions
128 | self.loss_train = loss_train
129 | self.loss_hypar = loss_hypar
130 | self.loss_valid = loss_valid
131 | self.loss_fn_train = LossFunction(loss_train,
132 | seasonality=self.loss_hypar)
133 | self.loss_fn_valid = LossFunction(loss_valid,
134 | seasonality=self.loss_hypar)
135 |
136 | # Regularization and optimization parameters
137 | self.learning_rate = learning_rate
138 | self.lr_decay = lr_decay
139 | self.weight_decay = weight_decay
140 | self.lr_decay_step_size = lr_decay_step_size
141 | self.random_seed = random_seed
142 |
143 | self.model = _Informer(pred_len, output_attention,
144 | enc_in, dec_in, d_model, c_out,
145 | embed, freq, dropout,
146 | factor, n_heads, d_ff,
147 | activation, e_layers,
148 | d_layers, distil)
149 |
150 | def forward(self, batch):
151 |         """
152 |         Informer expects a batch of shape (batch_size, time, series) for y
153 |         and (batch_size, time, exogenous) for x;
154 |         it does not need a separate X for each time series.
155 |         Use the PyTorch DataLoader instead of TimeSeriesLoader.
156 |         """
157 | Y = batch['Y'].permute(0, 2, 1)
158 | X = batch['X'][:, 0, :, :].permute(0, 2, 1)
159 | sample_mask = batch['sample_mask'].permute(0, 2, 1)
160 | available_mask = batch['available_mask']
161 |
162 | s_begin = 0
163 | s_end = s_begin + self.seq_len
164 | r_begin = s_end - self.label_len
165 | r_end = r_begin + self.label_len + self.pred_len
166 |
167 | batch_x = Y[:, s_begin:s_end, :]
168 | batch_y = Y[:, r_begin:r_end, :]
169 | batch_x_mark = X[:, s_begin:s_end, :]
170 | batch_y_mark = X[:, r_begin:r_end, :]
171 | outsample_mask = sample_mask[:, r_begin:r_end, :]
172 |
173 | dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
174 | dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
175 |
176 | if self.output_attention:
177 | forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
178 | else:
179 | forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
180 |
181 | batch_y = batch_y[:, -self.pred_len:, :]
182 | outsample_mask = outsample_mask[:, -self.pred_len:, :]
183 |
184 | return batch_y, forecast, outsample_mask, Y
185 |
186 | def training_step(self, batch, batch_idx):
187 |
188 | outsample_y, forecast, outsample_mask, Y = self(batch)
189 |
190 | loss = self.loss_fn_train(y=outsample_y,
191 | y_hat=forecast,
192 | mask=outsample_mask,
193 | y_insample=Y)
194 |
195 | self.log('train_loss', loss, prog_bar=True, on_epoch=True)
196 |
197 | return loss
198 |
199 | def validation_step(self, batch, idx):
200 |
201 | outsample_y, forecast, outsample_mask, Y = self(batch)
202 |
203 | loss = self.loss_fn_valid(y=outsample_y,
204 | y_hat=forecast,
205 | mask=outsample_mask,
206 | y_insample=Y)
207 |
208 | self.log('val_loss', loss, prog_bar=True)
209 |
210 | return loss
211 |
212 | def on_fit_start(self):
213 | torch.manual_seed(self.random_seed)
214 | np.random.seed(self.random_seed)
215 | random.seed(self.random_seed)
216 |
217 | def configure_optimizers(self):
218 | optimizer = optim.Adam(self.model.parameters(),
219 | lr=self.learning_rate,
220 | weight_decay=self.weight_decay)
221 |
222 | lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
223 | step_size=self.lr_decay_step_size,
224 | gamma=self.lr_decay)
225 |
226 | return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler}
--------------------------------------------------------------------------------
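From the permutes and slices in forward(), the batch dict these modules consume can be reconstructed; the following is an illustrative sketch of its layout (sizes are assumptions consistent with the indexing above, not repo defaults). The time dimension must cover at least seq_len + pred_len so that the r_begin:r_end window exists.

import torch

B, n_series, n_x = 16, 7, 4
seq_len, label_len, pred_len = 96, 48, 24
window = seq_len + pred_len          # forward() slices up to r_end = seq_len + pred_len

batch = {
    'Y': torch.randn(B, n_series, window),              # permuted inside forward to (B, time, series)
    'X': torch.randn(B, 1, n_x, window),                 # only batch['X'][:, 0, :, :] is used
    'sample_mask': torch.ones(B, n_series, window),      # masks the outsample window
    'available_mask': torch.ones(B, n_series, window),   # read but not used downstream
}
# outsample_y, forecast, outsample_mask, Y = informer_model(batch)   # informer_model: an Informer instance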
/src/models/transformer/transformer.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import random
3 |
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import pytorch_lightning as pl
9 | from torch import optim
10 |
11 | from ..components.transformer import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
12 | from ..components.selfattention import FullAttention, AttentionLayer
13 | from ..components.embed import DataEmbedding
14 | from ...losses.utils import LossFunction
15 |
16 | # Cell
17 | class _Transformer(nn.Module):
18 | """
19 | Vanilla Transformer with O(L^2) complexity
20 | """
21 | def __init__(self, pred_len, output_attention,
22 | enc_in, dec_in, d_model, c_out, embed, freq, dropout,
23 | factor, n_heads, d_ff, activation, e_layers,
24 | d_layers):
25 | super(_Transformer, self).__init__()
26 | self.pred_len = pred_len
27 | self.output_attention = output_attention
28 |
29 | # Embedding
30 | self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq,
31 | dropout)
32 | self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq,
33 | dropout)
34 | # Encoder
35 | self.encoder = Encoder(
36 | [
37 | EncoderLayer(
38 | AttentionLayer(
39 | FullAttention(False, factor, attention_dropout=dropout,
40 | output_attention=output_attention), d_model, n_heads),
41 | d_model,
42 | d_ff,
43 | dropout=dropout,
44 | activation=activation
45 | ) for l in range(e_layers)
46 | ],
47 | norm_layer=torch.nn.LayerNorm(d_model)
48 | )
49 | # Decoder
50 | self.decoder = Decoder(
51 | [
52 | DecoderLayer(
53 | AttentionLayer(
54 | FullAttention(True, factor, attention_dropout=dropout, output_attention=False),
55 | d_model, n_heads),
56 | AttentionLayer(
57 | FullAttention(False, factor, attention_dropout=dropout, output_attention=False),
58 | d_model, n_heads),
59 | d_model,
60 | d_ff,
61 | dropout=dropout,
62 | activation=activation,
63 | )
64 | for l in range(d_layers)
65 | ],
66 | norm_layer=torch.nn.LayerNorm(d_model),
67 | projection=nn.Linear(d_model, c_out, bias=True)
68 | )
69 |
70 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
71 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
72 |
73 | enc_out = self.enc_embedding(x_enc, x_mark_enc)
74 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
75 |
76 | dec_out = self.dec_embedding(x_dec, x_mark_dec)
77 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
78 |
79 | if self.output_attention:
80 | return dec_out[:, -self.pred_len:, :], attns
81 | else:
82 | return dec_out[:, -self.pred_len:, :] # [B, L, D]
83 |
84 | # Cell
85 | class Transformer(pl.LightningModule):
86 | def __init__(self, seq_len,
87 | label_len, pred_len, output_attention,
88 | enc_in, dec_in, d_model, c_out, embed, freq, dropout,
89 | factor, n_heads, d_ff, activation, e_layers, d_layers,
90 | loss_train, loss_valid, loss_hypar, learning_rate,
91 | lr_decay, weight_decay, lr_decay_step_size,
92 | random_seed):
93 | super(Transformer, self).__init__()
94 |
95 | #------------------------ Model Attributes ------------------------#
96 | # Architecture parameters
97 | self.seq_len = seq_len
98 | self.label_len = label_len
99 | self.pred_len = pred_len
100 | self.output_attention = output_attention
101 | self.enc_in = enc_in
102 | self.dec_in = dec_in
103 | self.d_model = d_model
104 | self.c_out = c_out
105 | self.embed = embed
106 | self.freq = freq
107 | self.dropout = dropout
108 | self.factor = factor
109 | self.n_heads = n_heads
110 | self.d_ff = d_ff
111 | self.activation = activation
112 | self.e_layers = e_layers
113 | self.d_layers = d_layers
114 |
115 | # Loss functions
116 | self.loss_train = loss_train
117 | self.loss_hypar = loss_hypar
118 | self.loss_valid = loss_valid
119 | self.loss_fn_train = LossFunction(loss_train,
120 | seasonality=self.loss_hypar)
121 | self.loss_fn_valid = LossFunction(loss_valid,
122 | seasonality=self.loss_hypar)
123 |
124 | # Regularization and optimization parameters
125 | self.learning_rate = learning_rate
126 | self.lr_decay = lr_decay
127 | self.weight_decay = weight_decay
128 | self.lr_decay_step_size = lr_decay_step_size
129 | self.random_seed = random_seed
130 |
131 | self.model = _Transformer(pred_len, output_attention,
132 | enc_in, dec_in, d_model, c_out,
133 | embed, freq, dropout,
134 | factor, n_heads, d_ff,
135 | activation, e_layers,
136 | d_layers)
137 |
138 | def forward(self, batch):
139 |         """
140 |         Transformer expects a batch of shape (batch_size, time, series) for y
141 |         and (batch_size, time, exogenous) for x;
142 |         it does not need a separate X for each time series.
143 |         Use the PyTorch DataLoader instead of TimeSeriesLoader.
144 |         """
145 | Y = batch['Y'].permute(0, 2, 1)
146 | X = batch['X'][:, 0, :, :].permute(0, 2, 1)
147 | sample_mask = batch['sample_mask'].permute(0, 2, 1)
148 | available_mask = batch['available_mask']
149 |
150 | s_begin = 0
151 | s_end = s_begin + self.seq_len
152 | r_begin = s_end - self.label_len
153 | r_end = r_begin + self.label_len + self.pred_len
154 |
155 | batch_x = Y[:, s_begin:s_end, :]
156 | batch_y = Y[:, r_begin:r_end, :]
157 | batch_x_mark = X[:, s_begin:s_end, :]
158 | batch_y_mark = X[:, r_begin:r_end, :]
159 | outsample_mask = sample_mask[:, r_begin:r_end, :]
160 |
161 | dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
162 | dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
163 |
164 | if self.output_attention:
165 | forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
166 | else:
167 | forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
168 |
169 | batch_y = batch_y[:, -self.pred_len:, :]
170 | outsample_mask = outsample_mask[:, -self.pred_len:, :]
171 |
172 | return batch_y, forecast, outsample_mask, Y
173 |
174 | def training_step(self, batch, batch_idx):
175 |
176 | outsample_y, forecast, outsample_mask, Y = self(batch)
177 |
178 | loss = self.loss_fn_train(y=outsample_y,
179 | y_hat=forecast,
180 | mask=outsample_mask,
181 | y_insample=Y)
182 |
183 | self.log('train_loss', loss, prog_bar=True, on_epoch=True)
184 |
185 | return loss
186 |
187 | def validation_step(self, batch, idx):
188 |
189 | outsample_y, forecast, outsample_mask, Y = self(batch)
190 |
191 | loss = self.loss_fn_valid(y=outsample_y,
192 | y_hat=forecast,
193 | mask=outsample_mask,
194 | y_insample=Y)
195 |
196 | self.log('val_loss', loss, prog_bar=True)
197 |
198 | return loss
199 |
200 | def on_fit_start(self):
201 | torch.manual_seed(self.random_seed)
202 | np.random.seed(self.random_seed)
203 | random.seed(self.random_seed)
204 |
205 | def configure_optimizers(self):
206 | optimizer = optim.Adam(self.model.parameters(),
207 | lr=self.learning_rate,
208 | weight_decay=self.weight_decay)
209 |
210 | lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
211 | step_size=self.lr_decay_step_size,
212 | gamma=self.lr_decay)
213 |
214 | return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler}
--------------------------------------------------------------------------------
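Since all three wrappers are LightningModules and their docstrings ask for a plain PyTorch DataLoader rather than the repo's TimeSeriesLoader, training follows the standard Lightning pattern. A minimal sketch, assuming train_ds / valid_ds are map-style Datasets yielding the batch dict shown earlier and model is one of the modules defined in this directory:

import pytorch_lightning as pl
from torch.utils.data import DataLoader

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid_ds, batch_size=32, shuffle=False)

trainer = pl.Trainer(max_steps=1000, gradient_clip_val=1.0)
trainer.fit(model, train_loader, valid_loader)   # logs train_loss / val_loss as defined above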