├── .gitignore ├── Dockerfile ├── Makefile ├── README.md ├── autoformer_multivariate.py ├── environment.yml ├── evaluation.py ├── images ├── nhits-arch.png └── results.png ├── long_horizon_baselines.ipynb ├── nhits_intuition.ipynb ├── nhits_multivariate.py ├── residuals.py ├── rnn_multivariate.py └── src ├── __init__.py ├── data ├── __init__.py ├── datasets │ ├── __init__.py │ ├── ecl.py │ ├── epf.py │ ├── ett.py │ ├── favorita.py │ ├── gefcom2012.py │ ├── gefcom2014.py │ ├── m3.py │ ├── m4.py │ ├── m5.py │ ├── tourism.py │ ├── utils.py │ └── wth.py ├── scalers.py ├── tsdataset.py ├── tsloader.py └── utils.py ├── experiments ├── __init__.py └── utils.py ├── losses ├── __init__.py ├── numpy.py ├── pytorch.py └── utils.py └── models ├── __init__.py ├── components ├── __init__.py ├── autocorrelation.py ├── autoformer.py ├── common.py ├── drnn.py ├── embed.py ├── selfattention.py ├── tcn.py └── transformer.py ├── esrnn ├── __init__.py └── esrnn.py ├── nbeats ├── __init__.py ├── ensemble.py └── nbeats.py ├── nhits ├── __init__.py └── nhits.py └── transformer ├── __init__.py ├── autoformer.py ├── informer.py └── transformer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.DS_Store 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # data 133 | results/ 134 | data/ 135 | *.csv 136 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.2.0-devel-ubuntu18.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | RUN apt-get update -y --fix-missing && \ 6 | apt-get install -y --no-install-recommends \ 7 | software-properties-common \ 8 | wget \ 9 | curl \ 10 | unrar \ 11 | unzip \ 12 | git && \ 13 | apt-get upgrade -y libstdc++6 && \ 14 | apt-get clean -y 15 | 16 | RUN add-apt-repository ppa:ubuntu-toolchain-r/test && \ 17 | apt-get update && \ 18 | apt-get install -y gcc-9 && \ 19 | apt-get upgrade -y libstdc++6 20 | 21 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 22 | bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b && \ 23 | rm -rf Miniconda3-latest-Linux-x86_64.sh 24 | 25 | ENV PATH=/miniconda/bin:${PATH} 26 | RUN conda update -y conda 27 | 28 | RUN conda install -n base -c conda-forge mamba 29 | 30 | ADD ./environment.yml ./environment.yml 31 | RUN mamba env update -n base -f ./environment.yml && \ 32 | conda clean -afy 33 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | IMAGE := nhits 2 | ROOT := $(shell dirname $(realpath $(firstword ${MAKEFILE_LIST}))) 3 | PARENT_ROOT := $(shell dirname ${ROOT}) 4 | PORT := 8888 5 | 6 | DOCKER_PARAMETERS := \ 7 | --user $(shell id -u) \ 8 | -v ${ROOT}:/app \ 9 | -w /app \ 10 | -e HOME=/tmp 11 | 12 | ifdef gpu 13 | DOCKER_PARAMETERS += --gpus all 14 | endif 15 | 16 | init: 17 | docker build -t ${IMAGE} . 18 | 19 | get_dataset: 20 | $(MAKE) run_module module="mkdir -p data/" 21 | $(MAKE) run_module module="wget -O data/datasets.zip https://nhits-experiments.s3.amazonaws.com/datasets.zip" 22 | $(MAKE) run_module module="unzip data/datasets.zip -d data/" 23 | 24 | jupyter: 25 | docker run -d --rm ${DOCKER_PARAMETERS} -e HOME=/tmp -p ${PORT}:8888 ${IMAGE} \ 26 | bash -c "jupyter lab --ip=0.0.0.0 --no-browser --NotebookApp.token=''" 27 | 28 | run_module: .require-module 29 | docker run -i --rm ${DOCKER_PARAMETERS} \ 30 | ${IMAGE} ${module} 31 | 32 | bash_docker: 33 | docker run -it --rm ${DOCKER_PARAMETERS} ${IMAGE} 34 | 35 | .require-module: 36 | ifndef module 37 | $(error module is required) 38 | endif 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting 2 | 3 | Recent progress in neural forecasting instigated significant improvements in the accuracy of large-scale forecasting systems. Yet, extremely long horizon forecasting remains a very difficult task. 
Two common challenges afflicting long-horizon forecasting are the volatility of the predictions and their computational complexity. In this paper, we introduce `N-HiTS`, which addresses both challenges by incorporating novel hierarchical interpolation and multi-rate data sampling techniques. These techniques enable our method to assemble its predictions sequentially, selectively emphasizing components with different frequencies and scales while decomposing the input signal and synthesizing the forecast. We conduct an extensive empirical evaluation demonstrating the advantages of `N-HiTS` over state-of-the-art long-horizon forecasting methods. On an array of multivariate forecasting tasks, our method provides an average accuracy improvement of 25% over the latest Transformer architectures while reducing the computational time by orders of magnitude. 4 | 5 |
6 | ![N-HiTS architecture](./images/nhits-arch.png) 7 |
8 | 9 | `N-HiTS` architecture. The model is composed of several `MLPs` with `ReLU` nonlinearities. Blocks are connected via the doubly residual stacking principle with the backcast `y[t-L:t, l]` and forecast `y[t+1:t+H, l]` outputs of the `l`-th block. 10 | Multi-rate input pooling, hierarchical interpolation and backcast residual connections together induce the specialization of the additive predictions in different signal bands, reducing the memory footprint and compute time and improving architecture parsimony and accuracy. 11 |
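The two mechanisms can be written down in a few lines of PyTorch. The sketch below is purely illustrative and is not the implementation in `src/models/nhits/nhits.py`: the class name `ToyNHiTSBlock`, the hidden layer sizes, and the example lookback/horizon values are invented for exposition. It shows a block that max-pools its input (multi-rate sampling), predicts a small set of forecast coefficients and linearly interpolates them to the full horizon (hierarchical interpolation), and a loop that chains blocks through doubly residual stacking.

```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyNHiTSBlock(nn.Module):
    """Illustrative block: multi-rate input pooling + MLP + hierarchical interpolation."""

    def __init__(self, input_size, horizon, pool_kernel_size, freq_downsample, hidden=64):
        super().__init__()
        self.input_size = input_size
        self.horizon = horizon
        # Multi-rate sampling: max-pool the lookback window before the MLP.
        self.pool = nn.MaxPool1d(kernel_size=pool_kernel_size,
                                 stride=pool_kernel_size, ceil_mode=True)
        pooled_size = math.ceil(input_size / pool_kernel_size)
        # The block only predicts ceil(H / freq_downsample) forecast "knots".
        self.n_knots = max(math.ceil(horizon / freq_downsample), 2)
        self.mlp = nn.Sequential(
            nn.Linear(pooled_size, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, input_size + self.n_knots),
        )

    def forward(self, y_insample):
        # y_insample: (batch, input_size)
        pooled = self.pool(y_insample.unsqueeze(1)).squeeze(1)
        theta = self.mlp(pooled)
        backcast = theta[:, :self.input_size]
        knots = theta[:, self.input_size:]
        # Hierarchical interpolation: upsample the low-rate knots to the full horizon.
        forecast = F.interpolate(knots.unsqueeze(1), size=self.horizon,
                                 mode='linear', align_corners=True).squeeze(1)
        return backcast, forecast


def doubly_residual_forecast(blocks, y_insample):
    """Doubly residual stacking: each block backcasts what it explained,
    and the partial forecasts of all blocks are summed."""
    residual = y_insample
    forecast = torch.zeros(y_insample.shape[0], blocks[0].horizon)
    for block in blocks:
        backcast, block_forecast = block(residual)
        residual = residual - backcast
        forecast = forecast + block_forecast
    return forecast


# Example: lookback L=96, horizon H=24, three blocks specializing in different frequencies.
blocks = nn.ModuleList([
    ToyNHiTSBlock(96, 24, pool_kernel_size=8, freq_downsample=12),  # low-frequency block
    ToyNHiTSBlock(96, 24, pool_kernel_size=4, freq_downsample=4),
    ToyNHiTSBlock(96, 24, pool_kernel_size=1, freq_downsample=1),   # full-rate block
])
y = torch.randn(32, 96)
print(doubly_residual_forecast(blocks, y).shape)  # torch.Size([32, 24])
```

Larger pooling kernels and stronger downsampling make a block specialize in low-frequency structure, while the last block operates at full rate; this mirrors the `n_pool_kernel_size` and `n_freq_downsample` hyperparameters (with `pooling_mode='max'` and `interpolation_mode='linear'`) searched in `nhits_multivariate.py`.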
12 | ## Long Horizon Datasets Results 13 | 14 | ![N-HiTS results on long-horizon benchmarks](./images/results.png) 15 | 16 |
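For reference, the multivariate experiment scripts in this repository (`nhits_multivariate.py`, `autoformer_multivariate.py`, `rnn_multivariate.py`) read each benchmark as a long-format CSV located at `./data/<dataset>/M/df_y.csv` once the data has been downloaded with `make get_dataset` (step 2 below). A minimal, illustrative sketch of inspecting one dataset; the ETTm2 path and the series count are taken from those scripts, and the exact column layout is an assumption based on how they use the frame:

```python
# Illustrative only: peek at the long-format data the experiment scripts load.
import pandas as pd

Y_df = pd.read_csv('./data/ETTm2/M/df_y.csv')  # expected columns include unique_id, ds, y
print(Y_df.head())
print(Y_df['unique_id'].nunique())             # 7 series for ETTm2
```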
17 | 18 | ### Run N-HiTS experiment from console 19 | 20 | To replicate the results of the paper, in particular to produce the forecasts for N-HiTS, run the following: 21 | 22 | 23 | 1. `make init` 24 | 2. `make get_dataset` to download data. 25 | 3. 26 | ```console 27 | make run_module module="python -m nhits_multivariate --hyperopt_max_evals 10 --experiment_id run_1" 28 | ``` 29 | 30 | If you want to use `GPU` simply add `gpu=0` to the last line. 31 | ```console 32 | make run_module module="python -m nhits_multivariate --hyperopt_max_evals 10 --experiment_id run_1" gpu=0 33 | ``` 34 | 4. Evaluate results for a dataset using: 35 | 36 | ```console 37 | make run_module module="python -m evaluation --dataset ETTm2 --horizon -1 --model NHITS --experiment run_1" 38 | ``` 39 | 40 | Alternatively, run all evaluations at once: 41 | 42 | ```console 43 | for dataset in ETTm2 ECL Exchange traffic weather ili; 44 | do make run_module module="python -m evaluation --dataset $dataset --horizon -1 --model NHITS --experiment run_1"; 45 | done 46 | ``` 47 | -------------------------------------------------------------------------------- /autoformer_multivariate.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | import os 3 | import pickle 4 | import glob 5 | import time 6 | import numpy as np 7 | import pandas as pd 8 | import argparse 9 | import platform 10 | 11 | from hyperopt import fmin, tpe, hp, Trials, STATUS_OK 12 | 13 | from src.losses.numpy import mae, mse 14 | from src.experiments.utils import hyperopt_tunning 15 | 16 | 17 | def get_experiment_space(args): 18 | space= {# Architecture parameters 19 | 'model':'autoformer', 20 | 'mode': 'iterate_windows', 21 | 'seq_len': hp.choice('seq_len', [args.seq_len]), 22 | 'label_len': hp.choice('label_len', [args.label_len]), 23 | 'pred_len': hp.choice('pred_len', [args.horizon]), 24 | 'output_attention': hp.choice('output_attention', [False]), 25 | 'enc_in': hp.choice('enc_in', [args.n_series]), 26 | 'dec_in': hp.choice('dec_in', [args.n_series]), 27 | 'c_out': hp.choice('c_out', [args.n_series]), 28 | 'e_layers': hp.choice('e_layers', [args.e_layers]), 29 | 'd_layers': hp.choice('d_layers', [args.d_layers]), 30 | 'd_model': hp.choice('d_model', [512]), 31 | 'embed': hp.choice('embed', ['timeF']), 32 | 'freq': hp.choice('freq', ['h']), 33 | 'dropout': hp.choice('dropout', [0.05]), 34 | 'factor': hp.choice('factor', [args.factor]), 35 | 'n_heads': hp.choice('n_heads', [8]), 36 | 'd_ff': hp.choice('d_ff', [2_048]), 37 | 'moving_avg': hp.choice('moving_avg', [25]), 38 | 'activation': hp.choice('activation', ['gelu']), 39 | # Regularization and optimization parameters 40 | 'learning_rate': hp.choice('learning_rate', [1e-4]), 41 | 'lr_decay': hp.choice('lr_decay', [0.5]), 42 | 'n_lr_decays': hp.choice('n_lr_decays', [ceil(args.max_epochs / 2)]), 43 | 'weight_decay': hp.choice('weight_decay', [0]), 44 | 'max_epochs': hp.choice('max_epochs', [args.max_epochs]), 45 | 'max_steps': hp.choice('max_steps', [None]), 46 | 'early_stop_patience': hp.choice('early_stop_patience', [3]), 47 | 'eval_freq': hp.choice('eval_freq', [1]), 48 | 'loss_train': hp.choice('loss', ['MSE']), 49 | 'loss_hypar': hp.choice('loss_hypar', [0.5]), 50 | 'loss_valid': hp.choice('loss_valid', ['MSE']), 51 | # Data parameters 52 | 'n_time_in': hp.choice('n_time_in', [args.seq_len]), 53 | 'n_time_out': hp.choice('n_time_out', [args.horizon]), 54 | 'normalizer_y': hp.choice('normalizer_y', [None]), 55 | 'normalizer_x': 
hp.choice('normalizer_x', [None]), 56 | 'val_idx_to_sample_freq': hp.choice('val_idx_to_sample_freq', [1]), 57 | 'batch_size': hp.choice('batch_size', [32]), 58 | 'random_seed': hp.choice('random_seed', [1])} 59 | 60 | return space 61 | 62 | def main(args): 63 | 64 | #----------------------------------------------- Load Data -----------------------------------------------# 65 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv') 66 | X_df = pd.read_csv(f'./data/{args.dataset}/M/df_x.csv') 67 | print(Y_df) 68 | print(X_df) 69 | 70 | #raise Exception 71 | 72 | X_df = X_df.drop_duplicates(subset=['ds']) 73 | 74 | X_df = Y_df[['unique_id', 'ds']].merge(X_df, how='left', on=['ds']) 75 | 76 | S_df = None 77 | print('Y_df: ', Y_df.head()) 78 | #arguments 79 | args.e_layers = 2 80 | args.d_layers = 1 81 | args.max_epochs = 10 82 | args.seq_len = 36 if args.dataset == 'ili' else 96 83 | args.label_len = 18 if args.dataset == 'ili' else 48 84 | 85 | if args.dataset == 'ETTm2': 86 | len_val = 11520 87 | len_test = 11520 88 | args.factor = 1 89 | args.n_series = 7 90 | if args.dataset == 'Exchange': 91 | len_val = 760 92 | len_test = 1517 93 | args.factor = 3 94 | args.n_series = 8 95 | args.max_epochs = 1 if args.horizon in [192, 336] else args.max_epochs 96 | if args.dataset == 'ECL': 97 | len_val = 2632 98 | len_test = 5260 99 | args.factor = 3 100 | args.n_series = 321 101 | if args.dataset == 'traffic': 102 | len_val = 1756 103 | len_test = 3508 104 | args.factor = 3 105 | args.max_epochs = 3 106 | args.n_series = 862 107 | if args.dataset == 'weather': 108 | len_val = 5270 109 | len_test = 10539 110 | args.factor = 3 111 | args.n_series = 21 112 | args.max_epochs = 2 if args.horizon in [96] else args.max_epochs 113 | if args.dataset == 'ili': 114 | len_val = 97 115 | len_test = 193 116 | args.factor = 3 117 | args.n_series = 7 118 | 119 | space = get_experiment_space(args) 120 | 121 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/autoformer/' 122 | 123 | os.makedirs(output_dir, exist_ok = True) 124 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist' 125 | 126 | hyperopt_file = output_dir + f'hyperopt_{args.experiment_id}.p' 127 | 128 | if not os.path.isfile(hyperopt_file): 129 | print('Hyperparameter optimization') 130 | #----------------------------------------------- Hyperopt -----------------------------------------------# 131 | trials = hyperopt_tunning(space=space, hyperopt_max_evals=args.hyperopt_max_evals, loss_function_val=mae, 132 | loss_functions_test={'mae':mae, 'mse': mse}, 133 | Y_df=Y_df, X_df=X_df, S_df=S_df, f_cols=[], 134 | ds_in_val=len_val, ds_in_test=len_test, 135 | return_forecasts=False, 136 | results_file = hyperopt_file, 137 | save_progress=True, 138 | loss_kwargs={}) 139 | 140 | with open(hyperopt_file, "wb") as f: 141 | pickle.dump(trials, f) 142 | else: 143 | print('Hyperparameter optimization already done!') 144 | 145 | def parse_args(): 146 | desc = "Example of hyperparameter tuning" 147 | parser = argparse.ArgumentParser(description=desc) 148 | 149 | parser.add_argument('--hyperopt_max_evals', type=int, help='hyperopt_max_evals', default=1) 150 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment') 151 | 152 | return parser.parse_args() 153 | 154 | 155 | if __name__ == '__main__': 156 | 157 | # parse arguments 158 | args = parse_args() 159 | if args is None: 160 | exit() 161 | 162 | horizons = [96, 192, 336, 720] 163 | ILI_horizons = [24, 36, 48, 60] 
164 | datasets = ['ili', 'Exchange', 'weather', 'ETTm2', 'ECL', 'traffic'] 165 | 166 | for dataset in datasets: 167 | # Horizon 168 | if dataset == 'ili': 169 | horizons_dataset = ILI_horizons 170 | else: 171 | horizons_dataset = horizons 172 | for horizon in horizons_dataset: 173 | print(50*'-', dataset, 50*'-') 174 | print(50*'-', horizon, 50*'-') 175 | start = time.time() 176 | args.dataset = dataset 177 | args.horizon = horizon 178 | main(args) 179 | print('Time: ', time.time() - start) 180 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - defaults 4 | dependencies: 5 | - python=3.7 6 | - pip 7 | - matplotlib 8 | - numpy 9 | - pandas 10 | - r-base 11 | - r-forecast 12 | - r-data.table 13 | - r-tidyverse 14 | - r-furrr 15 | - scikit-learn 16 | - statsmodels 17 | - setuptools=58.2.0 18 | - pip: 19 | - fastcore 20 | - hyperopt 21 | - tqdm 22 | - pytorch-lightning>=1.3.0 23 | - torch==1.9.0 24 | - jupyterlab 25 | - parse 26 | -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pickle 4 | import argparse 5 | import numpy as np 6 | 7 | from src.losses.numpy import mae, mse 8 | 9 | 10 | def get_score_min_val(dir): 11 | print(dir) 12 | result = pickle.load(open(dir, 'rb')) 13 | min_mae = 100 14 | mc = {} 15 | for i in range(len(result)): 16 | val_mae = result.trials[i]['result']['loss'] 17 | if val_mae < min_mae: 18 | mae_best = result.trials[i]['result']['test_losses']['mae'] 19 | mse_best = result.trials[i]['result']['test_losses']['mse'] 20 | min_mae = val_mae 21 | mc = result.trials[i]['result']['mc'] 22 | return mae_best, mse_best, mc 23 | 24 | def main(args): 25 | 26 | if args.horizon<0: 27 | if args.dataset == 'ili': 28 | horizons = [24, 36, 48, 60] 29 | else: 30 | horizons = [96, 192, 336, 720] 31 | else: 32 | horizons = [args.horizon] 33 | 34 | for horizon in horizons: 35 | result_dir = f'./results/{args.setting}/{args.dataset}_{horizon}/{args.model}/' 36 | result_dir = Path(result_dir) 37 | files = list(result_dir.glob(f'hyperopt_{args.experiment}*.p')) 38 | maes = [] 39 | mses = [] 40 | for file_ in files: 41 | mae_data, mse_data = get_score_min_val(file_) 42 | maes.append(mae_data) 43 | mses.append(mse_data) 44 | 45 | print(f'Horizon {horizon}') 46 | print(f'MSE: {np.mean(mses)}') 47 | print(f'MAE: {np.mean(maes)}') 48 | 49 | def parse_args(): 50 | desc = "Example of hyperparameter tuning" 51 | parser = argparse.ArgumentParser(description=desc) 52 | 53 | parser.add_argument('--dataset', type=str, help='Name of the dataset') 54 | parser.add_argument('--setting', type=str, help='Multivariate or univariate', default='multivariate') 55 | parser.add_argument('--horizon', type=int, help='Horizon') 56 | parser.add_argument('--model', type=str, help='Model name') 57 | parser.add_argument('--experiment', type=str, help='string to identify experiment') 58 | return parser.parse_args() 59 | 60 | if __name__ == '__main__': 61 | 62 | # parse arguments 63 | args = parse_args() 64 | if args is None: 65 | exit() 66 | 67 | main(args) 68 | -------------------------------------------------------------------------------- /images/nhits-arch.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/images/nhits-arch.png -------------------------------------------------------------------------------- /images/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/images/results.png -------------------------------------------------------------------------------- /nhits_multivariate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import time 4 | import argparse 5 | import pandas as pd 6 | 7 | from hyperopt import fmin, tpe, hp, Trials, STATUS_OK 8 | 9 | from src.losses.numpy import mae, mse 10 | from src.experiments.utils import hyperopt_tunning 11 | 12 | def get_experiment_space(args): 13 | space= {# Architecture parameters 14 | 'model':'nhits', 15 | 'mode': 'simple', 16 | 'n_time_in': hp.choice('n_time_in', [5*args.horizon]), 17 | 'n_time_out': hp.choice('n_time_out', [args.horizon]), 18 | 'n_x_hidden': hp.choice('n_x_hidden', [0]), 19 | 'n_s_hidden': hp.choice('n_s_hidden', [0]), 20 | 'shared_weights': hp.choice('shared_weights', [False]), 21 | 'activation': hp.choice('activation', ['ReLU']), 22 | 'initialization': hp.choice('initialization', ['lecun_normal']), 23 | 'stack_types': hp.choice('stack_types', [ 3*['identity'] ]), 24 | 'n_blocks': hp.choice('n_blocks', [ 3*[1]]), 25 | 'n_layers': hp.choice('n_layers', [ 9*[2] ]), 26 | 'n_hidden': hp.choice('n_hidden', [ 512 ]), 27 | 'n_pool_kernel_size': hp.choice('n_pool_kernel_size', [ 3*[1], 3*[2], 3*[4], 3*[8], [8, 4, 1], [16, 8, 1] ]), 28 | 'n_freq_downsample': hp.choice('n_freq_downsample', [ [168, 24, 1], [24, 12, 1], 29 | [180, 60, 1], [60, 8, 1], 30 | [40, 20, 1] 31 | ]), 32 | 'pooling_mode': hp.choice('pooling_mode', [ 'max' ]), 33 | 'interpolation_mode': hp.choice('interpolation_mode', ['linear']), 34 | # Regularization and optimization parameters 35 | 'batch_normalization': hp.choice('batch_normalization', [False]), 36 | 'dropout_prob_theta': hp.choice('dropout_prob_theta', [ 0 ]), 37 | 'dropout_prob_exogenous': hp.choice('dropout_prob_exogenous', [0]), 38 | 'learning_rate': hp.choice('learning_rate', [0.001]), 39 | 'lr_decay': hp.choice('lr_decay', [0.5] ), 40 | 'n_lr_decays': hp.choice('n_lr_decays', [3]), 41 | 'weight_decay': hp.choice('weight_decay', [0] ), 42 | 'max_epochs': hp.choice('max_epochs', [None]), 43 | 'max_steps': hp.choice('max_steps', [1_000]), 44 | 'early_stop_patience': hp.choice('early_stop_patience', [10]), 45 | 'eval_freq': hp.choice('eval_freq', [50]), 46 | 'loss_train': hp.choice('loss', ['MAE']), 47 | 'loss_hypar': hp.choice('loss_hypar', [0.5]), 48 | 'loss_valid': hp.choice('loss_valid', ['MAE']), 49 | 'l1_theta': hp.choice('l1_theta', [0]), 50 | # Data parameters 51 | 'normalizer_y': hp.choice('normalizer_y', [None]), 52 | 'normalizer_x': hp.choice('normalizer_x', [None]), 53 | 'complete_windows': hp.choice('complete_windows', [True]), 54 | 'frequency': hp.choice('frequency', ['H']), 55 | 'seasonality': hp.choice('seasonality', [24]), 56 | 'idx_to_sample_freq': hp.choice('idx_to_sample_freq', [1]), 57 | 'val_idx_to_sample_freq': hp.choice('val_idx_to_sample_freq', [1]), 58 | 'batch_size': hp.choice('batch_size', [1]), 59 | 'n_windows': hp.choice('n_windows', [256]), 60 | 'random_seed': hp.quniform('random_seed', 1, 10, 1)} 61 | return space 62 | 63 | def main(args): 64 | 65 | 
#----------------------------------------------- Load Data -----------------------------------------------# 66 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv') 67 | 68 | X_df = None 69 | S_df = None 70 | 71 | print('Y_df: ', Y_df.head()) 72 | if args.dataset == 'ETTm2': 73 | len_val = 11520 74 | len_test = 11520 75 | if args.dataset == 'Exchange': 76 | len_val = 760 77 | len_test = 1517 78 | if args.dataset == 'ECL': 79 | len_val = 2632 80 | len_test = 5260 81 | if args.dataset == 'traffic': 82 | len_val = 1756 83 | len_test = 3508 84 | if args.dataset == 'weather': 85 | len_val = 5270 86 | len_test = 10539 87 | if args.dataset == 'ili': 88 | len_val = 97 89 | len_test = 193 90 | 91 | space = get_experiment_space(args) 92 | 93 | #---------------------------------------------- Directories ----------------------------------------------# 94 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/NHITS/' 95 | 96 | os.makedirs(output_dir, exist_ok = True) 97 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist' 98 | 99 | hyperopt_file = output_dir + f'hyperopt_{args.experiment_id}.p' 100 | 101 | if not os.path.isfile(hyperopt_file): 102 | print('Hyperparameter optimization') 103 | #----------------------------------------------- Hyperopt -----------------------------------------------# 104 | trials = hyperopt_tunning(space=space, hyperopt_max_evals=args.hyperopt_max_evals, loss_function_val=mae, 105 | loss_functions_test={'mae':mae, 'mse': mse}, 106 | Y_df=Y_df, X_df=X_df, S_df=S_df, f_cols=[], 107 | evaluate_train=True, 108 | ds_in_val=len_val, ds_in_test=len_test, 109 | return_forecasts=False, 110 | results_file = hyperopt_file, 111 | save_progress=True, 112 | loss_kwargs={}) 113 | 114 | with open(hyperopt_file, "wb") as f: 115 | pickle.dump(trials, f) 116 | else: 117 | print('Hyperparameter optimization already done!') 118 | 119 | def parse_args(): 120 | desc = "Example of hyperparameter tuning" 121 | parser = argparse.ArgumentParser(description=desc) 122 | parser.add_argument('--hyperopt_max_evals', type=int, help='hyperopt_max_evals') 123 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment') 124 | return parser.parse_args() 125 | 126 | if __name__ == '__main__': 127 | 128 | # parse arguments 129 | args = parse_args() 130 | if args is None: 131 | exit() 132 | 133 | horizons = [96, 192, 336, 720] 134 | ILI_horizons = [24, 36, 48, 60] 135 | datasets = ['ETTm2', 'Exchange', 'weather', 'ili', 'ECL', 'traffic'] 136 | 137 | for dataset in datasets: 138 | # Horizon 139 | if dataset == 'ili': 140 | horizons_dataset = ILI_horizons 141 | else: 142 | horizons_dataset = horizons 143 | for horizon in horizons_dataset: 144 | print(50*'-', dataset, 50*'-') 145 | print(50*'-', horizon, 50*'-') 146 | start = time.time() 147 | args.dataset = dataset 148 | args.horizon = horizon 149 | main(args) 150 | print('Time: ', time.time() - start) 151 | 152 | main(args) 153 | 154 | # source ~/anaconda3/etc/profile.d/conda.sh 155 | # conda activate nixtla 156 | # CUDA_VISIBLE_DEVICES=0 python nhits_multivariate.py --hyperopt_max_evals 10 --experiment_id "eval_train" 157 | -------------------------------------------------------------------------------- /residuals.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from evaluation import get_score_min_val 8 | from src.experiments.utils 
import model_fit_predict 9 | 10 | def main(args): 11 | 12 | #----------------------------------------------- Load Data -----------------------------------------------# 13 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv') 14 | 15 | X_df = None 16 | S_df = None 17 | 18 | print('Y_df: ', Y_df.head()) 19 | if args.dataset == 'ETTm2': 20 | len_val = 11520 21 | len_test = 11520 22 | if args.dataset == 'Exchange': 23 | len_val = 760 24 | len_test = 1517 25 | if args.dataset == 'ECL': 26 | len_val = 2632 27 | len_test = 5260 28 | if args.dataset == 'traffic': 29 | len_val = 1756 30 | len_test = 3508 31 | if args.dataset == 'weather': 32 | len_val = 5270 33 | len_test = 10539 34 | if args.dataset == 'ili': 35 | len_val = 97 36 | len_test = 193 37 | 38 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/NHITS/' 39 | 40 | os.makedirs(output_dir, exist_ok = True) 41 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist' 42 | 43 | hyperopt_file = output_dir + f'hyperopt_{args.experiment_id}.p' 44 | *_, mc = get_score_min_val(hyperopt_file) 45 | results = model_fit_predict(mc=mc, S_df=S_df, 46 | Y_df=Y_df, X_df=X_df, 47 | f_cols=[], ds_in_val=len_val, 48 | ds_in_test=len_test, 49 | insample=True) 50 | 51 | n_series = Y_df['unique_id'].nunique() 52 | for data_kind in ['insample', 'val', 'test']: 53 | for y_kind in ['true', 'hat']: 54 | name = f'{data_kind}_y_{y_kind}' 55 | result_name = results[name].reshape((n_series, -1, mc['n_time_out'])) 56 | np.save(output_dir + f'{name}.npy', result_name) 57 | 58 | def parse_args(): 59 | desc = "Example of hyperparameter tuning" 60 | parser = argparse.ArgumentParser(description=desc) 61 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment') 62 | return parser.parse_args() 63 | 64 | if __name__ == '__main__': 65 | 66 | # parse arguments 67 | args = parse_args() 68 | if args is None: 69 | exit() 70 | 71 | horizons = [96, 192, 336, 720] 72 | ILI_horizons = [24, 36, 48, 60] 73 | datasets = ['ETTm2', 'weather', 'Exchange']#['ECL', 'Exchange', 'traffic', 'weather', 'ili'] 74 | 75 | for dataset in datasets: 76 | # Horizon 77 | if dataset == 'ili': 78 | horizons_dataset = ILI_horizons 79 | else: 80 | horizons_dataset = horizons 81 | for horizon in horizons_dataset: 82 | print(50*'-', dataset, 50*'-') 83 | print(50*'-', horizon, 50*'-') 84 | args.dataset = dataset 85 | args.horizon = horizon 86 | main(args) 87 | 88 | main(args) 89 | 90 | # source ~/anaconda3/etc/profile.d/conda.sh 91 | # conda activate nixtla 92 | # CUDA_VISIBLE_DEVICES=0 python nhits_multivariate.py --hyperopt_max_evals 10 --experiment_id "test" 93 | -------------------------------------------------------------------------------- /rnn_multivariate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import time 4 | import argparse 5 | import pandas as pd 6 | 7 | from hyperopt import fmin, tpe, hp, Trials, STATUS_OK 8 | 9 | from src.losses.numpy import mae, mse 10 | from neuralforecast.experiments.utils import hyperopt_tunning 11 | 12 | def get_experiment_space(args): 13 | space= {# Architecture parameters 14 | 'model':'rnn', 15 | 'mode': 'full', 16 | 'n_time_in': hp.choice('n_time_in', [1*horizon]), 17 | 'n_time_out': hp.choice('n_time_out', [horizon]), 18 | 'cell_type': hp.choice('cell_type', ['LSTM']), 19 | 'state_hsize': hp.choice('state_hsize', [10, 20, 50]), 20 | 'dilations': hp.choice('dilations', [ [[1, 2]], [[1, 2, 4, 
8]], [[1,2],[4,8]] ]), 21 | 'add_nl_layer': hp.choice('add_nl_layer', [ False ]), 22 | 'n_pool_kernel_size': hp.choice('n_pool_kernel_size', [ args.pooling ]), 23 | 'n_freq_downsample': hp.choice('n_freq_downsample', [ args.interpolation ]), 24 | 'sample_freq': hp.choice('sample_freq', [1]), 25 | # Regularization and optimization parameters 26 | 'learning_rate': hp.choice('learning_rate', [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1]), 27 | 'lr_decay': hp.choice('lr_decay', [0.5] ), 28 | 'n_lr_decays': hp.choice('n_lr_decays', [3]), 29 | 'gradient_eps': hp.choice('gradient_eps', [1e-8]), 30 | 'gradient_clipping_threshold': hp.choice('gradient_clipping_threshold', [10]), 31 | 'weight_decay': hp.choice('weight_decay', [0]), 32 | 'noise_std': hp.choice('noise_std', [0.001]), 33 | 'max_epochs': hp.choice('max_epochs', [None]), 34 | 'max_steps': hp.choice('max_steps', [500]), 35 | 'early_stop_patience': hp.choice('early_stop_patience', [10]), 36 | 'eval_freq': hp.choice('eval_freq', [50]), 37 | 'loss_train': hp.choice('loss', ['MAE']), 38 | 'loss_hypar': hp.choice('loss_hypar', [0.5]), 39 | 'loss_valid': hp.choice('loss_valid', ['MAE']), 40 | # Data parameters 41 | 'normalizer_y': hp.choice('normalizer_y', [None]), 42 | 'normalizer_x': hp.choice('normalizer_x', [None]), 43 | 'complete_windows': hp.choice('complete_windows', [True]), 44 | 'idx_to_sample_freq': hp.choice('idx_to_sample_freq', [1]), 45 | 'val_idx_to_sample_freq': hp.choice('val_idx_to_sample_freq', [1]), 46 | 'batch_size': hp.choice('batch_size', [8, 16, 32]), 47 | 'n_windows': hp.choice('n_windows', [None]), 48 | 'frequency': hp.choice('frequency', ['D']), 49 | 'random_seed': hp.quniform('random_seed', 1, 10, 1)} 50 | return space 51 | 52 | def main(args): 53 | 54 | #----------------------------------------------- Load Data -----------------------------------------------# 55 | Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv') 56 | 57 | X_df = None 58 | S_df = None 59 | 60 | print('Y_df: ', Y_df.head()) 61 | if args.dataset == 'ETTm2': 62 | len_val = 11520 63 | len_test = 11520 64 | window_sampling_limit = 4000+len_val+len_test 65 | if args.dataset == 'Exchange': 66 | len_val = 760 67 | len_test = 1517 68 | window_sampling_limit = 1517+len_val+len_test 69 | if args.dataset == 'ECL': 70 | len_val = 2632 71 | len_test = 5260 72 | window_sampling_limit = 4000+len_val+len_test 73 | if args.dataset == 'traffic': 74 | len_val = 1756 75 | len_test = 3508 76 | window_sampling_limit = 3508+len_val+len_test 77 | if args.dataset == 'weather': 78 | len_val = 5270 79 | len_test = 10539 80 | window_sampling_limit = 4000+len_val+len_test 81 | if args.dataset == 'ili': 82 | len_val = 97 83 | len_test = 193 84 | window_sampling_limit = 193+len_val+len_test 85 | 86 | Y_df = Y_df.groupby('unique_id').tail(window_sampling_limit).reset_index(drop=True) 87 | 88 | space = get_experiment_space(args) 89 | 90 | #---------------------------------------------- Directories ----------------------------------------------# 91 | output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/RNN_{args.pooling}_{args.interpolation}/{args.experiment_id}' 92 | 93 | os.makedirs(output_dir, exist_ok = True) 94 | assert os.path.exists(output_dir), f'Output dir {output_dir} does not exist' 95 | 96 | #----------------------------------------------- Hyperopt -----------------------------------------------# 97 | hyperopt_tunning(space=space, hyperopt_max_evals=args.hyperopt_max_evals, loss_function_val=mae, 98 | loss_functions_test={'mae':mae, 'mse': mse}, 99 | 
Y_df=Y_df, X_df=X_df, S_df=S_df, f_cols=[], 100 | ds_in_val=len_val, ds_in_test=len_test, 101 | return_forecasts=False, 102 | return_model=False, 103 | save_trials=True, 104 | results_dir=output_dir, 105 | verbose=True) 106 | 107 | 108 | def parse_args(): 109 | desc = "Example of hyperparameter tuning" 110 | parser = argparse.ArgumentParser(description=desc) 111 | parser.add_argument('--hyperopt_max_evals', type=int, help='hyperopt_max_evals') 112 | parser.add_argument('--pooling', type=int, help='pooling') 113 | parser.add_argument('--interpolation', type=int, help='interpolation') 114 | parser.add_argument('--experiment_id', default=None, required=False, type=str, help='string to identify experiment') 115 | return parser.parse_args() 116 | 117 | if __name__ == '__main__': 118 | 119 | # parse arguments 120 | args = parse_args() 121 | if args is None: 122 | exit() 123 | 124 | horizons = [96, 192, 336, 720] 125 | ILI_horizons = [24, 36, 48, 60] 126 | datasets = ['ETTm2', 'Exchange', 'ili', 'ECL', 'traffic', 'weather'] 127 | 128 | for dataset in datasets: 129 | # Horizon 130 | if dataset == 'ili': 131 | horizons_dataset = ILI_horizons 132 | else: 133 | horizons_dataset = horizons 134 | for horizon in horizons_dataset: 135 | print(50*'-', dataset, 50*'-') 136 | print(50*'-', horizon, 50*'-') 137 | start = time.time() 138 | args.dataset = dataset 139 | args.horizon = horizon 140 | main(args) 141 | print('Time: ', time.time() - start) 142 | 143 | main(args) 144 | 145 | # source ~/anaconda3/etc/profile.d/conda.sh 146 | # conda activate nixtla 147 | # CUDA_VISIBLE_DEVICES=2 python rnn_multivariate.py --pooling 2 --interpolation 2 --hyperopt_max_evals 5 --experiment_id "2022_05_15" 148 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/__init__.py -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/data/datasets/__init__.py -------------------------------------------------------------------------------- /src/data/datasets/ecl.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__ecl.ipynb (unless otherwise specified). 
2 | 3 | __all__ = ['ECL', 'ECLInfo', 'ECL'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import gdown 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from .utils import Info, time_features_from_frequency_str 15 | from .ett import process_multiple_ts 16 | 17 | # Cell 18 | @dataclass 19 | class ECL: 20 | freq: str = 'H' 21 | name: str = 'ECL' 22 | n_ts: int = 321 23 | 24 | # Cell 25 | ECLInfo = Info(groups=('ECL',), 26 | class_groups=(ECL,)) 27 | 28 | # Cell 29 | @dataclass 30 | class ECL: 31 | 32 | source_url: str = 'https://drive.google.com/uc?id=1rUPdR7R2iWFW-LMoDdHoO2g4KgnkpFzP' 33 | 34 | @staticmethod 35 | def load(directory: str, 36 | cache: bool = True) -> Tuple[pd.DataFrame, 37 | Optional[pd.DataFrame], 38 | Optional[pd.DataFrame]]: 39 | """Downloads and loads ETT data. 40 | 41 | Parameters 42 | ---------- 43 | directory: str 44 | Directory where data will be downloaded. 45 | cache: bool 46 | If `True` saves and loads 47 | 48 | Notes 49 | ----- 50 | [1] Returns train+val+test sets. 51 | """ 52 | path = f'{directory}/ecl/datasets' 53 | file_cache = f'{path}/ECL.p' 54 | 55 | if os.path.exists(file_cache) and cache: 56 | df, X_df, S_df = pd.read_pickle(file_cache) 57 | 58 | return df, X_df, S_df 59 | 60 | 61 | ECL.download(directory) 62 | path = f'{directory}/ecl/datasets' 63 | 64 | y_df = pd.read_csv(f'{path}/ECL.csv') 65 | y_df, X_df = process_multiple_ts(y_df) 66 | 67 | S_df = None 68 | if cache: 69 | pd.to_pickle((y_df, X_df, S_df), file_cache) 70 | 71 | return y_df, X_df, S_df 72 | 73 | @staticmethod 74 | def download(directory: str) -> None: 75 | """Download ECL Dataset.""" 76 | path = f'{directory}/ecl/datasets/' 77 | if not os.path.exists(path): 78 | os.makedirs(path) 79 | gdown.download(ECL.source_url, f'{path}/ECL.csv') -------------------------------------------------------------------------------- /src/data/datasets/epf.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__epf.ipynb (unless otherwise specified). 
2 | 3 | __all__ = ['NP', 'PJM', 'BE', 'FR', 'DE', 'EPFInfo', 'EPF', 'epf_naive_forecast'] 4 | 5 | # Cell 6 | import os 7 | from datetime import timedelta 8 | from dataclasses import dataclass 9 | from typing import Dict, List, Optional, Tuple, Union 10 | 11 | import numpy as np 12 | import pandas as pd 13 | from pandas.tseries.frequencies import to_offset 14 | 15 | from .utils import download_file, Info, TimeSeriesDataclass 16 | from ..tsdataset import TimeSeriesDataset 17 | 18 | # Cell 19 | @dataclass 20 | class NP: 21 | test_date: str = '2016-12-27' 22 | name: str = 'NP' 23 | 24 | @dataclass 25 | class PJM: 26 | test_date: str = '2016-12-27' 27 | name: str = 'PJM' 28 | 29 | @dataclass 30 | class BE: 31 | test_date: str = '2015-01-04' 32 | name: str = 'BE' 33 | 34 | @dataclass 35 | class FR: 36 | test_date: str = '2015-01-04' 37 | name: str = 'FR' 38 | 39 | @dataclass 40 | class DE: 41 | test_date: str = '2016-01-04' 42 | name: str = 'DE' 43 | 44 | # Cell 45 | EPFInfo = Info(groups=('NP', 'PJM', 'BE', 'FR', 'DE'), 46 | class_groups=(NP, PJM, BE, FR, DE)) 47 | 48 | # Cell 49 | class EPF: 50 | 51 | source_url = 'https://sandbox.zenodo.org/api/files/da5b2c6f-8418-4550-a7d0-7f2497b40f1b/' 52 | 53 | @staticmethod 54 | def load(directory: str, 55 | group: str) -> Tuple[pd.DataFrame, 56 | Optional[pd.DataFrame], 57 | Optional[pd.DataFrame]]: 58 | """ 59 | Downloads and loads EPF data. 60 | 61 | Parameters 62 | ---------- 63 | directory: str 64 | Directory where data will be downloaded. 65 | group: str 66 | Group name. 67 | Allowed groups: 'NP', 'PJM', 'BE', 'FR', 'DE'. 68 | """ 69 | EPF.download(directory) 70 | class_group = EPFInfo.get_group(group) 71 | 72 | path = f'{directory}/epf/datasets' 73 | file = f'{path}/{group}.csv' 74 | 75 | df = pd.read_csv(file) 76 | 77 | df.columns = ['ds', 'y'] + \ 78 | [f'Exogenous{i}' for i in range(1, len(df.columns) - 1)] 79 | 80 | df['unique_id'] = group 81 | df['ds'] = pd.to_datetime(df['ds']) 82 | df['week_day'] = df['ds'].dt.dayofweek 83 | 84 | dummies = pd.get_dummies(df['week_day'], prefix='day') 85 | df = pd.concat([df, dummies], axis=1) 86 | 87 | dummies_cols = [col for col in df if col.startswith('day')] 88 | 89 | Y = df.filter(items=['unique_id', 'ds', 'y']) 90 | X = df.filter(items=['unique_id', 'ds', 'Exogenous1', 'Exogenous2', 'week_day'] + \ 91 | dummies_cols) 92 | 93 | return Y, X, None 94 | 95 | @staticmethod 96 | def load_groups(directory: str, 97 | groups: List[str]) -> Tuple[pd.DataFrame, 98 | Optional[pd.DataFrame], 99 | Optional[pd.DataFrame]]: 100 | """ 101 | Downloads and loads panel of EPF data 102 | according of groups. 103 | 104 | Parameters 105 | ---------- 106 | directory: str 107 | Directory where data will be downloaded. 108 | groups: List[str] 109 | Group names. 110 | Allowed groups: 'NP', 'PJM', 'BE', 'FR', 'DE'. 
111 | """ 112 | Y = [] 113 | X = [] 114 | for group in groups: 115 | Y_df, X_df, S_df = EPF.load(directory=directory, group=group) 116 | Y.append(Y_df) 117 | X.append(X_df) 118 | 119 | Y = pd.concat(Y).sort_values(['unique_id', 'ds']).reset_index(drop=True) 120 | X = pd.concat(X).sort_values(['unique_id', 'ds']).reset_index(drop=True) 121 | 122 | S = Y[['unique_id']].drop_duplicates().reset_index(drop=True) 123 | dummies = pd.get_dummies(S['unique_id'], prefix='static') 124 | S = pd.concat([S, dummies], axis=1) 125 | 126 | return Y, X, S 127 | 128 | @staticmethod 129 | def download(directory: str) -> None: 130 | """Downloads EPF Dataset.""" 131 | path = f'{directory}/epf/datasets' 132 | if not os.path.exists(path): 133 | for group in EPFInfo.groups: 134 | download_file(path, EPF.source_url + f'{group}.csv') 135 | 136 | # Cell 137 | # TODO: extend this to group_by unique_id application 138 | def epf_naive_forecast(Y_df): 139 | """Function to build the naive forecast for electricity price forecasting 140 | 141 | The function is used to compute the accuracy metrics MASE and RMAE, the function 142 | assumes that the number of prices per day is 24. And computes naive forecast for 143 | days of the week and seasonal Naive forecast for weekends. 144 | 145 | Parameters 146 | ---------- 147 | Y_df : pandas.DataFrame 148 | Dataframe containing the real prices in long format 149 | that contains variables ['ds', 'unique_id', 'y'] 150 | 151 | Returns 152 | ------- 153 | Y_hat_df : pandas.DataFrame 154 | Dataframe containing the predictions of the epf naive forecast. 155 | """ 156 | assert type(Y_df) == pd.core.frame.DataFrame 157 | assert all([(col in Y_df) for col in ['unique_id', 'ds', 'y']]) 158 | 159 | # Init the naive forecast 160 | Y_hat_df = Y_df[24 * 7:].copy() 161 | Y_hat_df['dayofweek'] = Y_df['ds'].dt.dayofweek 162 | 163 | # Monday, Saturday and Sunday 164 | # we have a naive forecast using weekly seasonality 165 | weekend_indicator = Y_hat_df['dayofweek'].isin([0,5,6]) 166 | 167 | # Tuesday, Wednesday, Thursday, Friday 168 | # we have a naive forecast using daily seasonality 169 | week_indicator = Y_hat_df['dayofweek'].isin([1,2,3,4]) 170 | 171 | naive = Y_df['y'].shift(24).values[24 * 7:] 172 | seasonal_naive = Y_df['y'].shift(24*7).values[24 * 7:] 173 | 174 | Y_hat_df['y_hat'] = naive * week_indicator + seasonal_naive * weekend_indicator 175 | return Y_hat_df -------------------------------------------------------------------------------- /src/data/datasets/ett.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__ett.ipynb (unless otherwise specified). 
2 | 3 | __all__ = ['ETTh1', 'ETTh2', 'ETTm1', 'ETTm2', 'ETTInfo', 'process_multiple_ts', 'ETT'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from .utils import download_file, Info, time_features_from_frequency_str 14 | 15 | # Cell 16 | @dataclass 17 | class ETTh1: 18 | freq: str = 'H' 19 | name: str = 'ETTh1' 20 | n_ts: int = 7 21 | 22 | @dataclass 23 | class ETTh2: 24 | freq: str = 'H' 25 | name: str = 'ETTh2' 26 | n_ts: int = 7 27 | 28 | @dataclass 29 | class ETTm1: 30 | freq: str = '15T' 31 | name: str = 'ETTm1' 32 | n_ts: int = 7 33 | 34 | @dataclass 35 | class ETTm2: 36 | freq: str = '15T' 37 | name: str = 'ETTm2' 38 | n_ts: int = 7 39 | 40 | # Cell 41 | ETTInfo = Info(groups=('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'), 42 | class_groups=(ETTh1, ETTh2, ETTm1, ETTm2)) 43 | 44 | # Cell 45 | def process_multiple_ts(y_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: 46 | """Transforms multiple timeseries as columns to long format.""" 47 | y_df['date'] = pd.to_datetime(y_df['date']) 48 | y_df.rename(columns={'date': 'ds'}, inplace=True) 49 | u_ids = y_df.columns.to_list() 50 | u_ids.remove('ds') 51 | 52 | time_cls = time_features_from_frequency_str('h') 53 | for cls_ in time_cls: 54 | cls_name = cls_.__class__.__name__ 55 | y_df[cls_name] = cls_(y_df['ds'].dt) 56 | 57 | X_df = y_df.drop(u_ids, axis=1) 58 | y_df = y_df.filter(items=['ds'] + u_ids) 59 | y_df = y_df.set_index('ds').stack() 60 | y_df = y_df.rename('y').rename_axis(['ds', 'unique_id']).reset_index() 61 | y_df['unique_id'] = pd.Categorical(y_df['unique_id'], u_ids) 62 | y_df = y_df[['unique_id', 'ds', 'y']].sort_values(['unique_id', 'ds']) 63 | 64 | X_df = y_df[['unique_id', 'ds']].merge(X_df, how='left', on=['ds']) 65 | 66 | return y_df, X_df 67 | 68 | # Cell 69 | @dataclass 70 | class ETT: 71 | 72 | source_url: str = 'https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/' 73 | 74 | @staticmethod 75 | def load(directory: str, 76 | group: str, 77 | cache: bool = True) -> Tuple[pd.DataFrame, 78 | Optional[pd.DataFrame], 79 | Optional[pd.DataFrame]]: 80 | """Downloads and loads ETT data. 81 | 82 | Parameters 83 | ---------- 84 | directory: str 85 | Directory where data will be downloaded. 86 | group: str 87 | Group name. 88 | Allowed groups: 'ETTh1', 'ETTh2', 89 | 'ETTm1', 'ETTm2'. 90 | cache: bool 91 | If `True` saves and loads 92 | 93 | Notes 94 | ----- 95 | [1] Returns train+val+test sets. 
96 | """ 97 | path = f'{directory}/ett/datasets' 98 | file_cache = f'{path}/{group}.p' 99 | 100 | if os.path.exists(file_cache) and cache: 101 | df, X_df, S_df = pd.read_pickle(file_cache) 102 | 103 | return df, X_df, S_df 104 | 105 | 106 | ETT.download(directory) 107 | path = f'{directory}/ett/datasets' 108 | class_group = ETTInfo[group] 109 | 110 | y_df = pd.read_csv(f'{path}/{group}.csv') 111 | 112 | y_df, X_df = process_multiple_ts(y_df) 113 | 114 | S_df = None 115 | if cache: 116 | pd.to_pickle((y_df, X_df, S_df), file_cache) 117 | 118 | return y_df, X_df, S_df 119 | 120 | @staticmethod 121 | def download(directory: str) -> None: 122 | """Download ETT Dataset.""" 123 | path = f'{directory}/ett/datasets/' 124 | if not os.path.exists(path): 125 | for group in ETTInfo.groups: 126 | download_file(path, f'{ETT.source_url}/{group}.csv') -------------------------------------------------------------------------------- /src/data/datasets/gefcom2012.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__gefcom2012.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['logger', 'GEFCom2012', 'GEFCom2012_L', 'GEFCom2012_W'] 4 | 5 | # Cell 6 | import os 7 | import re 8 | import logging 9 | import zipfile 10 | 11 | from dataclasses import dataclass 12 | from typing import Dict, List, Optional, Tuple, Union 13 | from pandas.tseries.holiday import USFederalHolidayCalendar as calendar 14 | 15 | import numpy as np 16 | import pandas as pd 17 | 18 | from .utils import ( 19 | download_file, 20 | Info, 21 | TimeSeriesDataclass, 22 | create_calendar_variables, 23 | create_us_holiday_distance_variables, 24 | ) 25 | from ..tsdataset import TimeSeriesDataset 26 | 27 | logging.basicConfig(level=logging.INFO) 28 | logger = logging.getLogger(__name__) 29 | 30 | # Cell 31 | class GEFCom2012: 32 | 33 | source_url = 'https://www.dropbox.com/s/epj9b57eivn79j7/GEFCom2012.zip?dl=1' 34 | 35 | @staticmethod 36 | def download(directory: str) -> None: 37 | """Downloads GEFCom2012 Dataset.""" 38 | path = f'{directory}/gefcom2012' 39 | if not os.path.exists(path): 40 | download_file(directory=path, 41 | source_url=GEFCom2012.source_url, 42 | decompress=True) 43 | 44 | # Cell 45 | class GEFCom2012_L: 46 | 47 | @staticmethod 48 | def load_Y(directory) -> pd.DataFrame: 49 | # Meta data 50 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Load' 51 | filepath = f'{path}/Load_history.csv' 52 | Y_df = pd.read_csv(filepath, sep=',', thousands=',') 53 | 54 | # Parsing temperature data 55 | Y_df['ds'] = pd.to_datetime(dict(year=Y_df.year, 56 | month=Y_df.month, 57 | day=Y_df.day)) 58 | del Y_df['year'], Y_df['month'], Y_df['day'] 59 | Y_df = pd.wide_to_long(Y_df, ['h'], i=['zone_id', 'ds'], j="hour") 60 | Y_df.reset_index(inplace=True) 61 | Y_df['tdelta'] = pd.to_timedelta(Y_df.hour, unit="h") 62 | Y_df['ds'] = Y_df['ds'] + Y_df['tdelta'] 63 | del Y_df['tdelta'], Y_df['hour'] 64 | Y_df.rename(columns={'zone_id': 'unique_id', 'h': 'y'}, inplace=True) 65 | #Y_df['y'] = pd.to_numeric(Y_df['y'], errors='coerce') 66 | return Y_df 67 | 68 | @staticmethod 69 | def load_X(directory) -> pd.DataFrame: 70 | # Meta data 71 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Load' 72 | filepath = f'{path}/temperature_history.csv' 73 | X_df = pd.read_csv(filepath, sep=',') 74 | 75 | # Parsing temperature data 76 | X_df['ds'] = pd.to_datetime(dict(year=X_df.year, 77 | month=X_df.month, 78 | day=X_df.day)) 79 | del X_df['year'], X_df['month'], X_df['day'] 
80 | X_df = pd.wide_to_long(X_df, ['h'], i=['station_id', 'ds'], j="hour") 81 | X_df.reset_index(inplace=True) 82 | X_df['tdelta'] = pd.to_timedelta(X_df.hour, unit="h") 83 | X_df['ds'] = X_df['ds'] + X_df['tdelta'] 84 | del X_df['tdelta'], X_df['hour'] 85 | X_df['station_id'] = 'station_' + X_df['station_id'].astype(str) 86 | 87 | X_df = X_df.pivot(index='ds', columns='station_id', values='h').reset_index('ds') 88 | X_df.reset_index(drop=True, inplace=True) 89 | X_df = create_calendar_variables(X_df=X_df) 90 | X_df = create_us_holiday_distance_variables(X_df=X_df) 91 | return X_df 92 | 93 | @staticmethod 94 | def load_benchmark(directory) -> pd.DataFrame: 95 | # Meta data 96 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Load' 97 | filepath = f'{path}/Load_benchmark.csv' 98 | benchmark_df = pd.read_csv(filepath, sep=',') 99 | 100 | # Parsing benchmark data 101 | benchmark_df['ds'] = pd.to_datetime(dict(year=benchmark_df.year, 102 | month=benchmark_df.month, 103 | day=benchmark_df.day)) 104 | del benchmark_df['year'], benchmark_df['month'], benchmark_df['day'], benchmark_df['id'] 105 | benchmark_df.rename(columns={'zone_id': 'unique_id'}, inplace=True) 106 | 107 | benchmark_df = pd.wide_to_long(benchmark_df, ['h'], i=['unique_id', 'ds'], j="hour") 108 | benchmark_df.reset_index(inplace=True) 109 | 110 | benchmark_df['tdelta'] = pd.to_timedelta(benchmark_df.hour, unit="h") 111 | benchmark_df['ds'] = benchmark_df['ds'] + benchmark_df['tdelta'] 112 | del benchmark_df['tdelta'], benchmark_df['hour'] 113 | benchmark_df.rename(columns={'h': 'y_hat'}, inplace=True) 114 | return benchmark_df 115 | 116 | @staticmethod 117 | def load(directory) -> Tuple[pd.DataFrame, 118 | pd.DataFrame, 119 | pd.DataFrame]: 120 | 121 | GEFCom2012.download(directory) 122 | 123 | Y_df = GEFCom2012_L.load_Y(directory) 124 | X_df = GEFCom2012_L.load_X(directory) 125 | benchmark_df = GEFCom2012_L.load_benchmark(directory) 126 | return Y_df, X_df, benchmark_df 127 | 128 | # Cell 129 | class GEFCom2012_W: 130 | 131 | train_start = '2009-07-01 00:00:00' 132 | train_end = '2010-12-31 23:00:00' 133 | 134 | test_start = '2011-01-01 01:00:00' 135 | test_end = '2012-06-28 12:00:00' 136 | 137 | @staticmethod 138 | def load_benchmark(directory): 139 | # Meta data 140 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Wind' 141 | filepath = f'{path}/benchmark.csv' 142 | benchmark_df = pd.read_csv(filepath, sep=',') 143 | 144 | benchmark_df['ds'] = pd.to_datetime(benchmark_df.date, format='%Y%m%d%H') 145 | del benchmark_df['date'] 146 | benchmark_df = pd.wide_to_long(benchmark_df, ['wp'], i='ds', j="unique_id") 147 | benchmark_df.reset_index(inplace=True) 148 | return benchmark_df 149 | 150 | @staticmethod 151 | def load_Y(directory): 152 | # Meta data 153 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Wind' 154 | yfilepath = f'{path}/train.csv' 155 | 156 | # Read and parse Y data 157 | Y_df = pd.read_csv(yfilepath, sep='\t') 158 | Y_df['ds'] = pd.date_range(start=GEFCom2012_W.train_start, 159 | end=GEFCom2012_W.train_end, freq='H') 160 | del Y_df['date'] 161 | Y_df = pd.wide_to_long(Y_df, ['wp'], i='ds', j="unique_id") 162 | Y_df.reset_index(inplace=True) 163 | return Y_df 164 | 165 | @staticmethod 166 | def load_X_group(directory, group): 167 | # Meta data 168 | path = f'{directory}/gefcom2012/GEFCOM2012_Data/Wind' 169 | xfilepath = f'{path}/windforecasts_wf{group}.csv' 170 | X_df = pd.read_csv(xfilepath, sep=',') 171 | 172 | # Create ds associated to each forecast from forecast creation date 173 | X_df['date'] = 
X_df.date.astype(str) 174 | X_df['fcd'] = pd.to_datetime(X_df.date, format='%Y%m%d%H') 175 | X_df['tdelta'] = pd.to_timedelta(X_df.hors, unit="h") 176 | X_df['ds'] = X_df['fcd'] + X_df['tdelta'] 177 | 178 | # Separate forecasts by lead time 179 | X_lead12_df = X_df[(X_df.hors>0) & (X_df.hors<=12)].reset_index(drop=True) 180 | X_lead24_df = X_df[(X_df.hors>12) & (X_df.hors<=24)].reset_index(drop=True) 181 | X_lead36_df = X_df[(X_df.hors>24) & (X_df.hors<=36)].reset_index(drop=True) 182 | X_lead48_df = X_df[(X_df.hors>36) & (X_df.hors<=48)].reset_index(drop=True) 183 | del X_df 184 | 185 | # Cleaning auxiliary variables and reconstructing X_df 186 | X_df = pd.DataFrame({'ds': pd.date_range(start='2009-07-01 01:00:00', 187 | end=GEFCom2012_W.test_end, freq='H')}) 188 | for lead, df in zip(['_lead12', '_lead24', '_lead36', '_lead48'], \ 189 | [X_lead12_df, X_lead24_df, X_lead36_df, X_lead48_df]): 190 | df.drop(['fcd', 'tdelta', 'date', 'hors'], axis=1, inplace=True) 191 | df.columns = [f'u{lead}', f'v{lead}', f'ws{lead}', f'wd{lead}', 'ds'] 192 | X_df = X_df.merge(df, on='ds', how='left') 193 | 194 | # Removing nans in hierarchical fashion (priority to shorter lead forecasts) 195 | for var in ['u', 'v', 'ws', 'wd']: 196 | X_df[var] = X_df[f'{var}_lead12'] 197 | for lead in ['_lead24', '_lead36', '_lead48']: 198 | X_df[var].fillna(X_df[f'{var}{lead}'], inplace=True) 199 | 200 | for var in ['u', 'v', 'ws', 'wd']: 201 | for lead in ['_lead12', '_lead24', '_lead36', '_lead48']: 202 | X_df[f'{var}{lead}'].fillna(X_df[var], inplace=True) 203 | del X_df[var] 204 | del X_lead12_df, X_lead24_df, X_lead36_df, X_lead48_df 205 | X_df['unique_id'] = group 206 | return X_df 207 | 208 | @staticmethod 209 | def load(directory: str) -> Tuple[pd.DataFrame, 210 | pd.DataFrame, 211 | pd.DataFrame]: 212 | GEFCom2012.download(directory) 213 | 214 | Y_df = GEFCom2012_W.load_Y(directory) 215 | X_df_list = [GEFCom2012_W.load_X_group(directory, group) for group in range(1,8)] 216 | X_df = pd.concat(X_df_list) 217 | 218 | benchmark_df = GEFCom2012_W.load_benchmark(directory) 219 | return Y_df, X_df, benchmark_df -------------------------------------------------------------------------------- /src/data/datasets/m3.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__m3.ipynb (unless otherwise specified). 
2 | 3 | __all__ = ['Yearly', 'Quarterly', 'Monthly', 'Other', 'M3Info', 'M3'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from .utils import download_file, Info, TimeSeriesDataclass 14 | 15 | # Cell 16 | @dataclass 17 | class Yearly: 18 | seasonality: int = 1 19 | horizon: int = 6 20 | freq: str = 'Y' 21 | sheet_name: str = 'M3Year' 22 | name: str = 'Yearly' 23 | n_ts: int = 645 24 | 25 | @dataclass 26 | class Quarterly: 27 | seasonality: int = 4 28 | horizon: int = 8 29 | freq: str = 'Q' 30 | sheet_name: str = 'M3Quart' 31 | name: str = 'Quarterly' 32 | n_ts: int = 756 33 | 34 | @dataclass 35 | class Monthly: 36 | seasonality: int = 12 37 | horizon: int = 18 38 | freq: str = 'M' 39 | sheet_name: str = 'M3Month' 40 | name: str = 'Monthly' 41 | n_ts: int = 1428 42 | 43 | @dataclass 44 | class Other: 45 | seasonality: int = 1 46 | horizon: int = 8 47 | freq: str = 'D' 48 | sheet_name: str = 'M3Other' 49 | name: str = 'Other' 50 | n_ts: int = 174 51 | 52 | # Cell 53 | M3Info = Info(groups=('Yearly', 'Quarterly', 'Monthly', 'Other'), 54 | class_groups=(Yearly, Quarterly, Monthly, Other)) 55 | 56 | # Internal Cell 57 | def _return_year(ts): 58 | year = ts.iloc[0] 59 | year = year if year != 0 else 1970 60 | 61 | return year 62 | 63 | # Cell 64 | @dataclass 65 | class M3(TimeSeriesDataclass): 66 | 67 | source_url = 'https://forecasters.org/data/m3comp/M3C.xls' 68 | 69 | @staticmethod 70 | def load(directory: str, 71 | group: str) -> Tuple[pd.DataFrame, 72 | Optional[pd.DataFrame], 73 | Optional[pd.DataFrame]]: 74 | """ 75 | Downloads and loads M3 data. 76 | 77 | Parameters 78 | ---------- 79 | directory: str 80 | Directory where data will be downloaded. 81 | group: str 82 | Group name. 83 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly', 'Other'. 84 | 85 | Notes 86 | ----- 87 | [1] Returns train+test sets. 88 | [2] There are monthly time series without start year. 89 | This time series will start with 1970. 90 | [3] Other time series have no start date. 91 | This time series will start with 1970. 
92 | """ 93 | M3.download(directory) 94 | 95 | path = f'{directory}/m3/datasets/' 96 | file = f'{path}/M3C.xls' 97 | 98 | class_group = M3Info.get_group(group) 99 | 100 | df = pd.read_excel(file, sheet_name=class_group.sheet_name) 101 | df = df.rename(columns={'Series': 'unique_id', 102 | 'Category': 'category', 103 | 'Starting Year': 'year', 104 | 'Starting Month': 'month'}) 105 | df['unique_id'] = [class_group.name[0] + str(i + 1) for i in range(len(df))] 106 | S = df.filter(items=['unique_id', 'category']) 107 | 108 | id_vars = list(df.columns[:6]) 109 | df = pd.melt(df, id_vars=id_vars, var_name='ds', value_name='y') 110 | df = df.dropna().sort_values(['unique_id', 'ds']).reset_index(drop=True) 111 | 112 | freq = pd.tseries.frequencies.to_offset(class_group.freq) 113 | 114 | if group == 'Other': 115 | df['year'] = 1970 116 | 117 | df['ds'] = df.groupby('unique_id')['year'] \ 118 | .transform(lambda df: pd.date_range(f'{_return_year(df)}-01-01', 119 | periods=df.shape[0], 120 | freq=freq)) 121 | df = df.filter(items=['unique_id', 'ds', 'y']) 122 | 123 | return df, None, None 124 | 125 | @staticmethod 126 | def download(directory: str) -> None: 127 | """Download M3 Dataset.""" 128 | path = f'{directory}/m3/datasets/' 129 | if not os.path.exists(path): 130 | download_file(path, M3.source_url) -------------------------------------------------------------------------------- /src/data/datasets/m4.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__m4.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly', 'Other', 'M4Info', 'M4', 'M4Evaluation'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from .utils import download_file, Info 14 | from ...losses.numpy import smape, mase 15 | 16 | # Cell 17 | @dataclass 18 | class Yearly: 19 | seasonality: int = 1 20 | horizon: int = 6 21 | freq: str = 'Y' 22 | name: str = 'Yearly' 23 | n_ts: int = 23_000 24 | 25 | @dataclass 26 | class Quarterly: 27 | seasonality: int = 4 28 | horizon: int = 8 29 | freq: str = 'Q' 30 | name: str = 'Quarterly' 31 | n_ts: int = 24_000 32 | 33 | @dataclass 34 | class Monthly: 35 | seasonality: int = 12 36 | horizon: int = 18 37 | freq: str = 'M' 38 | name: str = 'Monthly' 39 | n_ts: int = 48_000 40 | 41 | @dataclass 42 | class Weekly: 43 | seasonality: int = 1 44 | horizon: int = 13 45 | freq: str = 'W' 46 | name: str = 'Weekly' 47 | n_ts: int = 359 48 | 49 | @dataclass 50 | class Daily: 51 | seasonality: int = 1 52 | horizon: int = 14 53 | freq: str = 'D' 54 | name: str = 'Daily' 55 | n_ts: int = 4_227 56 | 57 | @dataclass 58 | class Hourly: 59 | seasonality: int = 24 60 | horizon: int = 48 61 | freq: str = 'H' 62 | name: str = 'Hourly' 63 | n_ts: int = 414 64 | 65 | 66 | @dataclass 67 | class Other: 68 | seasonality: int = 1 69 | horizon: int = 8 70 | freq: str = 'D' 71 | name: str = 'Other' 72 | n_ts: int = 5_000 73 | included_groups: Tuple = ('Weekly', 'Daily', 'Hourly') 74 | 75 | # Cell 76 | M4Info = Info(groups=('Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly', 'Other'), 77 | class_groups=(Yearly, Quarterly, Monthly, Weekly, Daily, Hourly, Other)) 78 | 79 | # Cell 80 | @dataclass 81 | class M4: 82 | 83 | source_url: str = 'https://raw.githubusercontent.com/Mcompetitions/M4-methods/master/Dataset/' 84 | 
naive2_forecast_url: str = 'https://github.com/Nixtla/m4-forecasts/raw/master/forecasts/submission-Naive2.zip' 85 | 86 | @staticmethod 87 | def load(directory: str, 88 | group: str, 89 | cache: bool = True) -> Tuple[pd.DataFrame, 90 | Optional[pd.DataFrame], 91 | Optional[pd.DataFrame]]: 92 | """Downloads and loads M4 data. 93 | 94 | Parameters 95 | ---------- 96 | directory: str 97 | Directory where data will be downloaded. 98 | group: str 99 | Group name. 100 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly', 101 | 'Weekly', 'Daily', 'Hourly'. 102 | cache: bool 103 | If `True` saves and loads 104 | 105 | Notes 106 | ----- 107 | [1] Returns train+test sets. 108 | """ 109 | path = f'{directory}/m4/datasets' 110 | file_cache = f'{path}/{group}.p' 111 | 112 | if os.path.exists(file_cache) and cache: 113 | df, X_df, S_df = pd.read_pickle(file_cache) 114 | 115 | return df, X_df, S_df 116 | 117 | if group == 'Other': 118 | #Special case. 119 | included_dfs = [M4.load(directory, gr) \ 120 | for gr in M4Info['Other'].included_groups] 121 | df, *_ = zip(*included_dfs) 122 | df = pd.concat(df) 123 | else: 124 | M4.download(directory) 125 | path = f'{directory}/m4/datasets' 126 | class_group = M4Info[group] 127 | S_df = pd.read_csv(f'{directory}/m4/datasets/M4-info.csv', 128 | usecols=['M4id','category']) 129 | S_df['category'] = S_df['category'].astype('category').cat.codes 130 | S_df.rename({'M4id': 'unique_id'}, axis=1, inplace=True) 131 | S_df = S_df[S_df['unique_id'].str.startswith(class_group.name[0])] 132 | 133 | def read_and_melt(file): 134 | df = pd.read_csv(file) 135 | df.columns = ['unique_id'] + list(range(1, df.shape[1])) 136 | df = pd.melt(df, id_vars=['unique_id'], var_name='ds', value_name='y') 137 | df = df.dropna() 138 | 139 | return df 140 | 141 | df_train = read_and_melt(file=f'{path}/{group}-train.csv') 142 | df_test = read_and_melt(file=f'{path}/{group}-test.csv') 143 | 144 | len_train = df_train.groupby('unique_id').agg({'ds': 'max'}).reset_index() 145 | len_train.columns = ['unique_id', 'len_serie'] 146 | df_test = df_test.merge(len_train, on=['unique_id']) 147 | df_test['ds'] = df_test['ds'] + df_test['len_serie'] 148 | df_test.drop('len_serie', axis=1, inplace=True) 149 | 150 | df = pd.concat([df_train, df_test]) 151 | df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True) 152 | 153 | S_df = S_df.sort_values('unique_id').reset_index(drop=True) 154 | 155 | X_df = None 156 | if cache: 157 | pd.to_pickle((df, X_df, S_df), file_cache) 158 | 159 | return df, None, S_df 160 | 161 | @staticmethod 162 | def download(directory: str) -> None: 163 | """Download M4 Dataset.""" 164 | path = f'{directory}/m4/datasets/' 165 | if not os.path.exists(path): 166 | for group in M4Info.groups: 167 | download_file(path, f'{M4.source_url}/Train/{group}-train.csv') 168 | download_file(path, f'{M4.source_url}/Test/{group}-test.csv') 169 | download_file(path, f'{M4.source_url}/M4-info.csv') 170 | download_file(path, M4.naive2_forecast_url, decompress=True) 171 | 172 | # Cell 173 | class M4Evaluation: 174 | 175 | @staticmethod 176 | def load_benchmark(directory: str, group: str, 177 | source_url: Optional[str] = None) -> np.ndarray: 178 | """Downloads and loads a bechmark forecasts. 179 | 180 | Parameters 181 | ---------- 182 | directory: str 183 | Directory where data will be downloaded. 184 | group: str 185 | Group name. 186 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly', 187 | 'Weekly', 'Daily', 'Hourly'. 
188 | source_url: str, optional 189 | Optional benchmark url obtained from 190 | https://github.com/Nixtla/m4-forecasts/tree/master/forecasts. 191 | If `None` returns Naive2. 192 | 193 | Returns 194 | ------- 195 | benchmark: numpy array 196 | Numpy array of shape (n_series, horizon). 197 | """ 198 | path = f'{directory}/m4/datasets' 199 | initial = group[0] 200 | if source_url is not None: 201 | filename = source_url.split('/')[-1].replace('.rar', '.csv') 202 | filepath = f'{path}/{filename}' 203 | if not os.path.exists(filepath): 204 | download_file(path, source_url, decompress=True) 205 | 206 | else: 207 | filepath = f'{path}/submission-Naive2.csv' 208 | 209 | benchmark = pd.read_csv(filepath) 210 | benchmark = benchmark[benchmark['id'].str.startswith(initial)] 211 | benchmark = benchmark.set_index('id').dropna(1) 212 | benchmark = benchmark.sort_values('id').values 213 | 214 | return benchmark 215 | 216 | @staticmethod 217 | def evaluate(directory: str, group: str, 218 | y_hat: Union[np.ndarray, str]) -> pd.DataFrame: 219 | """Evaluates y_hat according to M4 methodology. 220 | 221 | Parameters 222 | ---------- 223 | directory: str 224 | Directory where data will be downloaded. 225 | group: str 226 | Group name. 227 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly', 228 | 'Weekly', 'Daily', 'Hourly'. 229 | y_hat: numpy array, str 230 | Group forecasts as numpy array or 231 | benchmark url from 232 | https://github.com/Nixtla/m4-forecasts/tree/master/forecasts. 233 | 234 | Returns 235 | ------- 236 | evaluation: pandas dataframe 237 | DataFrame with columns OWA, SMAPE, MASE 238 | and group as index. 239 | """ 240 | if isinstance(y_hat, str): 241 | y_hat = M4Evaluation.load_benchmark(directory, group, y_hat) 242 | 243 | initial = group[0] 244 | class_group = M4Info[group] 245 | horizon = class_group.horizon 246 | seasonality = class_group.seasonality 247 | path = f'{directory}/m4/datasets' 248 | y_df, *_ = M4.load(directory, group) 249 | 250 | y_train = y_df.groupby('unique_id')['y'] 251 | y_train = y_train.apply(lambda x: x.head(-horizon).values) 252 | y_train = y_train.values 253 | 254 | y_test = y_df.groupby('unique_id')['y'] 255 | y_test = y_test.tail(horizon) 256 | y_test = y_test.values.reshape(-1, horizon) 257 | 258 | naive2 = M4Evaluation.load_benchmark(directory, group) 259 | smape_y_hat = smape(y_test, y_hat) 260 | smape_naive2 = smape(y_test, naive2) 261 | 262 | mase_y_hat = np.mean([mase(y_test[i], y_hat[i], y_train[i], seasonality) 263 | for i in range(class_group.n_ts)]) 264 | mase_naive2 = np.mean([mase(y_test[i], naive2[i], y_train[i], seasonality) 265 | for i in range(class_group.n_ts)]) 266 | 267 | owa = .5 * (mase_y_hat / mase_naive2 + smape_y_hat / smape_naive2) 268 | 269 | evaluation = pd.DataFrame({'SMAPE': smape_y_hat, 270 | 'MASE': mase_y_hat, 271 | 'OWA': owa}, 272 | index=[group]) 273 | 274 | return evaluation -------------------------------------------------------------------------------- /src/data/datasets/m5.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__m5.ipynb (unless otherwise specified). 
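# Illustrative usage sketch (added for exposition, not part of the notebook
# export). It assumes network access to download the M5 zip into `./data`;
# `M5` and `M5Evaluation` are the classes defined below in this module.
def _m5_usage_sketch(directory: str = './data'):
    """Load the M5 panel and score the competition-winning benchmark."""
    Y_df, X_df, S_df = M5.load(directory)              # long-format sales, exogenous and static variables
    y_hat = M5Evaluation.load_benchmark(directory)     # defaults to the M5 winner forecasts
    scores = M5Evaluation.evaluate(directory, y_hat)   # WRMSSE per aggregation level plus a Total row
    return Y_df, scores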
2 | 3 | __all__ = ['M5', 'M5Evaluation'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | from .utils import download_file, Info 14 | 15 | # Cell 16 | @dataclass 17 | class M5: 18 | 19 | # original data available from Kaggle directly 20 | # pip install kaggle --upgrade 21 | # kaggle competitions download -c m5-forecasting-accuracy 22 | source_url: str = 'https://github.com/Nixtla/m5-forecasts/raw/main/datasets/m5.zip' 23 | 24 | @staticmethod 25 | def download(directory: str) -> None: 26 | """Downloads M5 Competition Dataset.""" 27 | path = f'{directory}/m5/datasets' 28 | if not os.path.exists(path): 29 | download_file(directory=path, 30 | source_url=M5.source_url, 31 | decompress=True) 32 | 33 | @staticmethod 34 | def load(directory: str, cache: bool = True) -> Tuple[pd.DataFrame, 35 | pd.DataFrame, 36 | pd.DataFrame]: 37 | """Downloads and loads M5 data. 38 | 39 | Parameters 40 | ---------- 41 | directory: str 42 | Directory where data will be downloaded. 43 | cache: bool 44 | If `True` saves and loads. 45 | 46 | Notes 47 | ----- 48 | [1] Returns train+test sets. 49 | [2] Based on https://www.kaggle.com/lemuz90/m5-preprocess. 50 | """ 51 | path = f'{directory}/m5/datasets' 52 | file_cache = f'{path}/m5.p' 53 | 54 | if os.path.exists(file_cache) and cache: 55 | Y_df, X_df, S_df = pd.read_pickle(file_cache) 56 | 57 | return Y_df, X_df, S_df 58 | 59 | M5.download(directory) 60 | # Calendar data 61 | cal_dtypes = { 62 | 'wm_yr_wk': np.uint16, 63 | 'event_name_1': 'category', 64 | 'event_type_1': 'category', 65 | 'event_name_2': 'category', 66 | 'event_type_2': 'category', 67 | 'snap_CA': np.uint8, 68 | 'snap_TX': np.uint8, 69 | 'snap_WI': np.uint8, 70 | } 71 | cal = pd.read_csv(f'{path}/calendar.csv', 72 | dtype=cal_dtypes, 73 | usecols=list(cal_dtypes.keys()) + ['date'], 74 | parse_dates=['date']) 75 | cal['d'] = np.arange(cal.shape[0]) + 1 76 | cal['d'] = 'd_' + cal['d'].astype('str') 77 | cal['d'] = cal['d'].astype('category') 78 | 79 | event_cols = [k for k in cal_dtypes if k.startswith('event')] 80 | for col in event_cols: 81 | cal[col] = cal[col].cat.add_categories('nan').fillna('nan') 82 | 83 | # Prices 84 | prices_dtypes = { 85 | 'store_id': 'category', 86 | 'item_id': 'category', 87 | 'wm_yr_wk': np.uint16, 88 | 'sell_price': np.float32 89 | } 90 | 91 | prices = pd.read_csv(f'{path}/sell_prices.csv', 92 | dtype=prices_dtypes) 93 | 94 | # Sales 95 | sales_dtypes = { 96 | 'item_id': prices.item_id.dtype, 97 | 'dept_id': 'category', 98 | 'cat_id': 'category', 99 | 'store_id': 'category', 100 | 'state_id': 'category', 101 | **{f'd_{i+1}': np.float32 for i in range(1969)} 102 | } 103 | # Reading train and test sets 104 | sales_train = pd.read_csv(f'{path}/sales_train_evaluation.csv', 105 | dtype=sales_dtypes) 106 | sales_test = pd.read_csv(f'{path}/sales_test_evaluation.csv', 107 | dtype=sales_dtypes) 108 | sales = sales_train.merge(sales_test, how='left', 109 | on=['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']) 110 | sales['id'] = sales[['item_id', 'store_id']].astype(str).agg('_'.join, axis=1).astype('category') 111 | # Long format 112 | long = sales.melt(id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], 113 | var_name='d', value_name='y') 114 | long['d'] = long['d'].astype(cal.d.dtype) 115 | long = long.merge(cal, on=['d']) 116 | long = long.merge(prices, on=['store_id', 'item_id', 'wm_yr_wk']) 117 | long = 
long.drop(columns=['d', 'wm_yr_wk']) 118 | 119 | def first_nz_mask(values, index): 120 | """Return a boolean mask where the True starts at the first non-zero value.""" 121 | mask = np.full(values.size, True) 122 | for idx, value in enumerate(values): 123 | if value == 0: 124 | mask[idx] = False 125 | else: 126 | break 127 | return mask 128 | 129 | long = long.sort_values(['id', 'date'], ignore_index=True) 130 | keep_mask = long.groupby('id')['y'].transform(first_nz_mask, engine='numba') 131 | long = long[keep_mask.astype(bool)] 132 | long.rename(columns={'id': 'unique_id', 'date': 'ds'}, inplace=True) 133 | Y_df = long.filter(items=['unique_id', 'ds', 'y']) 134 | cats = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'] 135 | S_df = long.filter(items=['unique_id'] + cats) 136 | S_df = S_df.drop_duplicates(ignore_index=True) 137 | X_df = long.drop(columns=['y'] + cats) 138 | 139 | if cache: 140 | pd.to_pickle((Y_df, X_df, S_df), file_cache) 141 | 142 | return Y_df, X_df, S_df 143 | 144 | # Cell 145 | class M5Evaluation: 146 | 147 | levels: dict = dict( 148 | Level1=['total'], 149 | Level2=['state_id'], 150 | Level3=['store_id'], 151 | Level4=['cat_id'], 152 | Level5=['dept_id'], 153 | Level6=['state_id', 'cat_id'], 154 | Level7=['state_id', 'dept_id'], 155 | Level8=['store_id', 'cat_id'], 156 | Level9=['store_id', 'dept_id'], 157 | Level10=['item_id'], 158 | Level11=['state_id', 'item_id'], 159 | Level12=['item_id', 'store_id'] 160 | ) 161 | 162 | @staticmethod 163 | def load_benchmark(directory: str, 164 | source_url: Optional[str] = None, 165 | validation: bool = False) -> np.ndarray: 166 | """Downloads and loads a bechmark forecasts. 167 | 168 | Parameters 169 | ---------- 170 | directory: str 171 | Directory where data will be downloaded. 172 | source_url: str, optional 173 | Optional benchmark url obtained from 174 | https://github.com/Nixtla/m5-forecasts/tree/master/forecasts. 175 | If `None` returns the M5 winner. 176 | validation: bool 177 | Wheter return validation forecasts. 178 | Default False, return test forecasts. 179 | 180 | Returns 181 | ------- 182 | benchmark: numpy array 183 | Numpy array of shape (n_series, horizon). 
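        Examples
        --------
        >>> # Hedged sketch: with the default `source_url=None` the M5 winner
        >>> # forecasts are downloaded into `directory` (network access assumed).
        >>> benchmark = M5Evaluation.load_benchmark('./data')
        >>> 'unique_id' in benchmark.columns   # forecasts merged with the static variables
        True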
184 | """ 185 | path = f'{directory}/m5/datasets' 186 | if source_url is not None: 187 | filename = source_url.split('/')[-1].replace('.rar', '.csv') 188 | filepath = f'{path}/{filename}' 189 | if not os.path.exists(filepath): 190 | download_file(path, source_url, decompress=True) 191 | 192 | else: 193 | source_url = 'https://github.com/Nixtla/m5-forecasts/raw/main/forecasts/0001 YJ_STU.zip' 194 | return M5Evaluation.load_benchmark(directory, source_url, validation) 195 | 196 | benchmark = pd.read_csv(filepath) 197 | mask = benchmark['id'].str.endswith('validation') 198 | if validation: 199 | benchmark = benchmark[mask] 200 | benchmark['id'] = benchmark['id'].str.replace('_validation', '') 201 | else: 202 | benchmark = benchmark[~mask] 203 | benchmark['id'] = benchmark['id'].str.replace('_evaluation', '') 204 | 205 | benchmark = benchmark.sort_values('id', ignore_index=True) 206 | benchmark.rename(columns={'id': 'unique_id'}, inplace=True) 207 | *_, s_df = M5.load(directory) 208 | benchmark = benchmark.merge(s_df, how='left', 209 | on=['unique_id']) 210 | 211 | return benchmark 212 | 213 | @staticmethod 214 | def aggregate_levels(y_hat: pd.DataFrame, 215 | categories: pd.DataFrame = None) -> pd.DataFrame: 216 | """Aggregates the 30_480 series to get 42_840.""" 217 | y_hat_cat = y_hat.assign(total='Total') 218 | 219 | df_agg = [] 220 | for level, agg in M5Evaluation.levels.items(): 221 | df = y_hat_cat.groupby(agg).sum().reset_index() 222 | renamer = dict(zip(agg, ['Agg_Level_1', 'Agg_Level_2'])) 223 | df.rename(columns=renamer, inplace=True) 224 | df.insert(0, 'Level_id', level) 225 | df_agg.append(df) 226 | df_agg = pd.concat(df_agg) 227 | df_agg = df_agg.fillna('X') 228 | df_agg = df_agg.set_index(['Level_id', 'Agg_Level_1', 'Agg_Level_2']) 229 | df_agg.columns = [f'd_{i+1}' for i in range(df_agg.shape[1])] 230 | 231 | return df_agg 232 | 233 | @staticmethod 234 | def evaluate(directory: str, 235 | y_hat: Union[pd.DataFrame, str], 236 | validation: bool = False) -> pd.DataFrame: 237 | """Evaluates y_hat according to M4 methodology. 238 | 239 | Parameters 240 | ---------- 241 | directory: str 242 | Directory where data will be downloaded. 243 | validation: bool 244 | Wheter perform validation evaluation. 245 | Default False, return test evaluation. 246 | y_hat: pandas datafrae, str 247 | Forecasts as wide pandas dataframe with columns 248 | ['unique_id'] and forecasts or 249 | benchmark url from 250 | https://github.com/Nixtla/m5-forecasts/tree/main/forecasts. 251 | 252 | Returns 253 | ------- 254 | evaluation: pandas dataframe 255 | DataFrame with columns OWA, SMAPE, MASE 256 | and group as index. 
257 | """ 258 | if isinstance(y_hat, str): 259 | y_hat = M5Evaluation.load_benchmark(directory, y_hat, validation) 260 | 261 | M5.download(directory) 262 | path = f'{directory}/m5/datasets' 263 | if validation: 264 | weights = pd.read_csv(f'{path}/weights_validation.csv') 265 | sales = pd.read_csv(f'{path}/sales_train_vaidation.csv') 266 | y_test = pd.read_csv(f'{path}/sales_test_vaidation.csv') 267 | else: 268 | weights = pd.read_csv(f'{path}/weights_evaluation.csv') 269 | sales = pd.read_csv(f'{path}/sales_train_evaluation.csv') 270 | y_test = pd.read_csv(f'{path}/sales_test_evaluation.csv') 271 | 272 | # sales 273 | sales = M5Evaluation.aggregate_levels(sales) 274 | def scale(x): 275 | x = x.values 276 | x = x[np.argmax(x!=0):] 277 | scale = ((x[1:] - x[:-1]) ** 2).mean() 278 | return scale 279 | scales = sales.agg(scale, 1).rename('scale').reset_index() 280 | 281 | # y_test 282 | y_test = M5Evaluation.aggregate_levels(y_test) 283 | 284 | #y_hat 285 | y_hat = M5Evaluation.aggregate_levels(y_hat) 286 | 287 | score = (y_test - y_hat) ** 2 288 | score = score.mean(1) 289 | score = score.rename('rmse').reset_index() 290 | score = score.merge(weights, how='left', 291 | on=['Level_id', 'Agg_Level_1', 'Agg_Level_2']) 292 | score = score.merge(scales, how='left', 293 | on=['Level_id', 'Agg_Level_1', 'Agg_Level_2']) 294 | score['wrmsse'] = (score['rmse'] / score['scale']).pow(1 / 2) * score['weight'] 295 | score = score.groupby('Level_id')[['wrmsse']].sum() 296 | score = score.loc[M5Evaluation.levels.keys()] 297 | total = score.mean().rename('Total').to_frame().T 298 | score = pd.concat([total, score]) 299 | 300 | return score -------------------------------------------------------------------------------- /src/data/datasets/tourism.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__tourism.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['Yearly', 'Quarterly', 'Monthly', 'TourismInfo', 'Tourism'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import numpy as np 11 | import pandas as pd 12 | from pandas.tseries.frequencies import to_offset 13 | 14 | from .utils import download_file, Info, TimeSeriesDataclass 15 | 16 | # Cell 17 | @dataclass 18 | class Yearly: 19 | seasonality: int = 1 20 | horizon: int = 4 21 | freq: str = 'Y' 22 | rows: int = 2 23 | name: str = 'Yearly' 24 | n_ts: int = 518 25 | 26 | @dataclass 27 | class Quarterly: 28 | seasonality: int = 4 29 | horizon: int = 8 30 | freq: str = 'Q' 31 | rows: int = 3 32 | name: str = 'Quarterly' 33 | n_ts: int = 427 34 | 35 | @dataclass 36 | class Monthly: 37 | seasonality: int = 12 38 | horizon: int = 24 39 | freq: str = 'M' 40 | rows: int = 3 41 | name: str = 'Monthly' 42 | n_ts: int = 366 43 | 44 | # Cell 45 | TourismInfo = Info(groups=('Yearly', 'Quarterly', 'Monthly'), 46 | class_groups=(Yearly, Quarterly, Monthly)) 47 | 48 | # Cell 49 | class Tourism(TimeSeriesDataclass): 50 | 51 | source_url = 'https://robjhyndman.com/data/27-3-Athanasopoulos1.zip' 52 | 53 | @staticmethod 54 | def load(directory: str, 55 | group: str) -> Tuple[pd.DataFrame, 56 | Optional[pd.DataFrame], 57 | Optional[pd.DataFrame]]: 58 | """ 59 | Downloads and loads Tourism data. 60 | 61 | Parameters 62 | ---------- 63 | directory: str 64 | Directory where data will be downloaded. 65 | group: str 66 | Group name. 67 | Allowed groups: 'Yearly', 'Quarterly', 'Monthly'. 
68 | 69 | Notes 70 | ----- 71 | [1] Returns train+test sets. 72 | """ 73 | Tourism.download(directory) 74 | 75 | path = f'{directory}/tourism/datasets' 76 | 77 | class_group = TourismInfo.get_group(group) 78 | train_file = f'{path}/{class_group.name.lower()}_in.csv' 79 | test_file = f'{path}/{class_group.name.lower()}_oos.csv' 80 | 81 | train, test = pd.read_csv(train_file), pd.read_csv(test_file) 82 | 83 | dfs = [] 84 | freq = to_offset(class_group.freq) 85 | for col in train.columns: 86 | df_appended = [] 87 | for df, training in zip([train, test], [True, False]): 88 | df_col = df[col] 89 | length, year = df_col[:2].astype(int) 90 | skip_rows = class_group.rows 91 | start_date = pd.to_datetime(f'{year}-01-01') 92 | if group != 'Yearly': 93 | n_offsets = df_col[2].astype(int) 94 | start_date += n_offsets * freq 95 | elif col == 'Y18' and not training: # viene mal en el archivo esta serie 96 | start_date += 2 * freq 97 | df_col = df_col[skip_rows:length + skip_rows] 98 | df_col = df_col.rename('y').to_frame() 99 | df_col['unique_id'] = col 100 | df_col['ds'] = pd.date_range(start_date, periods=length, freq=freq) 101 | df_appended.append(df_col) 102 | df_appended = pd.concat(df_appended) 103 | dfs.append(df_appended) 104 | 105 | df = pd.concat(dfs) 106 | 107 | df = df.reset_index().filter(items=['unique_id', 'ds', 'y']) 108 | df = df.sort_values(['unique_id', 'ds']) 109 | 110 | return df, None, None 111 | 112 | @staticmethod 113 | def download(directory: str) -> None: 114 | """Downloads Tourism Dataset.""" 115 | path = f'{directory}/tourism/datasets' 116 | 117 | if not os.path.exists(path): 118 | download_file(path, Tourism.source_url, decompress=True) -------------------------------------------------------------------------------- /src/data/datasets/utils.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__utils.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['logger', 'download_file', 'Info', 'TimeSeriesDataclass', 'get_holiday_dates', 'holiday_kernel', 4 | 'create_calendar_variables', 'create_us_holiday_distance_variables', 'US_FEDERAL_HOLIDAYS', 'TimeFeature', 5 | 'SecondOfMinute', 'MinuteOfHour', 'HourOfDay', 'DayOfWeek', 'DayOfMonth', 'DayOfYear', 'MonthOfYear', 6 | 'WeekOfYear', 'time_features_from_frequency_str'] 7 | 8 | # Cell 9 | import logging 10 | import requests 11 | import subprocess 12 | import zipfile 13 | from pathlib import Path 14 | from dataclasses import dataclass 15 | from typing import Dict, List, Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import pandas as pd 19 | from tqdm import tqdm 20 | import pandas as pd 21 | from pandas.tseries import offsets 22 | from pandas.tseries.frequencies import to_offset 23 | 24 | logging.basicConfig(level=logging.INFO) 25 | logger = logging.getLogger(__name__) 26 | 27 | # Cell 28 | def download_file(directory: str, source_url: str, decompress: bool = False) -> None: 29 | """Download data from source_ulr inside directory. 30 | 31 | Parameters 32 | ---------- 33 | directory: str, Path 34 | Custom directory where data will be downloaded. 35 | source_url: str 36 | URL where data is hosted. 37 | decompress: bool 38 | Wheter decompress downloaded file. Default False. 
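    Examples
    --------
    >>> # Hedged sketch: the URL below is a placeholder, not a real data source.
    >>> download_file('data/tmp', 'https://example.com/archive.zip', decompress=True)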
39 | """ 40 | if isinstance(directory, str): 41 | directory = Path(directory) 42 | directory.mkdir(parents=True, exist_ok=True) 43 | 44 | filename = source_url.split('/')[-1] 45 | filepath = Path(f'{directory}/{filename}') 46 | 47 | # Streaming, so we can iterate over the response. 48 | headers = {'User-Agent': 'Mozilla/5.0'} 49 | r = requests.get(source_url, stream=True, headers=headers) 50 | # Total size in bytes. 51 | total_size = int(r.headers.get('content-length', 0)) 52 | block_size = 1024 #1 Kibibyte 53 | 54 | t = tqdm(total=total_size, unit='iB', unit_scale=True) 55 | with open(filepath, 'wb') as f: 56 | for data in r.iter_content(block_size): 57 | t.update(len(data)) 58 | f.write(data) 59 | f.flush() 60 | t.close() 61 | 62 | if total_size != 0 and t.n != total_size: 63 | logger.error('ERROR, something went wrong downloading data') 64 | 65 | size = filepath.stat().st_size 66 | logger.info(f'Successfully downloaded {filename}, {size}, bytes.') 67 | 68 | if decompress: 69 | if '.zip' in filepath.suffix: 70 | logger.info('Decompressing zip file...') 71 | with zipfile.ZipFile(filepath, 'r') as zip_ref: 72 | zip_ref.extractall(directory) 73 | else: 74 | from patoolib import extract_archive 75 | extract_archive(filepath, outdir=directory) 76 | logger.info(f'Successfully decompressed {filepath}') 77 | 78 | # Cell 79 | @dataclass 80 | class Info: 81 | """ 82 | Info Dataclass of datasets. 83 | Args: 84 | groups (Tuple): Tuple of str groups 85 | class_groups (Tuple): Tuple of dataclasses. 86 | """ 87 | groups: Tuple[str] 88 | class_groups: Tuple[dataclass] 89 | 90 | def get_group(self, group: str): 91 | """Gets dataclass of group.""" 92 | if group not in self.groups: 93 | raise Exception(f'Unkown group {group}') 94 | 95 | return self.class_groups[self.groups.index(group)] 96 | 97 | def __getitem__(self, group: str): 98 | """Gets dataclass of group.""" 99 | if group not in self.groups: 100 | raise Exception(f'Unkown group {group}') 101 | 102 | return self.class_groups[self.groups.index(group)] 103 | 104 | def __iter__(self): 105 | for group in self.groups: 106 | yield group, self.get_group(group) 107 | 108 | 109 | # Cell 110 | @dataclass 111 | class TimeSeriesDataclass: 112 | """ 113 | Args: 114 | S (pd.DataFrame): DataFrame of static features of shape 115 | (n_time_series, n_features). 116 | X (pd.DataFrame): DataFrame of exogenous variables of shape 117 | (sum n_periods_i for i=1..n_time_series, n_exogenous). 118 | Y (pd.DataFrame): DataFrame of target variable of shape 119 | (sum n_periods_i for i=1..n_time_series, 1). 120 | idx_categorical_static (list, optional): List of categorical indexes 121 | of S. 122 | group (str, optional): Group name if applies. 
123 | Example: 'Yearly' 124 | """ 125 | S: pd.DataFrame 126 | X: pd.DataFrame 127 | Y: pd.DataFrame 128 | idx_categorical_static: Optional[List] = None 129 | group: Union[str, List[str]] = None 130 | 131 | # Cell 132 | import pandas as pd 133 | from pandas.tseries.holiday import ( 134 | AbstractHolidayCalendar, 135 | Holiday, 136 | USMartinLutherKingJr, 137 | USPresidentsDay, 138 | USMemorialDay, 139 | USLaborDay, 140 | USColumbusDay, 141 | USThanksgivingDay, 142 | nearest_workday 143 | ) 144 | 145 | US_FEDERAL_HOLIDAYS = {'new_year': Holiday("New Years Day", month=1, day=1, observance=nearest_workday), 146 | 'martin_luther_king': USMartinLutherKingJr, 147 | 'presidents': USPresidentsDay, 148 | 'memorial': USMemorialDay, 149 | 'independence': Holiday("July 4th", month=7, day=4, observance=nearest_workday), 150 | 'labor': USLaborDay, 151 | 'columbus': USColumbusDay, 152 | 'veterans': Holiday("Veterans Day", month=11, day=11, observance=nearest_workday), 153 | 'thanksgiving': USThanksgivingDay, 154 | 'christmas': Holiday("Christmas", month=12, day=25, observance=nearest_workday)} 155 | 156 | def get_holiday_dates(holiday, dates): 157 | start_date = min(dates) + pd.DateOffset(days=-366) 158 | end_date = max(dates) + pd.DateOffset(days=366) 159 | holiday_calendar = AbstractHolidayCalendar(rules=[US_FEDERAL_HOLIDAYS[holiday]]) 160 | holiday_dates = holiday_calendar.holidays(start=start_date, end=end_date) 161 | return np.array(holiday_dates) 162 | 163 | def holiday_kernel(holiday, dates): 164 | # Get holidays around dates 165 | dates = pd.DatetimeIndex(dates) 166 | dates_np = np.array(dates).astype('datetime64[D]') 167 | holiday_dates = get_holiday_dates(holiday, dates) 168 | holiday_dates_np = np.array(pd.DatetimeIndex(holiday_dates)).astype('datetime64[D]') 169 | 170 | # Compute day distance to holiday 171 | nearest_holiday_idx = np.expand_dims(dates_np, axis=1) - np.expand_dims(holiday_dates_np, axis=0) 172 | nearest_holiday_idx = np.argmin(np.abs(nearest_holiday_idx), axis=1) 173 | nearest_holiday = pd.DatetimeIndex([holiday_dates[idx] for idx in nearest_holiday_idx]) 174 | holiday_diff = (dates - nearest_holiday).days.values 175 | return holiday_diff 176 | 177 | def create_calendar_variables(X_df: pd.DataFrame): 178 | X_df['day_of_year'] = X_df.ds.dt.dayofyear 179 | X_df['day_of_week'] = X_df.ds.dt.dayofweek 180 | X_df['hour'] = X_df.ds.dt.hour 181 | return X_df 182 | 183 | def create_us_holiday_distance_variables(X_df: pd.DataFrame): 184 | dates = X_df.ds.dt.date 185 | for holiday in US_FEDERAL_HOLIDAYS.keys(): 186 | X_df[f'holiday_dist_{holiday}'] = holiday_kernel(holiday=holiday, 187 | dates=dates) 188 | return X_df 189 | 190 | # Cell 191 | ## This code was taken from: 192 | # https://github.com/zhouhaoyi/Informer2020/blob/429f8ace8dde71655d8f8a5aad1a36303a2b2dfe/utils/timefeatures.py#L114 193 | class TimeFeature: 194 | def __init__(self): 195 | pass 196 | 197 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 198 | pass 199 | 200 | def __repr__(self): 201 | return self.__class__.__name__ + "()" 202 | 203 | class SecondOfMinute(TimeFeature): 204 | """Minute of hour encoded as value between [-0.5, 0.5]""" 205 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 206 | return index.second / 59.0 - 0.5 207 | 208 | class MinuteOfHour(TimeFeature): 209 | """Minute of hour encoded as value between [-0.5, 0.5]""" 210 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 211 | return index.minute / 59.0 - 0.5 212 | 213 | class HourOfDay(TimeFeature): 214 | """Hour of day 
encoded as value between [-0.5, 0.5]""" 215 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 216 | return index.hour / 23.0 - 0.5 217 | 218 | class DayOfWeek(TimeFeature): 219 | """Hour of day encoded as value between [-0.5, 0.5]""" 220 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 221 | return index.dayofweek / 6.0 - 0.5 222 | 223 | class DayOfMonth(TimeFeature): 224 | """Day of month encoded as value between [-0.5, 0.5]""" 225 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 226 | return (index.day - 1) / 30.0 - 0.5 227 | 228 | class DayOfYear(TimeFeature): 229 | """Day of year encoded as value between [-0.5, 0.5]""" 230 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 231 | return (index.dayofyear - 1) / 365.0 - 0.5 232 | 233 | class MonthOfYear(TimeFeature): 234 | """Month of year encoded as value between [-0.5, 0.5]""" 235 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 236 | return (index.month - 1) / 11.0 - 0.5 237 | 238 | class WeekOfYear(TimeFeature): 239 | """Week of year encoded as value between [-0.5, 0.5]""" 240 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 241 | return (index.isocalendar().week - 1) / 52.0 - 0.5 242 | 243 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: 244 | """ 245 | Returns a list of time features that will be appropriate for the given frequency string. 246 | Parameters 247 | ---------- 248 | freq_str 249 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 250 | """ 251 | 252 | features_by_offsets = { 253 | offsets.YearEnd: [], 254 | offsets.QuarterEnd: [MonthOfYear], 255 | offsets.MonthEnd: [MonthOfYear], 256 | offsets.Week: [DayOfMonth, WeekOfYear], 257 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 258 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 259 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 260 | offsets.Minute: [ 261 | MinuteOfHour, 262 | HourOfDay, 263 | DayOfWeek, 264 | DayOfMonth, 265 | DayOfYear, 266 | ], 267 | offsets.Second: [ 268 | SecondOfMinute, 269 | MinuteOfHour, 270 | HourOfDay, 271 | DayOfWeek, 272 | DayOfMonth, 273 | DayOfYear, 274 | ], 275 | } 276 | 277 | offset = to_offset(freq_str) 278 | 279 | for offset_type, feature_classes in features_by_offsets.items(): 280 | if isinstance(offset, offset_type): 281 | return [cls() for cls in feature_classes] -------------------------------------------------------------------------------- /src/data/datasets/wth.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data_datasets__wth.ipynb (unless otherwise specified). 
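# Illustrative usage sketch (added for exposition, not part of the notebook
# export). It assumes the Google Drive file behind `WTH.source_url` is
# reachable and that `./data` is writable.
def _wth_usage_sketch(directory: str = './data'):
    """Load the hourly Weather (WTH) panel with the loader defined below."""
    y_df, X_df, S_df = WTH.load(directory)   # long-format target plus calendar exogenous variables; S_df is None
    return y_df, X_df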
2 | 3 | __all__ = ['WTH', 'WTHInfo', 'WTH'] 4 | 5 | # Cell 6 | import os 7 | from dataclasses import dataclass 8 | from typing import Dict, List, Optional, Tuple, Union 9 | 10 | import gdown 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from .utils import Info, time_features_from_frequency_str 15 | from .ett import process_multiple_ts 16 | 17 | # Cell 18 | @dataclass 19 | class WTH: 20 | freq: str = 'H' 21 | name: str = 'WTH' 22 | n_ts: int = 12 23 | 24 | # Cell 25 | WTHInfo = Info(groups=('WTH',), 26 | class_groups=(WTH,)) 27 | 28 | # Cell 29 | @dataclass 30 | class WTH: 31 | 32 | source_url: str = 'https://drive.google.com/uc?id=1UBRz-aM_57i_KCC-iaSWoKDPTGGv6EaG' 33 | 34 | @staticmethod 35 | def load(directory: str, 36 | cache: bool = True) -> Tuple[pd.DataFrame, 37 | Optional[pd.DataFrame], 38 | Optional[pd.DataFrame]]: 39 | """Downloads and loads ETT data. 40 | 41 | Parameters 42 | ---------- 43 | directory: str 44 | Directory where data will be downloaded. 45 | cache: bool 46 | If `True` saves and loads 47 | 48 | Notes 49 | ----- 50 | [1] Returns train+val+test sets. 51 | """ 52 | path = f'{directory}/wth/datasets' 53 | file_cache = f'{path}/WTH.p' 54 | 55 | if os.path.exists(file_cache) and cache: 56 | df, X_df, S_df = pd.read_pickle(file_cache) 57 | 58 | return df, X_df, S_df 59 | 60 | 61 | WTH.download(directory) 62 | path = f'{directory}/wth/datasets' 63 | 64 | y_df = pd.read_csv(f'{path}/WTH.csv') 65 | y_df, X_df = process_multiple_ts(y_df) 66 | 67 | S_df = None 68 | if cache: 69 | pd.to_pickle((y_df, X_df, S_df), file_cache) 70 | 71 | return y_df, X_df, S_df 72 | 73 | @staticmethod 74 | def download(directory: str) -> None: 75 | """Download WTH Dataset.""" 76 | path = f'{directory}/wth/datasets/' 77 | if not os.path.exists(path): 78 | os.makedirs(path) 79 | gdown.download(WTH.source_url, f'{path}/WTH.csv') -------------------------------------------------------------------------------- /src/data/scalers.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data__scalers.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['Scaler', 'norm_scaler', 'inv_norm_scaler', 'norm1_scaler', 'inv_norm1_scaler', 'std_scaler', 4 | 'inv_std_scaler', 'median_scaler', 'inv_median_scaler', 'invariant_scaler', 'inv_invariant_scaler'] 5 | 6 | # Cell 7 | import numpy as np 8 | import statsmodels.api as sm 9 | 10 | # Cell 11 | import numpy as np 12 | import statsmodels.api as sm 13 | 14 | #TODO: rehacer todo, es codigo provisional porque corre 15 | #TODO: filtrar por adelantado con offset 16 | #TODO: codigo duplicado en clases muy parecidas 17 | #TODO: usar scaler sklearn? 
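# (English summary of the Spanish TODO notes above and below: this scaler code
# is provisional and should be rewritten; pre-filter upfront with the offset;
# reduce the duplication across the nearly identical scalers, possibly via
# sklearn scalers or one subclass per scaler; the shift/scale naming abuses
# min/max; add comments; add a helper that returns the per-scaler pieces;
# it currently only works for a single series.)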
18 | #TODO: shift scale abuso notacion con min, max 19 | #TODO: comentar cosas 20 | #TODO: hacer funcion devoluciondora de cosos particulares 21 | #TODO: subclase para cada scaler 22 | #TODO: funciona solo para una serie 23 | 24 | class Scaler(object): 25 | def __init__(self, normalizer): 26 | assert (normalizer in ['std', 'invariant', 'norm', 'norm1', 'median']), 'Normalizer not defined' 27 | self.normalizer = normalizer 28 | self.x_shift = None 29 | self.x_scale = None 30 | 31 | def scale(self, x, mask): 32 | if self.normalizer == 'invariant': 33 | x_scaled, x_shift, x_scale = invariant_scaler(x, mask) 34 | elif self.normalizer == 'median': 35 | x_scaled, x_shift, x_scale = median_scaler(x, mask) 36 | elif self.normalizer == 'std': 37 | x_scaled, x_shift, x_scale = std_scaler(x, mask) 38 | elif self.normalizer == 'norm': 39 | x_scaled, x_shift, x_scale = norm_scaler(x, mask) 40 | elif self.normalizer == 'norm1': 41 | x_scaled, x_shift, x_scale = norm1_scaler(x, mask) 42 | 43 | assert len(x[mask==1] == np.sum(mask)), 'Something weird is happening, call Cristian' 44 | nan_before_scale = np.sum(np.isnan(x)) 45 | nan_after_scale = np.sum(np.isnan(x_scaled)) 46 | assert nan_before_scale == nan_after_scale, 'Scaler induced nans' 47 | 48 | self.x_shift = x_shift 49 | self.x_scale = x_scale 50 | return np.array(x_scaled) 51 | 52 | def inv_scale(self, x): 53 | assert self.x_shift is not None 54 | assert self.x_scale is not None 55 | 56 | if self.normalizer == 'invariant': 57 | x_inv_scaled = inv_invariant_scaler(x, self.x_shift, self.x_scale) 58 | elif self.normalizer == 'median': 59 | x_inv_scaled = inv_median_scaler(x, self.x_shift, self.x_scale) 60 | elif self.normalizer == 'std': 61 | x_inv_scaled = inv_std_scaler(x, self.x_shift, self.x_scale) 62 | elif self.normalizer == 'norm': 63 | x_inv_scaled = inv_norm_scaler(x, self.x_shift, self.x_scale) 64 | elif self.normalizer == 'norm1': 65 | x_inv_scaled = inv_norm1_scaler(x, self.x_shift, self.x_scale) 66 | 67 | return np.array(x_inv_scaled) 68 | 69 | # Norm 70 | def norm_scaler(x, mask): 71 | x_max = np.max(x[mask==1]) 72 | x_min = np.min(x[mask==1]) 73 | 74 | x = (x - x_min) / (x_max - x_min) #TODO: cuidado dividir por zero 75 | return x, x_min, x_max 76 | 77 | def inv_norm_scaler(x, x_min, x_max): 78 | return x * (x_max - x_min) + x_min 79 | 80 | # Norm1 81 | def norm1_scaler(x, mask): 82 | x_max = np.max(x[mask==1]) 83 | x_min = np.min(x[mask==1]) 84 | 85 | x = (x - x_min) / (x_max - x_min) #TODO: cuidado dividir por zero 86 | x = x * (2) - 1 87 | return x, x_min, x_max 88 | 89 | def inv_norm1_scaler(x, x_min, x_max): 90 | x = (x + 1) / 2 91 | return x * (x_max - x_min) + x_min 92 | 93 | # Std 94 | def std_scaler(x, mask): 95 | x_mean = np.mean(x[mask==1]) 96 | x_std = np.std(x[mask==1]) 97 | 98 | x = (x - x_mean) / x_std #TODO: cuidado dividir por zero 99 | return x, x_mean, x_std 100 | 101 | def inv_std_scaler(x, x_mean, x_std): 102 | return (x * x_std) + x_mean 103 | 104 | # Median 105 | def median_scaler(x, mask): 106 | x_median = np.median(x[mask==1]) 107 | x_mad = sm.robust.scale.mad(x[mask==1]) 108 | if x_mad == 0: 109 | x_mad = np.std(x[mask==1], ddof = 1) / 0.6744897501960817 110 | x = (x - x_median) / x_mad 111 | return x, x_median, x_mad 112 | 113 | def inv_median_scaler(x, x_median, x_mad): 114 | return x * x_mad + x_median 115 | 116 | # Invariant 117 | def invariant_scaler(x, mask): 118 | x_median = np.median(x[mask==1]) 119 | x_mad = sm.robust.scale.mad(x[mask==1]) 120 | if x_mad == 0: 121 | x_mad = np.std(x[mask==1], ddof = 
1) / 0.6744897501960817 122 | x = np.arcsinh((x - x_median) / x_mad) 123 | return x, x_median, x_mad 124 | 125 | def inv_invariant_scaler(x, x_median, x_mad): 126 | return np.sinh(x) * x_mad + x_median 127 | -------------------------------------------------------------------------------- /src/data/tsloader.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data__tsloader.ipynb (unless otherwise specified). 2 | 3 | __all__ = ['TimeSeriesLoader', 'FastTimeSeriesLoader'] 4 | 5 | # Cell 6 | import warnings 7 | from collections.abc import Mapping 8 | from typing import Dict, List, Optional, Union 9 | 10 | import numpy as np 11 | import torch as t 12 | from fastcore.foundation import patch 13 | from torch.utils.data import DataLoader 14 | 15 | from .tsdataset import TimeSeriesDataset, WindowsDataset 16 | 17 | # Cell 18 | class TimeSeriesLoader(DataLoader): 19 | 20 | def __init__(self, dataset: Union[TimeSeriesDataset, WindowsDataset], 21 | eq_batch_size: bool = False, 22 | n_windows: Optional[int] = None, 23 | **kwargs) -> 'TimeSeriesLoader': 24 | """Wraps the pytorch `DataLoader` with a special collate function 25 | for the `TimeSeriesDataset` ouputs. 26 | 27 | The TimeSeriesDataset constructs all the trainable windows 28 | of `batch_size` series. The number of windows can be greater 29 | or smaller than the `batch_size`. For this reason, 30 | an additional boolean parameter, `eq_batch_size` is included 31 | that if `True` samples `batch_size` windows randomly, 32 | while `False` returns all windows. 33 | 34 | Parameters 35 | ---------- 36 | dataset: TimeSeriesDataset 37 | Stored time series. 38 | eq_batch_size: bool 39 | If `True` samples `batch_size` windows randomly, 40 | while `False` or `batch_size=None` returns all windows. 41 | n_windows: int 42 | Number of windows to sample after 43 | batching batch_size series. 44 | """ 45 | if 'collate_fn' in kwargs.keys(): 46 | warnings.warn( 47 | 'This class wraps the pytorch `DataLoader` with a ' 48 | 'special collate function. If you want to use yours ' 49 | 'simply use `DataLoader`. Removing collate_fn' 50 | ) 51 | kwargs.pop('collate_fn') 52 | 53 | kwargs_ = {**kwargs, **dict(collate_fn=self._collate_fn)} 54 | DataLoader.__init__(self, dataset=dataset, **kwargs_) 55 | self.eq_batch_size = eq_batch_size 56 | self.n_windows = n_windows 57 | self.w_idxs: Optional[np.ndarray] = None 58 | 59 | # Cell 60 | @patch 61 | def _check_batch_size(self: TimeSeriesLoader, batch: t.Tensor): 62 | complete_batch = batch 63 | if self.w_idxs is not None: 64 | complete_batch = batch[self.w_idxs] 65 | 66 | return complete_batch 67 | 68 | # Cell 69 | @patch 70 | def _collate_fn(self: TimeSeriesLoader, batch: Union[List, Dict[str, t.Tensor], t.Tensor]): 71 | """Special collate fn for the `TimeSeriesDataset`. 72 | 73 | Notes 74 | ----- 75 | [1] Adapted from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py. 
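        [2] Tensors are concatenated along the first (window) dimension and
            mappings are collated key by key. When `eq_batch_size` or
            `n_windows` is set, a random subset of the stacked windows is
            kept via the sampled `w_idxs`.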
76 | """ 77 | elem = batch[0] 78 | # if len(batch) == 1: 79 | # return {key: self._check_batch_size(elem[key]) for key in elem} 80 | 81 | elem_type = type(elem) 82 | 83 | if isinstance(elem, t.Tensor): 84 | out = None 85 | if t.utils.data.get_worker_info() is not None: 86 | # If we're in a background process, concatenate directly into a 87 | # shared memory tensor to avoid an extra copy 88 | numel = sum([x.numel() for x in batch]) 89 | storage = elem.storage()._new_shared(numel) 90 | out = elem.new(storage) 91 | complete_batch = t.cat(batch, out=out) 92 | return self._check_batch_size(complete_batch) 93 | 94 | elif isinstance(elem, Mapping): 95 | n_windows = [elem_['Y'].size(0) for elem_ in batch] 96 | n_windows = sum(n_windows) 97 | if self.eq_batch_size and self.batch_size is not None: 98 | self.w_idxs = np.random.choice(n_windows, size=self.batch_size, 99 | replace=(n_windows < self.batch_size)) 100 | if not self.eq_batch_size and self.n_windows is not None: 101 | self.w_idxs = np.random.choice(n_windows, size=self.n_windows, 102 | replace=(n_windows < self.n_windows)) 103 | return {key: self.collate_fn([d[key] for d in batch]) for key in elem} 104 | 105 | raise TypeError(f'Unknown {elem_type}') 106 | 107 | # Cell 108 | class FastTimeSeriesLoader: 109 | """ 110 | A DataLoader-like object for a set of tensors that can be much faster than 111 | TensorDataset + DataLoader because dataloader grabs individual indices of 112 | the dataset and calls cat (slow). 113 | Source: https://discuss.pytorch.org/t/dataloader-much-slower-than-manual-batching/27014/6 114 | 115 | Notes 116 | ----- 117 | [1] Adapted from https://github.com/hcarlens/pytorch-tabular/blob/master/fast_tensor_data_loader.py. 118 | """ 119 | def __init__(self, dataset: TimeSeriesDataset, batch_size: int = 32, 120 | eq_batch_size: bool = False, 121 | n_windows: Optional[int] = None, 122 | shuffle: bool = False) -> 'FastTimeSeriesLoader': 123 | """Initialize a FastTimeSeriesLoader. 124 | 125 | The TimeSeriesDataset constructs all the trainable windows 126 | of `batch_size` series. The number of windows can be greater 127 | or smaller than the `batch_size`. For this reason, 128 | an additional boolean parameter, `eq_batch_size` is included 129 | that if `True` samples `batch_size` windows randomly, 130 | while `False` returns all windows. 131 | 132 | Parameters 133 | ----------- 134 | dataset: TimeSeriesDataset 135 | Stored time series. 136 | batch_size: int 137 | Batch size to load. 138 | n_windows: int 139 | Number of windows to sample after 140 | batching batch_size series. 141 | shuffle: bool 142 | If `True`, shuffle the data *in-place* whenever an 143 | iterator is created out of this object. 
144 | """ 145 | self.dataset = dataset 146 | self.dataset_len = len(dataset) 147 | self.batch_size = batch_size 148 | self.eq_batch_size = eq_batch_size 149 | self.n_windows = n_windows 150 | self.shuffle = shuffle 151 | self.idxs = np.arange(self.dataset_len) 152 | 153 | # Calculate # batches 154 | n_batches, remainder = divmod(self.dataset_len, self.batch_size) 155 | if remainder > 0: 156 | n_batches += 1 157 | self.n_batches = n_batches 158 | self.w_idxs: Optional[np.ndarray] = None 159 | 160 | # Cell 161 | @patch 162 | def __iter__(self: FastTimeSeriesLoader): 163 | if self.shuffle: 164 | self.idxs = np.random.permutation(self.dataset_len) 165 | 166 | self.i = 0 167 | return self 168 | 169 | # Cell 170 | @patch 171 | def _check_batch_size(self: FastTimeSeriesLoader, batch: t.Tensor): 172 | complete_batch = batch 173 | if self.w_idxs is not None: 174 | complete_batch = batch[self.w_idxs] 175 | return complete_batch 176 | 177 | # Cell 178 | @patch 179 | def __next__(self: FastTimeSeriesLoader): 180 | if self.i >= self.dataset_len: 181 | raise StopIteration 182 | idxs = self.idxs[self.i:(self.i + self.batch_size)].tolist() 183 | batch = self.dataset[idxs] 184 | self.i += self.batch_size 185 | 186 | n_windows = batch['Y'].size(0) 187 | if self.eq_batch_size and self.batch_size is not None: 188 | self.w_idxs = np.random.choice(n_windows, size=self.batch_size, 189 | replace=(n_windows < self.batch_size)) 190 | 191 | if not self.eq_batch_size and self.n_windows is not None: 192 | self.w_idxs = np.random.choice(n_windows, size=self.n_windows, 193 | replace=(n_windows < self.n_windows)) 194 | 195 | return {key: self._check_batch_size(batch[key]) for key in batch} 196 | 197 | # Cell 198 | @patch 199 | def __len__(self: FastTimeSeriesLoader): 200 | return self.n_batches -------------------------------------------------------------------------------- /src/data/utils.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data__utils.ipynb (unless otherwise specified). 
2 | 3 | __all__ = ['create_synthetic_tsdata'] 4 | 5 | # Cell 6 | from typing import Tuple 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | # Cell 12 | def create_synthetic_tsdata(n_ts: int = 64, 13 | sort: bool = False) -> Tuple[pd.DataFrame, 14 | pd.DataFrame, 15 | pd.DataFrame]: 16 | """Creates synthetic time serie data.""" 17 | uids = np.array([f'uid_{i + 1}' for i in range(n_ts)]) 18 | dss = pd.date_range(end='2020-12-31', periods=n_ts) 19 | 20 | df = [] 21 | for idx in range(n_ts): 22 | ts = pd.DataFrame({'unique_id': np.repeat(uids[idx], idx + 1), 23 | 'ds': dss[-(idx + 1):], 24 | 'y': 1 + np.arange(idx + 1)}) 25 | df.append(ts) 26 | 27 | df = pd.concat(df) 28 | df['day_of_week'] = df['ds'].dt.day_of_week 29 | df['future_1'] = df['y'] + 1 30 | df['id_ts'] = df['unique_id'].astype('category').cat.codes 31 | if sort: 32 | df = df.sort_values(['unique_id', 'ds']) 33 | 34 | Y_df = df.filter(items=['unique_id', 'ds', 'y']) 35 | X_df = df.filter(items=['unique_id', 'ds', 'day_of_week', 'future_1']) 36 | S_df = df.filter(items=['unique_id', 'id_ts']).drop_duplicates() 37 | 38 | return Y_df, X_df, S_df -------------------------------------------------------------------------------- /src/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/experiments/__init__.py -------------------------------------------------------------------------------- /src/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/losses/__init__.py -------------------------------------------------------------------------------- /src/losses/pytorch.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import torch as t 3 | import torch.nn as nn 4 | 5 | # Cell 6 | def divide_no_nan(a, b): 7 | """ 8 | Auxiliary funtion to handle divide by 0 9 | """ 10 | div = a / b 11 | div[div != div] = 0.0 12 | div[div == float('inf')] = 0.0 13 | return div 14 | 15 | # Cell 16 | def MAPELoss(y, y_hat, mask=None): 17 | """MAPE Loss 18 | 19 | Calculates Mean Absolute Percentage Error between 20 | y and y_hat. MAPE measures the relative prediction 21 | accuracy of a forecasting method by calculating the 22 | percentual deviation of the prediction and the true 23 | value at a given time and averages these devations 24 | over the length of the series. 25 | As defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error 26 | 27 | Parameters 28 | ---------- 29 | y: tensor (batch_size, output_size) 30 | actual values in torch tensor. 31 | y_hat: tensor (batch_size, output_size) 32 | predicted values in torch tensor. 33 | mask: tensor (batch_size, output_size) 34 | specifies date stamps per serie 35 | to consider in loss 36 | 37 | Returns 38 | ------- 39 | mape: 40 | Mean absolute percentage error. 41 | """ 42 | if mask is None: mask = t.ones_like(y_hat) 43 | 44 | mask = divide_no_nan(mask, t.abs(y)) 45 | mape = t.abs(y - y_hat) * mask 46 | mape = t.mean(mape) 47 | return mape 48 | 49 | # Cell 50 | def MSELoss(y, y_hat, mask=None): 51 | """MSE Loss 52 | 53 | Calculates Mean Squared Error between 54 | y and y_hat. 
MAPE measures the relative prediction 55 | accuracy of a forecasting method by calculating the 56 | percentual deviation of the prediction and the true 57 | value at a given time and averages these devations 58 | over the length of the series. 59 | 60 | Parameters 61 | ---------- 62 | y: tensor (batch_size, output_size) 63 | actual values in torch tensor. 64 | y_hat: tensor (batch_size, output_size) 65 | predicted values in torch tensor. 66 | mask: tensor (batch_size, output_size) 67 | specifies date stamps per serie 68 | to consider in loss 69 | 70 | Returns 71 | ------- 72 | mse: 73 | Mean Squared Error. 74 | """ 75 | if mask is None: mask = t.ones_like(y_hat) 76 | 77 | mse = (y - y_hat)**2 78 | mse = mask * mse 79 | mse = t.mean(mse) 80 | return mse 81 | 82 | # Cell 83 | def RMSELoss(y, y_hat, mask=None): 84 | """RMSE Loss 85 | 86 | Calculates Mean Squared Error between 87 | y and y_hat. MAPE measures the relative prediction 88 | accuracy of a forecasting method by calculating the 89 | percentual deviation of the prediction and the true 90 | value at a given time and averages these devations 91 | over the length of the series. 92 | 93 | Parameters 94 | ---------- 95 | y: tensor (batch_size, output_size) 96 | actual values in torch tensor. 97 | y_hat: tensor (batch_size, output_size) 98 | predicted values in torch tensor. 99 | mask: tensor (batch_size, output_size) 100 | specifies date stamps per serie 101 | to consider in loss 102 | 103 | Returns 104 | ------- 105 | rmse: 106 | Root Mean Squared Error. 107 | """ 108 | if mask is None: mask = t.ones_like(y_hat) 109 | 110 | rmse = (y - y_hat)**2 111 | rmse = mask * rmse 112 | rmse = t.sqrt(t.mean(rmse)) 113 | return rmse 114 | 115 | # Cell 116 | def SMAPELoss(y, y_hat, mask=None): 117 | """SMAPE2 Loss 118 | 119 | Calculates Symmetric Mean Absolute Percentage Error. 120 | SMAPE measures the relative prediction accuracy of a 121 | forecasting method by calculating the relative deviation 122 | of the prediction and the true value scaled by the sum of the 123 | absolute values for the prediction and true value at a 124 | given time, then averages these devations over the length 125 | of the series. This allows the SMAPE to have bounds between 126 | 0% and 200% which is desireble compared to normal MAPE that 127 | may be undetermined. 128 | 129 | Parameters 130 | ---------- 131 | y: tensor (batch_size, output_size) 132 | actual values in torch tensor. 133 | y_hat: tensor (batch_size, output_size) 134 | predicted values in torch tensor. 135 | 136 | Returns 137 | ------- 138 | smape: 139 | symmetric mean absolute percentage error 140 | 141 | References 142 | ---------- 143 | [1] https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993) 144 | """ 145 | if mask is None: mask = t.ones_like(y_hat) 146 | 147 | delta_y = t.abs((y - y_hat)) 148 | scale = t.abs(y) + t.abs(y_hat) 149 | smape = divide_no_nan(delta_y, scale) 150 | smape = smape * mask 151 | smape = 2 * t.mean(smape) 152 | return smape 153 | 154 | # Cell 155 | def MASELoss(y, y_hat, y_insample, seasonality, mask=None) : 156 | """ Calculates the M4 Mean Absolute Scaled Error. 157 | 158 | MASE measures the relative prediction accuracy of a 159 | forecasting method by comparinng the mean absolute errors 160 | of the prediction and the true value against the mean 161 | absolute errors of the seasonal naive model. 
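    Concretely, for a seasonality m the loss is
    mean( |y - y_hat| / mean_t( |y_insample[t] - y_insample[t - m]| ) ),
    i.e. absolute forecast errors scaled by the in-sample seasonal-naive MAE.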
162 | 163 | Parameters 164 | ---------- 165 | seasonality: int 166 | main frequency of the time series 167 | Hourly 24, Daily 7, Weekly 52, 168 | Monthly 12, Quarterly 4, Yearly 1 169 | y: tensor (batch_size, output_size) 170 | actual test values 171 | y_hat: tensor (batch_size, output_size) 172 | predicted values 173 | y_train: tensor (batch_size, input_size) 174 | actual insample values for Seasonal Naive predictions 175 | 176 | Returns 177 | ------- 178 | mase: 179 | mean absolute scaled error 180 | 181 | References 182 | ---------- 183 | [1] https://robjhyndman.com/papers/mase.pdf 184 | """ 185 | if mask is None: mask = t.ones_like(y_hat) 186 | 187 | delta_y = t.abs(y - y_hat) 188 | scale = t.mean(t.abs(y_insample[:, seasonality:] - \ 189 | y_insample[:, :-seasonality]), axis=1) 190 | mase = divide_no_nan(delta_y, scale[:, None]) 191 | mase = mase * mask 192 | mase = t.mean(mase) 193 | return mase 194 | 195 | # Cell 196 | def MAELoss(y, y_hat, mask=None): 197 | """MAE Loss 198 | 199 | Calculates Mean Absolute Error between 200 | y and y_hat. MAE measures the relative prediction 201 | accuracy of a forecasting method by calculating the 202 | deviation of the prediction and the true 203 | value at a given time and averages these devations 204 | over the length of the series. 205 | 206 | Parameters 207 | ---------- 208 | y: tensor (batch_size, output_size) 209 | actual values in torch tensor. 210 | y_hat: tensor (batch_size, output_size) 211 | predicted values in torch tensor. 212 | mask: tensor (batch_size, output_size) 213 | specifies date stamps per serie 214 | to consider in loss 215 | 216 | Returns 217 | ------- 218 | mae: 219 | Mean absolute error. 220 | """ 221 | if mask is None: mask = t.ones_like(y_hat) 222 | 223 | mae = t.abs(y - y_hat) * mask 224 | mae = t.mean(mae) 225 | return mae 226 | 227 | # Cell 228 | def PinballLoss(y, y_hat, mask=None, tau=0.5): 229 | """Pinball Loss 230 | Computes the pinball loss between y and y_hat. 231 | 232 | Parameters 233 | ---------- 234 | y: tensor (batch_size, output_size) 235 | actual values in torch tensor. 236 | y_hat: tensor (batch_size, output_size) 237 | predicted values in torch tensor. 238 | tau: float, between 0 and 1 239 | the slope of the pinball loss, in the context of 240 | quantile regression, the value of tau determines the 241 | conditional quantile level. 242 | 243 | Returns 244 | ------- 245 | pinball: 246 | average accuracy for the predicted quantile 247 | """ 248 | if mask is None: mask = t.ones_like(y_hat) 249 | 250 | delta_y = t.sub(y, y_hat) 251 | pinball = t.max(t.mul(tau, delta_y), t.mul((tau - 1), delta_y)) 252 | pinball = pinball * mask 253 | pinball = t.mean(pinball) 254 | return pinball 255 | 256 | # Cell 257 | def LevelVariabilityLoss(levels, level_variability_penalty): 258 | """ Level Variability Loss 259 | Computes the variability penalty for the level. 
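    The penalty is the mean squared second difference of the log-levels,
    scaled by `level_variability_penalty`, so that smoother level paths
    incur a smaller loss.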
260 | 261 | Parameters 262 | ---------- 263 | levels: tensor with shape (batch, n_time) 264 | levels obtained from exponential smoothing component of ESRNN 265 | level_variability_penalty: float 266 | this parameter controls the strength of the penalization 267 | to the wigglines of the level vector, induces smoothness 268 | in the output 269 | 270 | Returns 271 | ---------- 272 | level_var_loss: 273 | wiggliness loss for the level vector 274 | """ 275 | assert levels.shape[1] > 2 276 | level_prev = t.log(levels[:, :-1]) 277 | level_next = t.log(levels[:, 1:]) 278 | log_diff_of_levels = t.sub(level_prev, level_next) 279 | 280 | log_diff_prev = log_diff_of_levels[:, :-1] 281 | log_diff_next = log_diff_of_levels[:, 1:] 282 | diff = t.sub(log_diff_prev, log_diff_next) 283 | level_var_loss = diff**2 284 | level_var_loss = level_var_loss.mean() * level_variability_penalty 285 | 286 | return level_var_loss 287 | 288 | # Cell 289 | def SmylLoss(y, y_hat, levels, mask, tau, level_variability_penalty=0.0): 290 | """Computes the Smyl Loss that combines level variability with 291 | with Pinball loss. 292 | windows_y: tensor of actual values, 293 | shape (n_windows, batch_size, window_size). 294 | windows_y_hat: tensor of predicted values, 295 | shape (n_windows, batch_size, window_size). 296 | levels: levels obtained from exponential smoothing component of ESRNN. 297 | tensor with shape (batch, n_time). 298 | return: smyl_loss. 299 | """ 300 | 301 | if mask is None: mask = t.ones_like(y_hat) 302 | 303 | smyl_loss = PinballLoss(y, y_hat, mask, tau) 304 | 305 | if level_variability_penalty > 0: 306 | log_diff_of_levels = LevelVariabilityLoss(levels, level_variability_penalty) 307 | smyl_loss += log_diff_of_levels 308 | 309 | return smyl_loss 310 | 311 | # Cell 312 | def MQLoss(y, y_hat, quantiles, mask=None): 313 | """MQLoss 314 | 315 | Calculates Average Multi-quantile Loss function, for 316 | a given set of quantiles, based on the absolute 317 | difference between predicted and true values. 318 | 319 | Parameters 320 | ---------- 321 | y: tensor (batch_size, output_size) actual values in torch tensor. 322 | y_hat: tensor (batch_size, output_size, n_quantiles) predicted values in torch tensor. 323 | mask: tensor (batch_size, output_size, n_quantiles) specifies date stamps per serie 324 | to consider in loss 325 | quantiles: tensor(n_quantiles) quantiles to estimate from the distribution of y. 326 | 327 | Returns 328 | ------- 329 | lq: tensor(n_quantiles) average multi-quantile loss. 330 | """ 331 | assert len(quantiles) > 1, f'your quantiles are of len: {len(quantiles)}' 332 | 333 | if mask is None: mask = t.ones_like(y_hat) 334 | 335 | n_q = len(quantiles) 336 | 337 | error = y_hat - y.unsqueeze(-1) 338 | sq = t.maximum(-error, t.zeros_like(error)) 339 | s1_q = t.maximum(error, t.zeros_like(error)) 340 | loss = (quantiles * sq + (1 - quantiles) * s1_q) 341 | 342 | return t.mean(t.mean(loss, axis=1)) 343 | 344 | # Cell 345 | def wMQLoss(y, y_hat, quantiles, mask=None): 346 | """wMQLoss 347 | 348 | Calculates Average Multi-quantile Loss function, for 349 | a given set of quantiles, based on the absolute 350 | difference between predicted and true values. 351 | 352 | Parameters 353 | ---------- 354 | y: tensor (batch_size, output_size) actual values in torch tensor. 355 | y_hat: tensor (batch_size, output_size, n_quantiles) predicted values in torch tensor. 
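# --- Illustrative sketch (annotation, not part of the original module). ---
# MQLoss expects one trailing axis on y_hat holding one forecast per quantile, plus a
# 1-D tensor of quantile levels. Shapes and values are arbitrary; assumes the file is
# importable as `src.losses.pytorch`.
import torch as t
from src.losses.pytorch import MQLoss

quantiles = t.tensor([0.1, 0.5, 0.9])
y     = t.rand(8, 24)          # [batch_size, output_size]
y_hat = t.rand(8, 24, 3)       # [batch_size, output_size, n_quantiles]
print(MQLoss(y=y, y_hat=y_hat, quantiles=quantiles))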
356 | mask: tensor (batch_size, output_size, n_quantiles) specifies date stamps per serie 357 | to consider in loss 358 | quantiles: tensor(n_quantiles) quantiles to estimate from the distribution of y. 359 | 360 | Returns 361 | ------- 362 | lq: tensor(n_quantiles) average multi-quantile loss. 363 | """ 364 | assert len(quantiles) > 1, f'your quantiles are of len: {len(quantiles)}' 365 | 366 | if mask is None: mask = t.ones_like(y_hat) 367 | 368 | n_q = len(quantiles) 369 | 370 | error = y_hat - y.unsqueeze(-1) 371 | 372 | sq = t.maximum(-error, t.zeros_like(error)) 373 | s1_q = t.maximum(error, t.zeros_like(error)) 374 | loss = (quantiles * sq + (1 - quantiles) * s1_q) 375 | 376 | loss = divide_no_nan(t.sum(loss * mask, axis=-2), 377 | t.sum(t.abs(y.unsqueeze(-1)) * mask, axis=-2)) 378 | 379 | return t.mean(loss) -------------------------------------------------------------------------------- /src/losses/utils.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | from typing import Union, List, Optional 3 | 4 | import torch as t 5 | from fastcore.foundation import patch 6 | 7 | from .pytorch import ( 8 | MAPELoss, MASELoss, SMAPELoss, 9 | MSELoss, MAELoss, SmylLoss, 10 | PinballLoss, MQLoss, wMQLoss 11 | ) 12 | 13 | # Cell 14 | class LossFunction: 15 | def __init__(self, loss_name: str, seasonality: Optional[int] = None, 16 | percentile: Optional[Union[List[int], int]] = None, 17 | level_variability_penalty: Optional[int] = None) -> 'LossFunction': 18 | """Instantiates a callable class of the `loss_name` loss. 19 | 20 | Parameters 21 | ---------- 22 | loss_name: str 23 | Name of the loss. 24 | seasonality: int 25 | main frequency of the time series 26 | Hourly 24, Daily 7, Weekly 52, 27 | Monthly 12, Quarterly 4, Yearly. 28 | Default `None`. 29 | Mandatory for MASE loss. 30 | percentile: Union[List[int], int] 31 | Target percentile. 32 | For SMYL and PINBALL losses an int 33 | is expected. 34 | For MQ and wMQ losses a list of ints 35 | is expected. 36 | Default `None`. 37 | level_variability_penalty: int 38 | Only used for SMYL loss. 
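# --- Illustrative sketch (annotation, not part of the original module). ---
# LossFunction is a thin dispatcher over the losses in `.pytorch`; the constructor
# checks that quantile-style losses receive the right `percentile` type. Assumes the
# file is importable as `src.losses.utils`; shapes are arbitrary.
import torch as t
from src.losses.utils import LossFunction

y, y_hat = t.rand(8, 24), t.rand(8, 24)

mae_fn = LossFunction(loss_name='MAE')
print(mae_fn(y=y, y_hat=y_hat))                                # masked MAE

pinball_fn = LossFunction(loss_name='PINBALL', percentile=90)  # tau = 0.9
print(pinball_fn(y=y, y_hat=y_hat))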
39 | """ 40 | if loss_name in ['SMYL', 'PINBALL'] and not isinstance(percentile, int): 41 | raise Exception(f'Percentile should be integer for {loss_name} loss.') 42 | elif loss_name in ['MQ', 'wMQ'] and not isinstance(percentile, list): 43 | raise Exception(f'Percentile should be list for {loss_name} loss') 44 | elif loss_name == 'MASE' and seasonality is None: 45 | raise Exception(f'Seasonality should be a list of integers for {loss_name} loss') 46 | 47 | 48 | self.loss_name = loss_name 49 | self.seasonality = seasonality 50 | self.percentile = percentile 51 | self.level_variability_penalty = level_variability_penalty 52 | 53 | self.tau = self.percentile / 100 if isinstance(percentile, int) else None 54 | self.quantiles = [tau / 100 for tau in percentile] if isinstance(percentile, list) else None 55 | 56 | # Cell 57 | @patch 58 | def __call__(self: LossFunction, 59 | y: t.Tensor, 60 | y_hat: t.Tensor, 61 | mask: Optional[t.Tensor] = None, 62 | y_insample: Optional[t.Tensor] = None, 63 | levels: Optional[t.Tensor] = None) -> t.Tensor: 64 | """Returns loss according to `loss_name`.""" 65 | if self.loss_name == 'SMYL': 66 | return SmylLoss(y=y, y_hat=y_hat, levels=levels, mask=mask, 67 | tau=self.tau, 68 | level_variability_penalty=self.level_variability_penalty) 69 | 70 | elif self.loss_name == 'PINBALL': 71 | return PinballLoss(y=y, y_hat=y_hat, mask=mask, 72 | tau=self.tau) 73 | 74 | elif self.loss_name == 'MQ': 75 | quantiles = t.Tensor(self.quantiles, device=y.device) 76 | return MQLoss(y=y, y_hat=y_hat, quantiles=quantiles, mask=mask) 77 | 78 | elif self.loss_name == 'wMQ': 79 | quantiles = t.Tensor(self.quantiles, device=y.device) 80 | return wMQLoss(y=y, y_hat=y_hat, quantiles=quantiles, mask=mask) 81 | 82 | elif self.loss_name == 'MAPE': 83 | return MAPELoss(y=y, y_hat=y_hat, mask=mask) 84 | 85 | elif self.loss_name == 'MASE': 86 | return MASELoss(y=y, y_hat=y_hat, y_insample=y_insample, 87 | seasonality=self.seasonality, mask=mask) 88 | 89 | elif self.loss_name == 'SMAPE': 90 | return SMAPELoss(y=y, y_hat=y_hat, mask=mask) 91 | 92 | elif self.loss_name == 'MSE': 93 | return MSELoss(y=y, y_hat=y_hat, mask=mask) 94 | 95 | elif self.loss_name == 'MAE': 96 | return MAELoss(y=y, y_hat=y_hat, mask=mask) 97 | 98 | raise Exception(f'Unknown loss function: {loss_name}') -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/components/__init__.py -------------------------------------------------------------------------------- /src/models/components/autocorrelation.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | # Cell 9 | class AutoCorrelation(nn.Module): 10 | """ 11 | AutoCorrelation Mechanism with the following two phases: 12 | (1) period-based dependencies discovery 13 | (2) time delay aggregation 14 | This block can replace the self-attention family mechanism seamlessly. 
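# --- Illustrative forward-pass sketch (annotation, not part of the original module). ---
# The layer defined below consumes [batch, length, d_model] queries/keys/values, exactly
# like the self-attention layers it replaces. Sizes are arbitrary; assumes the file is
# importable as `src.models.components.autocorrelation`.
import torch
from src.models.components.autocorrelation import AutoCorrelation, AutoCorrelationLayer

layer = AutoCorrelationLayer(
    AutoCorrelation(mask_flag=False, factor=1, attention_dropout=0.1),
    d_model=64, n_heads=4)
x = torch.randn(2, 96, 64)                   # [batch, length, d_model]
out, attn = layer(x, x, x, attn_mask=None)   # self-correlation
print(out.shape)                             # torch.Size([2, 96, 64]); attn is None here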
15 | """ 16 | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False): 17 | super(AutoCorrelation, self).__init__() 18 | self.factor = factor 19 | self.scale = scale 20 | self.mask_flag = mask_flag 21 | self.output_attention = output_attention 22 | self.dropout = nn.Dropout(attention_dropout) 23 | 24 | def time_delay_agg_training(self, values, corr): 25 | """ 26 | SpeedUp version of Autocorrelation (a batch-normalization style design) 27 | This is for the training phase. 28 | """ 29 | head = values.shape[1] 30 | channel = values.shape[2] 31 | length = values.shape[3] 32 | # find top k 33 | top_k = int(self.factor * math.log(length)) 34 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) 35 | index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1] 36 | weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1) 37 | # update corr 38 | tmp_corr = torch.softmax(weights, dim=-1) 39 | # aggregation 40 | tmp_values = values 41 | delays_agg = torch.zeros_like(values, dtype=torch.float, device=values.device) 42 | for i in range(top_k): 43 | pattern = torch.roll(tmp_values, -int(index[i]), -1) 44 | delays_agg = delays_agg + pattern * \ 45 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) 46 | return delays_agg 47 | 48 | def time_delay_agg_inference(self, values, corr): 49 | """ 50 | SpeedUp version of Autocorrelation (a batch-normalization style design) 51 | This is for the inference phase. 52 | """ 53 | batch = values.shape[0] 54 | head = values.shape[1] 55 | channel = values.shape[2] 56 | length = values.shape[3] 57 | # index init 58 | init_index = torch.arange(length, device=values.device).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1) 59 | # find top k 60 | top_k = int(self.factor * math.log(length)) 61 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) 62 | weights = torch.topk(mean_value, top_k, dim=-1)[0] 63 | delay = torch.topk(mean_value, top_k, dim=-1)[1] 64 | # update corr 65 | tmp_corr = torch.softmax(weights, dim=-1) 66 | # aggregation 67 | tmp_values = values.repeat(1, 1, 1, 2) 68 | delays_agg = torch.zeros_like(values, dtype=torch.float, device=values.device) 69 | for i in range(top_k): 70 | tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length) 71 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) 72 | delays_agg = delays_agg + pattern * \ 73 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) 74 | return delays_agg 75 | 76 | def time_delay_agg_full(self, values, corr): 77 | """ 78 | Standard version of Autocorrelation 79 | """ 80 | batch = values.shape[0] 81 | head = values.shape[1] 82 | channel = values.shape[2] 83 | length = values.shape[3] 84 | # index init 85 | init_index = torch.arange(length, device=values.device).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1) 86 | # find top k 87 | top_k = int(self.factor * math.log(length)) 88 | weights = torch.topk(corr, top_k, dim=-1)[0] 89 | delay = torch.topk(corr, top_k, dim=-1)[1] 90 | # update corr 91 | tmp_corr = torch.softmax(weights, dim=-1) 92 | # aggregation 93 | tmp_values = values.repeat(1, 1, 1, 2) 94 | delays_agg = torch.zeros_like(values, dtype=torch.float, device=values.device) 95 | for i in range(top_k): 96 | tmp_delay = init_index + delay[..., i].unsqueeze(-1) 97 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) 98 | delays_agg = 
delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1)) 99 | return delays_agg 100 | 101 | def forward(self, queries, keys, values, attn_mask): 102 | B, L, H, E = queries.shape 103 | _, S, _, D = values.shape 104 | if L > S: 105 | zeros = torch.zeros_like(queries[:, :(L - S), :], dtype=torch.float, device=queries.device) 106 | values = torch.cat([values, zeros], dim=1) 107 | keys = torch.cat([keys, zeros], dim=1) 108 | else: 109 | values = values[:, :L, :, :] 110 | keys = keys[:, :L, :, :] 111 | 112 | # period-based dependencies 113 | q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) 114 | k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) 115 | res = q_fft * torch.conj(k_fft) 116 | corr = torch.fft.irfft(res, dim=-1) 117 | 118 | # time delay agg 119 | if self.training: 120 | V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) 121 | else: 122 | V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) 123 | 124 | if self.output_attention: 125 | return (V.contiguous(), corr.permute(0, 3, 1, 2)) 126 | else: 127 | return (V.contiguous(), None) 128 | 129 | 130 | class AutoCorrelationLayer(nn.Module): 131 | def __init__(self, correlation, d_model, n_heads, d_keys=None, 132 | d_values=None): 133 | super(AutoCorrelationLayer, self).__init__() 134 | 135 | d_keys = d_keys or (d_model // n_heads) 136 | d_values = d_values or (d_model // n_heads) 137 | 138 | self.inner_correlation = correlation 139 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 140 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 141 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 142 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 143 | self.n_heads = n_heads 144 | 145 | def forward(self, queries, keys, values, attn_mask): 146 | B, L, _ = queries.shape 147 | _, S, _ = keys.shape 148 | H = self.n_heads 149 | 150 | queries = self.query_projection(queries).view(B, L, H, -1) 151 | keys = self.key_projection(keys).view(B, S, H, -1) 152 | values = self.value_projection(values).view(B, S, H, -1) 153 | 154 | out, attn = self.inner_correlation( 155 | queries, 156 | keys, 157 | values, 158 | attn_mask 159 | ) 160 | out = out.view(B, L, -1) 161 | 162 | return self.out_projection(out), attn -------------------------------------------------------------------------------- /src/models/components/autoformer.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | # Cell 7 | class my_Layernorm(nn.Module): 8 | """ 9 | Special designed layernorm for the seasonal part 10 | """ 11 | def __init__(self, channels): 12 | super(my_Layernorm, self).__init__() 13 | self.layernorm = nn.LayerNorm(channels) 14 | 15 | def forward(self, x): 16 | x_hat = self.layernorm(x) 17 | bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) 18 | return x_hat - bias 19 | 20 | 21 | class moving_avg(nn.Module): 22 | """ 23 | Moving average block to highlight the trend of time series 24 | """ 25 | def __init__(self, kernel_size, stride): 26 | super(moving_avg, self).__init__() 27 | self.kernel_size = kernel_size 28 | self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) 29 | 30 | def forward(self, x): 31 | # padding on the both ends of time series 32 | front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) 33 | end = x[:, 
-1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) 34 | x = torch.cat([front, x, end], dim=1) 35 | x = self.avg(x.permute(0, 2, 1)) 36 | x = x.permute(0, 2, 1) 37 | return x 38 | 39 | 40 | class series_decomp(nn.Module): 41 | """ 42 | Series decomposition block 43 | """ 44 | def __init__(self, kernel_size): 45 | super(series_decomp, self).__init__() 46 | self.moving_avg = moving_avg(kernel_size, stride=1) 47 | 48 | def forward(self, x): 49 | moving_mean = self.moving_avg(x) 50 | res = x - moving_mean 51 | return res, moving_mean 52 | 53 | 54 | class EncoderLayer(nn.Module): 55 | """ 56 | Autoformer encoder layer with the progressive decomposition architecture 57 | """ 58 | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): 59 | super(EncoderLayer, self).__init__() 60 | d_ff = d_ff or 4 * d_model 61 | self.attention = attention 62 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 63 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 64 | self.decomp1 = series_decomp(moving_avg) 65 | self.decomp2 = series_decomp(moving_avg) 66 | self.dropout = nn.Dropout(dropout) 67 | self.activation = F.relu if activation == "relu" else F.gelu 68 | 69 | def forward(self, x, attn_mask=None): 70 | new_x, attn = self.attention( 71 | x, x, x, 72 | attn_mask=attn_mask 73 | ) 74 | x = x + self.dropout(new_x) 75 | x, _ = self.decomp1(x) 76 | y = x 77 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 78 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 79 | res, _ = self.decomp2(x + y) 80 | return res, attn 81 | 82 | 83 | class Encoder(nn.Module): 84 | """ 85 | Autoformer encoder 86 | """ 87 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 88 | super(Encoder, self).__init__() 89 | self.attn_layers = nn.ModuleList(attn_layers) 90 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 91 | self.norm = norm_layer 92 | 93 | def forward(self, x, attn_mask=None): 94 | attns = [] 95 | if self.conv_layers is not None: 96 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): 97 | x, attn = attn_layer(x, attn_mask=attn_mask) 98 | x = conv_layer(x) 99 | attns.append(attn) 100 | x, attn = self.attn_layers[-1](x) 101 | attns.append(attn) 102 | else: 103 | for attn_layer in self.attn_layers: 104 | x, attn = attn_layer(x, attn_mask=attn_mask) 105 | attns.append(attn) 106 | 107 | if self.norm is not None: 108 | x = self.norm(x) 109 | 110 | return x, attns 111 | 112 | 113 | class DecoderLayer(nn.Module): 114 | """ 115 | Autoformer decoder layer with the progressive decomposition architecture 116 | """ 117 | def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, 118 | moving_avg=25, dropout=0.1, activation="relu"): 119 | super(DecoderLayer, self).__init__() 120 | d_ff = d_ff or 4 * d_model 121 | self.self_attention = self_attention 122 | self.cross_attention = cross_attention 123 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 124 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 125 | self.decomp1 = series_decomp(moving_avg) 126 | self.decomp2 = series_decomp(moving_avg) 127 | self.decomp3 = series_decomp(moving_avg) 128 | self.dropout = nn.Dropout(dropout) 129 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, 130 | padding_mode='circular', bias=False) 
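# --- Illustrative sketch (annotation, separate from the class definitions above). ---
# series_decomp splits a [batch, time, channels] tensor into a seasonal residual and a
# moving-average trend; with an odd kernel the front/back padding keeps the length fixed.
# Sizes are arbitrary; assumes the file is importable as `src.models.components.autoformer`.
import torch
from src.models.components.autoformer import series_decomp

decomp = series_decomp(kernel_size=25)
x = torch.randn(4, 96, 7)                 # e.g. 96 steps of 7 variables
seasonal, trend = decomp(x)
assert seasonal.shape == trend.shape == x.shape
assert torch.allclose(seasonal + trend, x, atol=1e-6)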
131 | self.activation = F.relu if activation == "relu" else F.gelu 132 | 133 | def forward(self, x, cross, x_mask=None, cross_mask=None): 134 | x = x + self.dropout(self.self_attention( 135 | x, x, x, 136 | attn_mask=x_mask 137 | )[0]) 138 | x, trend1 = self.decomp1(x) 139 | x = x + self.dropout(self.cross_attention( 140 | x, cross, cross, 141 | attn_mask=cross_mask 142 | )[0]) 143 | x, trend2 = self.decomp2(x) 144 | y = x 145 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 146 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 147 | x, trend3 = self.decomp3(x + y) 148 | 149 | residual_trend = trend1 + trend2 + trend3 150 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) 151 | return x, residual_trend 152 | 153 | 154 | class Decoder(nn.Module): 155 | """ 156 | Autoformer encoder 157 | """ 158 | def __init__(self, layers, norm_layer=None, projection=None): 159 | super(Decoder, self).__init__() 160 | self.layers = nn.ModuleList(layers) 161 | self.norm = norm_layer 162 | self.projection = projection 163 | 164 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): 165 | for layer in self.layers: 166 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 167 | trend = trend + residual_trend 168 | 169 | if self.norm is not None: 170 | x = self.norm(x) 171 | 172 | if self.projection is not None: 173 | x = self.projection(x) 174 | return x, trend -------------------------------------------------------------------------------- /src/models/components/common.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import torch as t 3 | import torch.nn as nn 4 | from torch.nn.utils import weight_norm 5 | from torch.autograd.function import Function 6 | 7 | # Cell 8 | class Chomp1d(nn.Module): 9 | """ 10 | Receives x input of dim [N,C,T], and trims it so that only 11 | 'time available' information is used. Used for one dimensional 12 | causal convolutions. 13 | : param chomp_size: lenght of outsample values to skip. 14 | """ 15 | def __init__(self, chomp_size): 16 | super(Chomp1d, self).__init__() 17 | self.chomp_size = chomp_size 18 | 19 | def forward(self, x): 20 | return x[:, :, :-self.chomp_size].contiguous() 21 | 22 | # Cell 23 | ACTIVATIONS = ['ReLU', 24 | 'Softplus', 25 | 'Tanh', 26 | 'SELU', 27 | 'LeakyReLU', 28 | 'PReLU', 29 | 'Sigmoid'] 30 | 31 | class CausalConv1d(nn.Module): 32 | """ 33 | Receives x input of dim [N,C,T], computes a unidimensional 34 | causal convolution. 
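# --- Illustrative sketch (annotation, not part of the original module). ---
# The causal convolution below pads by (kernel_size - 1) * dilation and then "chomps"
# that many trailing steps, so position t only sees inputs up to t while the length T
# is preserved. Sizes are arbitrary; assumes the file is importable as
# `src.models.components.common`.
import torch
from src.models.components.common import CausalConv1d

conv = CausalConv1d(in_channels=1, out_channels=8, kernel_size=3,
                    padding=2, dilation=1, activation='ReLU')
x = torch.randn(16, 1, 48)   # [N, C, T]
y = conv(x)                  # -> [16, 8, 48]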
35 | 36 | Parameters 37 | ---------- 38 | in_channels: int 39 | out_channels: int 40 | activation: str 41 | https://discuss.pytorch.org/t/call-activation-function-from-string 42 | padding: int 43 | kernel_size: int 44 | dilation: int 45 | 46 | Returns: 47 | x: tesor 48 | torch tensor of dim [N,C,T] 49 | activation(conv1d(inputs, kernel) + bias) 50 | """ 51 | def __init__(self, in_channels, out_channels, kernel_size, 52 | padding, dilation, activation, stride:int=1, with_weight_norm:bool=False): 53 | super(CausalConv1d, self).__init__() 54 | assert activation in ACTIVATIONS, f'{activation} is not in {ACTIVATIONS}' 55 | 56 | self.conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, 57 | kernel_size=kernel_size, stride=stride, padding=padding, 58 | dilation=dilation) 59 | if with_weight_norm: self.conv = weight_norm(self.conv) 60 | 61 | self.chomp = Chomp1d(padding) 62 | self.activation = getattr(nn, activation)() 63 | self.causalconv = nn.Sequential(self.conv, self.chomp, self.activation) 64 | 65 | def forward(self, x): 66 | return self.causalconv(x) 67 | 68 | # Cell 69 | class TimeDistributed2d(nn.Module): 70 | """ 71 | Receives x input of dim [N,C,T], reshapes it to [T,N,C] 72 | Collapses input of dim [T,N,C] to [TxN,C] and applies module to C. 73 | Finally reshapes it to [N,C_out,T]. 74 | Allows handling of variable sequence lengths and minibatch sizes. 75 | : param module: Module to apply input to. 76 | """ 77 | def __init__(self, module): 78 | super(TimeDistributed2d, self).__init__() 79 | self.module = module 80 | 81 | def forward(self, x): 82 | N, C, T = x.size() 83 | x = x.permute(2, 0, 1).contiguous() 84 | x = x.view(T * N, -1) 85 | x = self.module(x) 86 | x = x.view(T, N, -1) 87 | x = x.permute(1, 2, 0).contiguous() 88 | return x 89 | 90 | # Cell 91 | class TimeDistributed3d(nn.Module): 92 | """ 93 | Receives x input of dim [N,L,C,T], reshapes it to [T,N,L,C] 94 | Collapses input of dim [T,N,L,C] to [TxNxL,C] and applies module to C. 95 | Finally reshapes it to [N,L,C_out,T]. 96 | Allows handling of variable sequence lengths and minibatch sizes. 97 | : param module: Module to apply input to. 98 | """ 99 | def __init__(self, module): 100 | super(TimeDistributed3d, self).__init__() 101 | self.module = module 102 | 103 | def forward(self, x): 104 | N, L, C, T = x.size() 105 | x = x.permute(3, 0, 1, 2).contiguous() #[N,L,C,T] --> [T,N,L,C] 106 | x = x.view(T * N * L, -1) 107 | x = self.module(x) 108 | x = x.view(T, N, L, -1) 109 | x = x.permute(1, 2, 3, 0).contiguous() #[T,N,L,C] --> [N,L,C,T] 110 | return x 111 | 112 | # Cell 113 | class RepeatVector(nn.Module): 114 | """ 115 | Receives x input of dim [N,C], and repeats the vector 116 | to create tensor of shape [N, C, K] 117 | : repeats: int, the number of repetitions for the vector. 118 | """ 119 | def __init__(self, repeats): 120 | super(RepeatVector, self).__init__() 121 | self.repeats = repeats 122 | 123 | def forward(self, x): 124 | x = x.unsqueeze(-1).repeat(1, 1, self.repeats) # <------------ Mejorar? 125 | return x 126 | 127 | # Cell 128 | class L1Regularizer(nn.Module): 129 | """ 130 | Layer meant to apply elementwise L1 regularization to a dimension. 131 | Receives x input of dim [N,C] and returns the input [N,C]. 
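# --- Illustrative sketch (annotation, not part of the original module). ---
# L1Regularizer learns one multiplicative gate per input channel; adding its
# `regularization()` term to the training loss pushes unused gates toward zero.
# Sizes are arbitrary; assumes the file is importable as `src.models.components.common`.
import torch as t
from src.models.components.common import L1Regularizer

gate = L1Regularizer(in_features=10, l1_lambda=0.01)
x = t.randn(32, 10)              # [N, C]
out = gate(x)                    # [N, C], channel-wise re-weighted
penalty = gate.regularization()  # 0.01 * ||weight||_1, to be added to the loss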
132 | """ 133 | def __init__(self, in_features, l1_lambda): 134 | super(L1Regularizer, self).__init__() 135 | self.l1_lambda = l1_lambda 136 | self.weight = t.nn.Parameter(t.rand((in_features), dtype=t.float), 137 | requires_grad=True) 138 | 139 | def forward(self, x): 140 | # channelwise regularization, turns on or off channels 141 | x = t.einsum('bp,p->bp', x, self.weight) 142 | return x 143 | 144 | def regularization(self): 145 | return self.l1_lambda * t.norm(self.weight, 1) -------------------------------------------------------------------------------- /src/models/components/drnn.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import torch 3 | import torch.nn as nn 4 | import torch.autograd as autograd 5 | 6 | # Cell 7 | class LSTMCell(nn.Module): 8 | def __init__(self, input_size, hidden_size, dropout=0.): 9 | super(LSTMCell, self).__init__() 10 | self.input_size = input_size 11 | self.hidden_size = hidden_size 12 | self.weight_ih = nn.Parameter(torch.randn(4 * hidden_size, input_size)) 13 | self.weight_hh = nn.Parameter(torch.randn(4 * hidden_size, hidden_size)) 14 | self.bias_ih = nn.Parameter(torch.randn(4 * hidden_size)) 15 | self.bias_hh = nn.Parameter(torch.randn(4 * hidden_size)) 16 | self.dropout = dropout 17 | 18 | def forward(self, inputs, hidden): 19 | hx, cx = hidden[0].squeeze(0), hidden[1].squeeze(0) 20 | gates = (torch.matmul(inputs, self.weight_ih.t()) + self.bias_ih + 21 | torch.matmul(hx, self.weight_hh.t()) + self.bias_hh) 22 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) 23 | 24 | ingate = torch.sigmoid(ingate) 25 | forgetgate = torch.sigmoid(forgetgate) 26 | cellgate = torch.tanh(cellgate) 27 | outgate = torch.sigmoid(outgate) 28 | 29 | cy = (forgetgate * cx) + (ingate * cellgate) 30 | hy = outgate * torch.tanh(cy) 31 | 32 | return hy, (hy, cy) 33 | 34 | # Cell 35 | class ResLSTMCell(nn.Module): 36 | def __init__(self, input_size, hidden_size, dropout=0.): 37 | super(ResLSTMCell, self).__init__() 38 | self.register_buffer('input_size', torch.Tensor([input_size])) 39 | self.register_buffer('hidden_size', torch.Tensor([hidden_size])) 40 | self.weight_ii = nn.Parameter(torch.randn(3 * hidden_size, input_size)) 41 | self.weight_ic = nn.Parameter(torch.randn(3 * hidden_size, hidden_size)) 42 | self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, hidden_size)) 43 | self.bias_ii = nn.Parameter(torch.randn(3 * hidden_size)) 44 | self.bias_ic = nn.Parameter(torch.randn(3 * hidden_size)) 45 | self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size)) 46 | self.weight_hh = nn.Parameter(torch.randn(1 * hidden_size, hidden_size)) 47 | self.bias_hh = nn.Parameter(torch.randn(1 * hidden_size)) 48 | self.weight_ir = nn.Parameter(torch.randn(hidden_size, input_size)) 49 | self.dropout = dropout 50 | 51 | def forward(self, inputs, hidden): 52 | hx, cx = hidden[0].squeeze(0), hidden[1].squeeze(0) 53 | 54 | ifo_gates = (torch.matmul(inputs, self.weight_ii.t()) + self.bias_ii + 55 | torch.matmul(hx, self.weight_ih.t()) + self.bias_ih + 56 | torch.matmul(cx, self.weight_ic.t()) + self.bias_ic) 57 | ingate, forgetgate, outgate = ifo_gates.chunk(3, 1) 58 | 59 | cellgate = torch.matmul(hx, self.weight_hh.t()) + self.bias_hh 60 | 61 | ingate = torch.sigmoid(ingate) 62 | forgetgate = torch.sigmoid(forgetgate) 63 | cellgate = torch.tanh(cellgate) 64 | outgate = torch.sigmoid(outgate) 65 | 66 | cy = (forgetgate * cx) + (ingate * cellgate) 67 | ry = torch.tanh(cy) 68 | 69 | if self.input_size == self.hidden_size: 70 | hy = 
outgate * (ry + inputs) 71 | else: 72 | hy = outgate * (ry + torch.matmul(inputs, self.weight_ir.t())) 73 | return hy, (hy, cy) 74 | 75 | # Cell 76 | class ResLSTMLayer(nn.Module): 77 | def __init__(self, input_size, hidden_size, dropout=0.): 78 | super(ResLSTMLayer, self).__init__() 79 | self.input_size = input_size 80 | self.hidden_size = hidden_size 81 | self.cell = ResLSTMCell(input_size, hidden_size, dropout=0.) 82 | 83 | def forward(self, inputs, hidden): 84 | inputs = inputs.unbind(0) 85 | outputs = [] 86 | for i in range(len(inputs)): 87 | out, hidden = self.cell(inputs[i], hidden) 88 | outputs += [out] 89 | outputs = torch.stack(outputs) 90 | return outputs, hidden 91 | 92 | # Cell 93 | class AttentiveLSTMLayer(nn.Module): 94 | def __init__(self, input_size, hidden_size, dropout=0.0): 95 | super(AttentiveLSTMLayer, self).__init__() 96 | self.input_size = input_size 97 | self.hidden_size = hidden_size 98 | attention_hsize = hidden_size 99 | self.attention_hsize = attention_hsize 100 | 101 | self.cell = LSTMCell(input_size, hidden_size) 102 | self.attn_layer = nn.Sequential(nn.Linear(2 * hidden_size + input_size, attention_hsize), 103 | nn.Tanh(), 104 | nn.Linear(attention_hsize, 1)) 105 | self.softmax = nn.Softmax(dim=0) 106 | self.dropout = dropout 107 | 108 | def forward(self, inputs, hidden): 109 | inputs = inputs.unbind(0) 110 | outputs = [] 111 | 112 | for t in range(len(inputs)): 113 | # attention on windows 114 | hx, cx = (tensor.squeeze(0) for tensor in hidden) 115 | hx_rep = hx.repeat(len(inputs), 1, 1) 116 | cx_rep = cx.repeat(len(inputs), 1, 1) 117 | x = torch.cat((inputs, hx_rep, cx_rep), dim=-1) 118 | l = self.attn_layer(x) 119 | beta = self.softmax(l) 120 | context = torch.bmm(beta.permute(1, 2, 0), 121 | inputs.permute(1, 0, 2)).squeeze(1) 122 | out, hidden = self.cell(context, hidden) 123 | outputs += [out] 124 | outputs = torch.stack(outputs) 125 | return outputs, hidden 126 | 127 | # Cell 128 | class DRNN(nn.Module): 129 | 130 | def __init__(self, n_input, n_hidden, n_layers, dilations, dropout=0, cell_type='GRU', batch_first=False): 131 | super(DRNN, self).__init__() 132 | 133 | self.dilations = dilations 134 | self.cell_type = cell_type 135 | self.batch_first = batch_first 136 | 137 | layers = [] 138 | if self.cell_type == "GRU": 139 | cell = nn.GRU 140 | elif self.cell_type == "RNN": 141 | cell = nn.RNN 142 | elif self.cell_type == "LSTM": 143 | cell = nn.LSTM 144 | elif self.cell_type == "ResLSTM": 145 | cell = ResLSTMLayer 146 | elif self.cell_type == "AttentiveLSTM": 147 | cell = AttentiveLSTMLayer 148 | else: 149 | raise NotImplementedError 150 | 151 | for i in range(n_layers): 152 | if i == 0: 153 | c = cell(n_input, n_hidden, dropout=dropout) 154 | else: 155 | c = cell(n_hidden, n_hidden, dropout=dropout) 156 | layers.append(c) 157 | self.cells = nn.Sequential(*layers) 158 | 159 | def forward(self, inputs, hidden=None): 160 | if self.batch_first: 161 | inputs = inputs.transpose(0, 1) 162 | outputs = [] 163 | for i, (cell, dilation) in enumerate(zip(self.cells, self.dilations)): 164 | if hidden is None: 165 | inputs, _ = self.drnn_layer(cell, inputs, dilation) 166 | else: 167 | inputs, hidden[i] = self.drnn_layer(cell, inputs, dilation, hidden[i]) 168 | 169 | outputs.append(inputs[-dilation:]) 170 | 171 | if self.batch_first: 172 | inputs = inputs.transpose(0, 1) 173 | return inputs, outputs 174 | 175 | def drnn_layer(self, cell, inputs, rate, hidden=None): 176 | n_steps = len(inputs) 177 | batch_size = inputs[0].size(0) 178 | hidden_size = 
cell.hidden_size 179 | 180 | inputs, dilated_steps = self._pad_inputs(inputs, n_steps, rate) 181 | dilated_inputs = self._prepare_inputs(inputs, rate) 182 | 183 | if hidden is None: 184 | dilated_outputs, hidden = self._apply_cell(dilated_inputs, cell, batch_size, rate, hidden_size) 185 | else: 186 | hidden = self._prepare_inputs(hidden, rate) 187 | dilated_outputs, hidden = self._apply_cell(dilated_inputs, cell, batch_size, rate, hidden_size, 188 | hidden=hidden) 189 | 190 | splitted_outputs = self._split_outputs(dilated_outputs, rate) 191 | outputs = self._unpad_outputs(splitted_outputs, n_steps) 192 | 193 | return outputs, hidden 194 | 195 | def _apply_cell(self, dilated_inputs, cell, batch_size, rate, hidden_size, hidden=None): 196 | if hidden is None: 197 | hidden = torch.zeros(batch_size * rate, hidden_size, 198 | dtype=dilated_inputs.dtype, 199 | device=dilated_inputs.device) 200 | hidden = hidden.unsqueeze(0) 201 | 202 | if self.cell_type in ['LSTM', 'ResLSTM', 'AttentiveLSTM']: 203 | hidden = (hidden, hidden) 204 | 205 | dilated_outputs, hidden = cell(dilated_inputs, hidden) # compatibility hack 206 | 207 | return dilated_outputs, hidden 208 | 209 | def _unpad_outputs(self, splitted_outputs, n_steps): 210 | return splitted_outputs[:n_steps] 211 | 212 | def _split_outputs(self, dilated_outputs, rate): 213 | batchsize = dilated_outputs.size(1) // rate 214 | 215 | blocks = [dilated_outputs[:, i * batchsize: (i + 1) * batchsize, :] for i in range(rate)] 216 | 217 | interleaved = torch.stack((blocks)).transpose(1, 0).contiguous() 218 | interleaved = interleaved.view(dilated_outputs.size(0) * rate, 219 | batchsize, 220 | dilated_outputs.size(2)) 221 | return interleaved 222 | 223 | def _pad_inputs(self, inputs, n_steps, rate): 224 | iseven = (n_steps % rate) == 0 225 | 226 | if not iseven: 227 | dilated_steps = n_steps // rate + 1 228 | 229 | zeros_ = torch.zeros(dilated_steps * rate - inputs.size(0), 230 | inputs.size(1), 231 | inputs.size(2), 232 | dtype=inputs.dtype, 233 | device=inputs.device) 234 | inputs = torch.cat((inputs, zeros_)) 235 | else: 236 | dilated_steps = n_steps // rate 237 | 238 | return inputs, dilated_steps 239 | 240 | def _prepare_inputs(self, inputs, rate): 241 | dilated_inputs = torch.cat([inputs[j::rate, :, :] for j in range(rate)], 1) 242 | return dilated_inputs -------------------------------------------------------------------------------- /src/models/components/embed.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.nn.utils import weight_norm 8 | 9 | # Cell 10 | class PositionalEmbedding(nn.Module): 11 | def __init__(self, d_model, max_len=5000): 12 | super(PositionalEmbedding, self).__init__() 13 | # Compute the positional encodings once in log space. 
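# --- Illustrative sketch (annotation, not part of the original module). ---
# The DRNN class shown above (components/drnn.py) stacks recurrent layers with growing
# dilation; inputs follow the default [time, batch, features] layout unless
# batch_first=True. Sizes are arbitrary; assumes the file is importable as
# `src.models.components.drnn`.
import torch
from src.models.components.drnn import DRNN

drnn = DRNN(n_input=1, n_hidden=16, n_layers=2, dilations=[1, 2], cell_type='GRU')
x = torch.randn(24, 32, 1)    # [T, N, C]
out, traces = drnn(x)         # out: [24, 32, 16]; traces holds the last `dilation`
                              # steps of each layer's output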
14 | pe = torch.zeros(max_len, d_model).float() 15 | pe.require_grad = False 16 | 17 | position = torch.arange(0, max_len).float().unsqueeze(1) 18 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() 19 | 20 | pe[:, 0::2] = torch.sin(position * div_term) 21 | pe[:, 1::2] = torch.cos(position * div_term) 22 | 23 | pe = pe.unsqueeze(0) 24 | self.register_buffer('pe', pe) 25 | 26 | def forward(self, x): 27 | return self.pe[:, :x.size(1)] 28 | 29 | 30 | class TokenEmbedding(nn.Module): 31 | def __init__(self, c_in, d_model): 32 | super(TokenEmbedding, self).__init__() 33 | padding = 1 if torch.__version__ >= '1.5.0' else 2 34 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 35 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 36 | for m in self.modules(): 37 | if isinstance(m, nn.Conv1d): 38 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu') 39 | 40 | def forward(self, x): 41 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 42 | return x 43 | 44 | 45 | class FixedEmbedding(nn.Module): 46 | def __init__(self, c_in, d_model): 47 | super(FixedEmbedding, self).__init__() 48 | 49 | w = torch.zeros(c_in, d_model).float() 50 | w.require_grad = False 51 | 52 | position = torch.arange(0, c_in).float().unsqueeze(1) 53 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() 54 | 55 | w[:, 0::2] = torch.sin(position * div_term) 56 | w[:, 1::2] = torch.cos(position * div_term) 57 | 58 | self.emb = nn.Embedding(c_in, d_model) 59 | self.emb.weight = nn.Parameter(w, requires_grad=False) 60 | 61 | def forward(self, x): 62 | return self.emb(x).detach() 63 | 64 | 65 | class TemporalEmbedding(nn.Module): 66 | def __init__(self, d_model, embed_type='fixed', freq='h'): 67 | super(TemporalEmbedding, self).__init__() 68 | 69 | minute_size = 4 70 | hour_size = 24 71 | weekday_size = 7 72 | day_size = 32 73 | month_size = 13 74 | 75 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 76 | if freq == 't': 77 | self.minute_embed = Embed(minute_size, d_model) 78 | self.hour_embed = Embed(hour_size, d_model) 79 | self.weekday_embed = Embed(weekday_size, d_model) 80 | self.day_embed = Embed(day_size, d_model) 81 | self.month_embed = Embed(month_size, d_model) 82 | 83 | def forward(self, x): 84 | x = x.long() 85 | 86 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0. 
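# --- Illustrative sketch (annotation, not part of the original module). ---
# TemporalEmbedding consumes integer calendar features ordered as
# [month, day, weekday, hour(, minute)] along the last axis and sums their embeddings.
# Sizes are arbitrary; assumes the file is importable as `src.models.components.embed`.
import torch
from src.models.components.embed import TemporalEmbedding

emb = TemporalEmbedding(d_model=64, embed_type='fixed', freq='h')
x_mark = torch.stack([
    torch.randint(1, 13, (2, 96)),   # month
    torch.randint(1, 32, (2, 96)),   # day of month
    torch.randint(0, 7,  (2, 96)),   # weekday
    torch.randint(0, 24, (2, 96)),   # hour
], dim=-1)
print(emb(x_mark).shape)             # torch.Size([2, 96, 64])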
87 | hour_x = self.hour_embed(x[:, :, 3]) 88 | weekday_x = self.weekday_embed(x[:, :, 2]) 89 | day_x = self.day_embed(x[:, :, 1]) 90 | month_x = self.month_embed(x[:, :, 0]) 91 | 92 | return hour_x + weekday_x + day_x + month_x + minute_x 93 | 94 | 95 | class TimeFeatureEmbedding(nn.Module): 96 | def __init__(self, d_model, embed_type='timeF', freq='h'): 97 | super(TimeFeatureEmbedding, self).__init__() 98 | 99 | freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 100 | d_inp = freq_map[freq] 101 | self.embed = nn.Linear(d_inp, d_model, bias=False) 102 | 103 | def forward(self, x): 104 | return self.embed(x) 105 | 106 | 107 | class DataEmbedding(nn.Module): 108 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 109 | super(DataEmbedding, self).__init__() 110 | 111 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 112 | self.position_embedding = PositionalEmbedding(d_model=d_model) 113 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 114 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 115 | d_model=d_model, embed_type=embed_type, freq=freq) 116 | self.dropout = nn.Dropout(p=dropout) 117 | 118 | def forward(self, x, x_mark): 119 | x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 120 | return self.dropout(x) 121 | 122 | 123 | class DataEmbedding_wo_pos(nn.Module): 124 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 125 | super(DataEmbedding_wo_pos, self).__init__() 126 | 127 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 128 | self.position_embedding = PositionalEmbedding(d_model=d_model) 129 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 130 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 131 | d_model=d_model, embed_type=embed_type, freq=freq) 132 | self.dropout = nn.Dropout(p=dropout) 133 | 134 | def forward(self, x, x_mark): 135 | x = self.value_embedding(x) + self.temporal_embedding(x_mark) 136 | return self.dropout(x) -------------------------------------------------------------------------------- /src/models/components/selfattention.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | from math import sqrt 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | # Cell 10 | class TriangularCausalMask(): 11 | def __init__(self, B, L, device="cpu"): 12 | mask_shape = [B, 1, L, L] 13 | with torch.no_grad(): 14 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 15 | 16 | @property 17 | def mask(self): 18 | return self._mask 19 | 20 | 21 | class ProbMask(): 22 | def __init__(self, B, H, L, index, scores, device="cpu"): 23 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 24 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) 25 | indicator = _mask_ex[torch.arange(B)[:, None, None], 26 | torch.arange(H)[None, :, None], 27 | index, :].to(device) 28 | self._mask = indicator.view(scores.shape).to(device) 29 | 30 | @property 31 | def mask(self): 32 | return self._mask 33 | 34 | # Cell 35 | class FullAttention(nn.Module): 36 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 37 | super(FullAttention, self).__init__() 38 | self.scale = scale 39 | self.mask_flag = mask_flag 40 | 
self.output_attention = output_attention 41 | self.dropout = nn.Dropout(attention_dropout) 42 | 43 | def forward(self, queries, keys, values, attn_mask): 44 | B, L, H, E = queries.shape 45 | _, S, _, D = values.shape 46 | scale = self.scale or 1. / sqrt(E) 47 | 48 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 49 | 50 | if self.mask_flag: 51 | if attn_mask is None: 52 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 53 | 54 | scores.masked_fill_(attn_mask.mask, -np.inf) 55 | 56 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 57 | V = torch.einsum("bhls,bshd->blhd", A, values) 58 | 59 | if self.output_attention: 60 | return (V.contiguous(), A) 61 | else: 62 | return (V.contiguous(), None) 63 | 64 | 65 | class ProbAttention(nn.Module): 66 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 67 | super(ProbAttention, self).__init__() 68 | self.factor = factor 69 | self.scale = scale 70 | self.mask_flag = mask_flag 71 | self.output_attention = output_attention 72 | self.dropout = nn.Dropout(attention_dropout) 73 | 74 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 75 | # Q [B, H, L, D] 76 | B, H, L_K, E = K.shape 77 | _, _, L_Q, _ = Q.shape 78 | 79 | # calculate the sampled Q_K 80 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 81 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q 82 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] 83 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 84 | 85 | # find the Top_k query with sparisty measurement 86 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 87 | M_top = M.topk(n_top, sorted=False)[1] 88 | 89 | # use the reduced Q to calculate Q_K 90 | Q_reduce = Q[torch.arange(B)[:, None, None], 91 | torch.arange(H)[None, :, None], 92 | M_top, :] # factor*ln(L_q) 93 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 94 | 95 | return Q_K, M_top 96 | 97 | def _get_initial_context(self, V, L_Q): 98 | B, H, L_V, D = V.shape 99 | if not self.mask_flag: 100 | # V_sum = V.sum(dim=-2) 101 | V_sum = V.mean(dim=-2) 102 | contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() 103 | else: # use mask 104 | assert (L_Q == L_V) # requires that L_Q == L_V, i.e. 
for self-attention only 105 | contex = V.cumsum(dim=-2) 106 | return contex 107 | 108 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 109 | B, H, L_V, D = V.shape 110 | 111 | if self.mask_flag: 112 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 113 | scores.masked_fill_(attn_mask.mask, -np.inf) 114 | 115 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 116 | 117 | context_in[torch.arange(B)[:, None, None], 118 | torch.arange(H)[None, :, None], 119 | index, :] = torch.matmul(attn, V).type_as(context_in) 120 | if self.output_attention: 121 | attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device) 122 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn 123 | return (context_in, attns) 124 | else: 125 | return (context_in, None) 126 | 127 | def forward(self, queries, keys, values, attn_mask): 128 | B, L_Q, H, D = queries.shape 129 | _, L_K, _, _ = keys.shape 130 | 131 | queries = queries.transpose(2, 1) 132 | keys = keys.transpose(2, 1) 133 | values = values.transpose(2, 1) 134 | 135 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 136 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 137 | 138 | U_part = U_part if U_part < L_K else L_K 139 | u = u if u < L_Q else L_Q 140 | 141 | scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u) 142 | 143 | # add scale factor 144 | scale = self.scale or 1. / sqrt(D) 145 | if scale is not None: 146 | scores_top = scores_top * scale 147 | # get the context 148 | context = self._get_initial_context(values, L_Q) 149 | # update the context with selected top_k queries 150 | context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask) 151 | 152 | return context.contiguous(), attn 153 | 154 | 155 | class AttentionLayer(nn.Module): 156 | def __init__(self, attention, d_model, n_heads, d_keys=None, 157 | d_values=None): 158 | super(AttentionLayer, self).__init__() 159 | 160 | d_keys = d_keys or (d_model // n_heads) 161 | d_values = d_values or (d_model // n_heads) 162 | 163 | self.inner_attention = attention 164 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 165 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 166 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 167 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 168 | self.n_heads = n_heads 169 | 170 | def forward(self, queries, keys, values, attn_mask): 171 | B, L, _ = queries.shape 172 | _, S, _ = keys.shape 173 | H = self.n_heads 174 | 175 | queries = self.query_projection(queries).view(B, L, H, -1) 176 | keys = self.key_projection(keys).view(B, S, H, -1) 177 | values = self.value_projection(values).view(B, S, H, -1) 178 | 179 | out, attn = self.inner_attention( 180 | queries, 181 | keys, 182 | values, 183 | attn_mask 184 | ) 185 | out = out.view(B, L, -1) 186 | 187 | return self.out_projection(out), attn -------------------------------------------------------------------------------- /src/models/components/tcn.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn.utils import weight_norm 5 | 6 | from .common import Chomp1d 7 | from .common import CausalConv1d 8 | 9 | # Cell 10 | # https://github.com/locuslab/TCN 11 | class _TemporalBlock(nn.Module): 12 | def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, 
dropout=0.2): 13 | super(_TemporalBlock, self).__init__() 14 | self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size, 15 | stride=stride, padding=padding, dilation=dilation)) 16 | self.chomp1 = Chomp1d(padding) 17 | self.relu1 = nn.ReLU() 18 | self.dropout1 = nn.Dropout(dropout) 19 | 20 | self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size, 21 | stride=stride, padding=padding, dilation=dilation)) 22 | self.chomp2 = Chomp1d(padding) 23 | self.relu2 = nn.ReLU() 24 | self.dropout2 = nn.Dropout(dropout) 25 | 26 | self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1, 27 | self.conv2, self.chomp2, self.relu2, self.dropout2) 28 | self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None 29 | self.relu = nn.ReLU() 30 | self.init_weights() 31 | 32 | def init_weights(self): 33 | self.conv1.weight.data.normal_(0, 0.01) 34 | self.conv2.weight.data.normal_(0, 0.01) 35 | if self.downsample is not None: 36 | self.downsample.weight.data.normal_(0, 0.01) 37 | 38 | def forward(self, x): 39 | out = self.net(x) 40 | res = x if self.downsample is None else self.downsample(x) 41 | return self.relu(out + res) 42 | 43 | # Cell 44 | class _TemporalBlock2(nn.Module): 45 | def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2): 46 | super(_TemporalBlock2, self).__init__() 47 | self.causalconv1 = CausalConv1d(in_channels=n_inputs, out_channels=n_outputs, 48 | kernel_size=kernel_size, stride=stride, padding=padding, 49 | dilation=dilation, activation='ReLU', with_weight_norm=True) 50 | 51 | self.causalconv2 = CausalConv1d(in_channels=n_outputs, out_channels=n_outputs, 52 | kernel_size=kernel_size, stride=stride, padding=padding, 53 | dilation=dilation, activation='ReLU', with_weight_norm=True) 54 | 55 | self.net = nn.Sequential(self.causalconv1, nn.Dropout(dropout), 56 | self.causalconv2, nn.Dropout(dropout)) 57 | 58 | self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None 59 | self.relu = nn.ReLU() 60 | self.init_weights() 61 | 62 | def init_weights(self): 63 | self.causalconv1.conv.weight.data.normal_(0, 0.01) 64 | self.causalconv2.conv.weight.data.normal_(0, 0.01) 65 | if self.downsample is not None: 66 | self.downsample.weight.data.normal_(0, 0.01) 67 | 68 | def forward(self, x): 69 | out = self.net(x) 70 | res = x if self.downsample is None else self.downsample(x) 71 | return self.relu(out + res) 72 | 73 | # Cell 74 | class _TemporalConvNet(nn.Module): 75 | def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2): 76 | super(_TemporalConvNet, self).__init__() 77 | layers = [] 78 | num_levels = len(num_channels) 79 | for i in range(num_levels): 80 | dilation_size = 2 ** i 81 | in_channels = num_inputs if i == 0 else num_channels[i-1] 82 | out_channels = num_channels[i] 83 | #layers += [_TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size, 84 | # padding=(kernel_size-1) * dilation_size, dropout=dropout)] 85 | layers += [_TemporalBlock2(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size, 86 | padding=(kernel_size-1) * dilation_size, dropout=dropout)] 87 | 88 | self.network = nn.Sequential(*layers) 89 | 90 | def forward(self, x): 91 | return self.network(x) -------------------------------------------------------------------------------- /src/models/components/transformer.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import torch 3 | import 
torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | # Cell 7 | class ConvLayer(nn.Module): 8 | def __init__(self, c_in): 9 | super(ConvLayer, self).__init__() 10 | self.downConv = nn.Conv1d(in_channels=c_in, 11 | out_channels=c_in, 12 | kernel_size=3, 13 | padding=2, 14 | padding_mode='circular') 15 | self.norm = nn.BatchNorm1d(c_in) 16 | self.activation = nn.ELU() 17 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 18 | 19 | def forward(self, x): 20 | x = self.downConv(x.permute(0, 2, 1)) 21 | x = self.norm(x) 22 | x = self.activation(x) 23 | x = self.maxPool(x) 24 | x = x.transpose(1, 2) 25 | return x 26 | 27 | 28 | class EncoderLayer(nn.Module): 29 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 30 | super(EncoderLayer, self).__init__() 31 | d_ff = d_ff or 4 * d_model 32 | self.attention = attention 33 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 34 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 35 | self.norm1 = nn.LayerNorm(d_model) 36 | self.norm2 = nn.LayerNorm(d_model) 37 | self.dropout = nn.Dropout(dropout) 38 | self.activation = F.relu if activation == "relu" else F.gelu 39 | 40 | def forward(self, x, attn_mask=None): 41 | new_x, attn = self.attention( 42 | x, x, x, 43 | attn_mask=attn_mask 44 | ) 45 | x = x + self.dropout(new_x) 46 | 47 | y = x = self.norm1(x) 48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 49 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 50 | 51 | return self.norm2(x + y), attn 52 | 53 | 54 | class Encoder(nn.Module): 55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 56 | super(Encoder, self).__init__() 57 | self.attn_layers = nn.ModuleList(attn_layers) 58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 59 | self.norm = norm_layer 60 | 61 | def forward(self, x, attn_mask=None): 62 | # x [B, L, D] 63 | attns = [] 64 | if self.conv_layers is not None: 65 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): 66 | x, attn = attn_layer(x, attn_mask=attn_mask) 67 | x = conv_layer(x) 68 | attns.append(attn) 69 | x, attn = self.attn_layers[-1](x) 70 | attns.append(attn) 71 | else: 72 | for attn_layer in self.attn_layers: 73 | x, attn = attn_layer(x, attn_mask=attn_mask) 74 | attns.append(attn) 75 | 76 | if self.norm is not None: 77 | x = self.norm(x) 78 | 79 | return x, attns 80 | 81 | 82 | class DecoderLayer(nn.Module): 83 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 84 | dropout=0.1, activation="relu"): 85 | super(DecoderLayer, self).__init__() 86 | d_ff = d_ff or 4 * d_model 87 | self.self_attention = self_attention 88 | self.cross_attention = cross_attention 89 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 90 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 91 | self.norm1 = nn.LayerNorm(d_model) 92 | self.norm2 = nn.LayerNorm(d_model) 93 | self.norm3 = nn.LayerNorm(d_model) 94 | self.dropout = nn.Dropout(dropout) 95 | self.activation = F.relu if activation == "relu" else F.gelu 96 | 97 | def forward(self, x, cross, x_mask=None, cross_mask=None): 98 | x = x + self.dropout(self.self_attention( 99 | x, x, x, 100 | attn_mask=x_mask 101 | )[0]) 102 | x = self.norm1(x) 103 | 104 | x = x + self.dropout(self.cross_attention( 105 | x, cross, cross, 106 | attn_mask=cross_mask 107 | )[0]) 108 | 109 | y = x = self.norm2(x) 110 | y = 
self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 111 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 112 | 113 | return self.norm3(x + y) 114 | 115 | 116 | class Decoder(nn.Module): 117 | def __init__(self, layers, norm_layer=None, projection=None): 118 | super(Decoder, self).__init__() 119 | self.layers = nn.ModuleList(layers) 120 | self.norm = norm_layer 121 | self.projection = projection 122 | 123 | def forward(self, x, cross, x_mask=None, cross_mask=None): 124 | for layer in self.layers: 125 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 126 | 127 | if self.norm is not None: 128 | x = self.norm(x) 129 | 130 | if self.projection is not None: 131 | x = self.projection(x) 132 | return x -------------------------------------------------------------------------------- /src/models/esrnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/esrnn/__init__.py -------------------------------------------------------------------------------- /src/models/nbeats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/nbeats/__init__.py -------------------------------------------------------------------------------- /src/models/nhits/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/nhits/__init__.py -------------------------------------------------------------------------------- /src/models/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cchallu/n-hits/d882ee60b34c0ab6b67b31001f735b181a9efb93/src/models/transformer/__init__.py -------------------------------------------------------------------------------- /src/models/transformer/autoformer.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import math 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import pytorch_lightning as pl 10 | from torch import optim 11 | 12 | from ..components.embed import DataEmbedding, DataEmbedding_wo_pos 13 | from ..components.autocorrelation import ( 14 | AutoCorrelation, AutoCorrelationLayer 15 | ) 16 | from ..components.autoformer import ( 17 | Encoder, Decoder, EncoderLayer, DecoderLayer, 18 | my_Layernorm, series_decomp 19 | ) 20 | from ...losses.utils import LossFunction 21 | 22 | # Cell 23 | class _Autoformer(nn.Module): 24 | """ 25 | Autoformer is the first method to achieve the series-wise connection, 26 | with inherent O(LlogL) complexity 27 | """ 28 | def __init__(self, seq_len, 29 | label_len, pred_len, output_attention, 30 | enc_in, dec_in, d_model, c_out, embed, freq, dropout, 31 | factor, n_heads, d_ff, moving_avg, activation, e_layers, 32 | d_layers): 33 | super(_Autoformer, self).__init__() 34 | self.seq_len = seq_len 35 | self.label_len = label_len 36 | self.pred_len = pred_len 37 | self.output_attention = output_attention 38 | 39 | # Decomp 40 | kernel_size = moving_avg 41 | self.decomp = series_decomp(kernel_size) 42 | 43 | # Embedding 44 | # The series-wise connection inherently contains the sequential information. 
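# --- Illustrative shape sketch (annotation, not part of the original module). ---
# How the encoder/decoder inputs of _Autoformer are laid out. All hyperparameters and
# sizes below are made up for illustration (the real ones come from the experiment
# scripts); the file is assumed importable as `src.models.transformer.autoformer`.
import torch
from src.models.transformer.autoformer import _Autoformer

model = _Autoformer(seq_len=96, label_len=48, pred_len=24, output_attention=False,
                    enc_in=7, dec_in=7, d_model=64, c_out=7, embed='timeF', freq='h',
                    dropout=0.05, factor=1, n_heads=4, d_ff=128, moving_avg=25,
                    activation='gelu', e_layers=2, d_layers=1)

x_enc      = torch.randn(2, 96, 7)        # last seq_len observations of the 7 series
x_mark_enc = torch.randn(2, 96, 4)        # calendar features (4 per step for freq='h')
x_dec      = torch.randn(2, 48 + 24, 7)   # label_len context + pred_len placeholders
x_mark_dec = torch.randn(2, 48 + 24, 4)

y_hat = model(x_enc, x_mark_enc, x_dec, x_mark_dec)   # -> [2, 24, 7]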
45 | # Thus, we can discard the position embedding of transformers. 46 | self.enc_embedding = DataEmbedding_wo_pos(enc_in, d_model, embed, freq, 47 | dropout) 48 | self.dec_embedding = DataEmbedding_wo_pos(dec_in, d_model, embed, freq, 49 | dropout) 50 | 51 | # Encoder 52 | self.encoder = Encoder( 53 | [ 54 | EncoderLayer( 55 | AutoCorrelationLayer( 56 | AutoCorrelation(False, factor, attention_dropout=dropout, 57 | output_attention=output_attention), 58 | d_model, n_heads), 59 | d_model, 60 | d_ff, 61 | moving_avg=moving_avg, 62 | dropout=dropout, 63 | activation=activation 64 | ) for l in range(e_layers) 65 | ], 66 | norm_layer=my_Layernorm(d_model) 67 | ) 68 | # Decoder 69 | self.decoder = Decoder( 70 | [ 71 | DecoderLayer( 72 | AutoCorrelationLayer( 73 | AutoCorrelation(True, factor, attention_dropout=dropout, 74 | output_attention=False), 75 | d_model, n_heads), 76 | AutoCorrelationLayer( 77 | AutoCorrelation(False, factor, attention_dropout=dropout, 78 | output_attention=False), 79 | d_model, n_heads), 80 | d_model, 81 | c_out, 82 | d_ff, 83 | moving_avg=moving_avg, 84 | dropout=dropout, 85 | activation=activation, 86 | ) 87 | for l in range(d_layers) 88 | ], 89 | norm_layer=my_Layernorm(d_model), 90 | projection=nn.Linear(d_model, c_out, bias=True) 91 | ) 92 | 93 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, 94 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): 95 | # decomp init 96 | mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1) 97 | zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device) 98 | seasonal_init, trend_init = self.decomp(x_enc) 99 | # decoder input 100 | trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1) 101 | seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1) 102 | # enc 103 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 104 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) 105 | # dec 106 | dec_out = self.dec_embedding(seasonal_init, x_mark_dec) 107 | seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask, 108 | trend=trend_init) 109 | # final 110 | dec_out = trend_part + seasonal_part 111 | 112 | if self.output_attention: 113 | return dec_out[:, -self.pred_len:, :], attns 114 | else: 115 | return dec_out[:, -self.pred_len:, :] 116 | 117 | # Cell 118 | class Autoformer(pl.LightningModule): 119 | def __init__(self, seq_len, 120 | label_len, pred_len, output_attention, 121 | enc_in, dec_in, d_model, c_out, embed, freq, dropout, 122 | factor, n_heads, d_ff, moving_avg, activation, e_layers, d_layers, 123 | loss_train, loss_valid, loss_hypar, learning_rate, 124 | lr_decay, weight_decay, lr_decay_step_size, 125 | random_seed): 126 | super(Autoformer, self).__init__() 127 | 128 | #------------------------ Model Attributes ------------------------# 129 | # Architecture parameters 130 | self.seq_len = seq_len 131 | self.label_len = label_len 132 | self.pred_len = pred_len 133 | self.output_attention = output_attention 134 | self.enc_in = enc_in 135 | self.dec_in = dec_in 136 | self.d_model = d_model 137 | self.c_out = c_out 138 | self.embed = embed 139 | self.freq = freq 140 | self.dropout = dropout 141 | self.factor = factor 142 | self.n_heads = n_heads 143 | self.d_ff = d_ff 144 | self.moving_avg = moving_avg 145 | self.activation = activation 146 | self.e_layers = e_layers 147 | self.d_layers = d_layers 148 | 149 | # Loss functions 150 | self.loss_train = loss_train 151 
|         self.loss_hypar = loss_hypar
152 |         self.loss_valid = loss_valid
153 |         self.loss_fn_train = LossFunction(loss_train,
154 |                                           seasonality=self.loss_hypar)
155 |         self.loss_fn_valid = LossFunction(loss_valid,
156 |                                           seasonality=self.loss_hypar)
157 |
158 |         # Regularization and optimization parameters
159 |         self.learning_rate = learning_rate
160 |         self.lr_decay = lr_decay
161 |         self.weight_decay = weight_decay
162 |         self.lr_decay_step_size = lr_decay_step_size
163 |         self.random_seed = random_seed
164 |
165 |         self.model = _Autoformer(seq_len,
166 |                                  label_len, pred_len, output_attention,
167 |                                  enc_in, dec_in, d_model, c_out,
168 |                                  embed, freq, dropout,
169 |                                  factor, n_heads, d_ff,
170 |                                  moving_avg, activation, e_layers,
171 |                                  d_layers)
172 |
173 |     def forward(self, batch):
174 |         """
175 |         Autoformer needs a batch of shape (batch_size, time, series) for y
176 |         and (batch_size, time, exogenous) for x,
177 |         and doesn't need X for each time series.
178 |         Use the PyTorch DataLoader instead of TimeSeriesLoader.
179 |         """
180 |         Y = batch['Y'].permute(0, 2, 1)
181 |         X = batch['X'][:, 0, :, :].permute(0, 2, 1)
182 |         sample_mask = batch['sample_mask'].permute(0, 2, 1)
183 |         available_mask = batch['available_mask']
184 |
185 |         s_begin = 0
186 |         s_end = s_begin + self.seq_len
187 |         r_begin = s_end - self.label_len
188 |         r_end = r_begin + self.label_len + self.pred_len
189 |
190 |         batch_x = Y[:, s_begin:s_end, :]
191 |         batch_y = Y[:, r_begin:r_end, :]
192 |         batch_x_mark = X[:, s_begin:s_end, :]
193 |         batch_y_mark = X[:, r_begin:r_end, :]
194 |         outsample_mask = sample_mask[:, r_begin:r_end, :]
195 |
196 |         dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
197 |         dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
198 |
199 |         if self.output_attention:
200 |             forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
201 |         else:
202 |             forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
203 |
204 |         batch_y = batch_y[:, -self.pred_len:, :]
205 |         outsample_mask = outsample_mask[:, -self.pred_len:, :]
206 |
207 |         return batch_y, forecast, outsample_mask
208 |
209 |     def training_step(self, batch, batch_idx):
210 |
211 |         outsample_y, forecast, outsample_mask = self(batch)
212 |
213 |         loss = self.loss_fn_train(y=outsample_y,
214 |                                   y_hat=forecast,
215 |                                   mask=outsample_mask,
216 |                                   y_insample=batch['Y'].permute(0, 2, 1))
217 |
218 |         self.log('train_loss', loss, prog_bar=True, on_epoch=True)
219 |
220 |         return loss
221 |
222 |     def validation_step(self, batch, idx):
223 |
224 |         outsample_y, forecast, outsample_mask = self(batch)
225 |
226 |         loss = self.loss_fn_valid(y=outsample_y,
227 |                                   y_hat=forecast,
228 |                                   mask=outsample_mask,
229 |                                   y_insample=batch['Y'].permute(0, 2, 1))
230 |
231 |         self.log('val_loss', loss, prog_bar=True)
232 |
233 |         return loss
234 |
235 |     def on_fit_start(self):
236 |         torch.manual_seed(self.random_seed)
237 |         np.random.seed(self.random_seed)
238 |         random.seed(self.random_seed)
239 |
240 |     def configure_optimizers(self):
241 |         optimizer = optim.Adam(self.model.parameters(),
242 |                                lr=self.learning_rate,
243 |                                weight_decay=self.weight_decay)
244 |
245 |         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
246 |                                                  step_size=self.lr_decay_step_size,
247 |                                                  gamma=self.lr_decay)
248 |
249 |         return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler}
--------------------------------------------------------------------------------
/src/models/transformer/informer.py:
--------------------------------------------------------------------------------
1 | # Cell
2 | import math
3 | import random
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import pytorch_lightning as pl
10 | from torch import optim
11 |
12 | from ..components.transformer import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
13 | from ..components.selfattention import (
14 |     TriangularCausalMask, ProbMask,
15 |     FullAttention, ProbAttention, AttentionLayer
16 | )
17 | from ..components.embed import DataEmbedding
18 | from ...losses.utils import LossFunction
19 |
20 | # Cell
21 | class _Informer(nn.Module):
22 |     """
23 |     Informer with ProbSparse attention in O(L log L) complexity
24 |     """
25 |     def __init__(self, pred_len, output_attention,
26 |                  enc_in, dec_in, d_model, c_out, embed, freq, dropout,
27 |                  factor, n_heads, d_ff, activation, e_layers,
28 |                  d_layers, distil):
29 |         super(_Informer, self).__init__()
30 |         self.pred_len = pred_len
31 |         self.output_attention = output_attention
32 |
33 |         # Embedding
34 |         self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq,
35 |                                            dropout)
36 |         self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq,
37 |                                            dropout)
38 |
39 |         # Encoder
40 |         self.encoder = Encoder(
41 |             [
42 |                 EncoderLayer(
43 |                     AttentionLayer(
44 |                         ProbAttention(False, factor, attention_dropout=dropout,
45 |                                       output_attention=output_attention),
46 |                         d_model, n_heads),
47 |                     d_model,
48 |                     d_ff,
49 |                     dropout=dropout,
50 |                     activation=activation
51 |                 ) for l in range(e_layers)
52 |             ],
53 |             [
54 |                 ConvLayer(
55 |                     d_model
56 |                 ) for l in range(e_layers - 1)
57 |             ] if distil else None,
58 |             norm_layer=torch.nn.LayerNorm(d_model)
59 |         )
60 |         # Decoder
61 |         self.decoder = Decoder(
62 |             [
63 |                 DecoderLayer(
64 |                     AttentionLayer(
65 |                         ProbAttention(True, factor, attention_dropout=dropout, output_attention=False),
66 |                         d_model, n_heads),
67 |                     AttentionLayer(
68 |                         ProbAttention(False, factor, attention_dropout=dropout, output_attention=False),
69 |                         d_model, n_heads),
70 |                     d_model,
71 |                     d_ff,
72 |                     dropout=dropout,
73 |                     activation=activation,
74 |                 )
75 |                 for l in range(d_layers)
76 |             ],
77 |             norm_layer=torch.nn.LayerNorm(d_model),
78 |             projection=nn.Linear(d_model, c_out, bias=True)
79 |         )
80 |
81 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
82 |                 enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
83 |
84 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
85 |         enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
86 |
87 |         dec_out = self.dec_embedding(x_dec, x_mark_dec)
88 |         dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
89 |
90 |         if self.output_attention:
91 |             return dec_out[:, -self.pred_len:, :], attns
92 |         else:
93 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
94 |
95 | # Cell
96 | class Informer(pl.LightningModule):
97 |     def __init__(self, seq_len,
98 |                  label_len, pred_len, output_attention,
99 |                  enc_in, dec_in, d_model, c_out, embed, freq, dropout,
100 |                  factor, n_heads, d_ff, activation, e_layers, d_layers, distil,
101 |                  loss_train, loss_valid, loss_hypar, learning_rate,
102 |                  lr_decay, weight_decay, lr_decay_step_size,
103 |                  random_seed):
104 |         super(Informer, self).__init__()
105 |
106 |         #------------------------ Model Attributes ------------------------#
107 |         # Architecture parameters
108 |         self.seq_len = seq_len
109 |         self.label_len = label_len
110 |         self.pred_len = pred_len
111 |         self.output_attention = output_attention
112 |         self.enc_in = enc_in
113 |         self.dec_in = dec_in
114 |         self.d_model = d_model
115 |         self.c_out = c_out
116 |         self.embed = embed
117 |         self.freq = freq
118 |         self.dropout = dropout
119 |         self.factor = factor
120 |         self.n_heads = n_heads
121 |         self.d_ff = d_ff
122 |         self.activation = activation
123 |         self.e_layers = e_layers
124 |         self.d_layers = d_layers
125 |         self.distil = distil
126 |
127 |         # Loss functions
128 |         self.loss_train = loss_train
129 |         self.loss_hypar = loss_hypar
130 |         self.loss_valid = loss_valid
131 |         self.loss_fn_train = LossFunction(loss_train,
132 |                                           seasonality=self.loss_hypar)
133 |         self.loss_fn_valid = LossFunction(loss_valid,
134 |                                           seasonality=self.loss_hypar)
135 |
136 |         # Regularization and optimization parameters
137 |         self.learning_rate = learning_rate
138 |         self.lr_decay = lr_decay
139 |         self.weight_decay = weight_decay
140 |         self.lr_decay_step_size = lr_decay_step_size
141 |         self.random_seed = random_seed
142 |
143 |         self.model = _Informer(pred_len, output_attention,
144 |                                enc_in, dec_in, d_model, c_out,
145 |                                embed, freq, dropout,
146 |                                factor, n_heads, d_ff,
147 |                                activation, e_layers,
148 |                                d_layers, distil)
149 |
150 |     def forward(self, batch):
151 |         """
152 |         Informer needs a batch of shape (batch_size, time, series) for y
153 |         and (batch_size, time, exogenous) for x,
154 |         and doesn't need X for each time series.
155 |         Use the PyTorch DataLoader instead of TimeSeriesLoader.
156 |         """
157 |         Y = batch['Y'].permute(0, 2, 1)
158 |         X = batch['X'][:, 0, :, :].permute(0, 2, 1)
159 |         sample_mask = batch['sample_mask'].permute(0, 2, 1)
160 |         available_mask = batch['available_mask']
161 |
162 |         s_begin = 0
163 |         s_end = s_begin + self.seq_len
164 |         r_begin = s_end - self.label_len
165 |         r_end = r_begin + self.label_len + self.pred_len
166 |
167 |         batch_x = Y[:, s_begin:s_end, :]
168 |         batch_y = Y[:, r_begin:r_end, :]
169 |         batch_x_mark = X[:, s_begin:s_end, :]
170 |         batch_y_mark = X[:, r_begin:r_end, :]
171 |         outsample_mask = sample_mask[:, r_begin:r_end, :]
172 |
173 |         dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
174 |         dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
175 |
176 |         if self.output_attention:
177 |             forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
178 |         else:
179 |             forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
180 |
181 |         batch_y = batch_y[:, -self.pred_len:, :]
182 |         outsample_mask = outsample_mask[:, -self.pred_len:, :]
183 |
184 |         return batch_y, forecast, outsample_mask, Y
185 |
186 |     def training_step(self, batch, batch_idx):
187 |
188 |         outsample_y, forecast, outsample_mask, Y = self(batch)
189 |
190 |         loss = self.loss_fn_train(y=outsample_y,
191 |                                   y_hat=forecast,
192 |                                   mask=outsample_mask,
193 |                                   y_insample=Y)
194 |
195 |         self.log('train_loss', loss, prog_bar=True, on_epoch=True)
196 |
197 |         return loss
198 |
199 |     def validation_step(self, batch, idx):
200 |
201 |         outsample_y, forecast, outsample_mask, Y = self(batch)
202 |
203 |         loss = self.loss_fn_valid(y=outsample_y,
204 |                                   y_hat=forecast,
205 |                                   mask=outsample_mask,
206 |                                   y_insample=Y)
207 |
208 |         self.log('val_loss', loss, prog_bar=True)
209 |
210 |         return loss
211 |
212 |     def on_fit_start(self):
213 |         torch.manual_seed(self.random_seed)
214 |         np.random.seed(self.random_seed)
215 |         random.seed(self.random_seed)
216 |
217 |     def configure_optimizers(self):
218 |         optimizer = optim.Adam(self.model.parameters(),
219 |                                lr=self.learning_rate,
220 |                                weight_decay=self.weight_decay)
221 |
222 |         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
223 |                                                  step_size=self.lr_decay_step_size,
224 |                                                  gamma=self.lr_decay)
225 |
226 |
return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler} -------------------------------------------------------------------------------- /src/models/transformer/transformer.py: -------------------------------------------------------------------------------- 1 | # Cell 2 | import random 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import pytorch_lightning as pl 9 | from torch import optim 10 | 11 | from ..components.transformer import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer 12 | from ..components.selfattention import FullAttention, AttentionLayer 13 | from ..components.embed import DataEmbedding 14 | from ...losses.utils import LossFunction 15 | 16 | # Cell 17 | class _Transformer(nn.Module): 18 | """ 19 | Vanilla Transformer with O(L^2) complexity 20 | """ 21 | def __init__(self, pred_len, output_attention, 22 | enc_in, dec_in, d_model, c_out, embed, freq, dropout, 23 | factor, n_heads, d_ff, activation, e_layers, 24 | d_layers): 25 | super(_Transformer, self).__init__() 26 | self.pred_len = pred_len 27 | self.output_attention = output_attention 28 | 29 | # Embedding 30 | self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq, 31 | dropout) 32 | self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq, 33 | dropout) 34 | # Encoder 35 | self.encoder = Encoder( 36 | [ 37 | EncoderLayer( 38 | AttentionLayer( 39 | FullAttention(False, factor, attention_dropout=dropout, 40 | output_attention=output_attention), d_model, n_heads), 41 | d_model, 42 | d_ff, 43 | dropout=dropout, 44 | activation=activation 45 | ) for l in range(e_layers) 46 | ], 47 | norm_layer=torch.nn.LayerNorm(d_model) 48 | ) 49 | # Decoder 50 | self.decoder = Decoder( 51 | [ 52 | DecoderLayer( 53 | AttentionLayer( 54 | FullAttention(True, factor, attention_dropout=dropout, output_attention=False), 55 | d_model, n_heads), 56 | AttentionLayer( 57 | FullAttention(False, factor, attention_dropout=dropout, output_attention=False), 58 | d_model, n_heads), 59 | d_model, 60 | d_ff, 61 | dropout=dropout, 62 | activation=activation, 63 | ) 64 | for l in range(d_layers) 65 | ], 66 | norm_layer=torch.nn.LayerNorm(d_model), 67 | projection=nn.Linear(d_model, c_out, bias=True) 68 | ) 69 | 70 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, 71 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): 72 | 73 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 74 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) 75 | 76 | dec_out = self.dec_embedding(x_dec, x_mark_dec) 77 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) 78 | 79 | if self.output_attention: 80 | return dec_out[:, -self.pred_len:, :], attns 81 | else: 82 | return dec_out[:, -self.pred_len:, :] # [B, L, D] 83 | 84 | # Cell 85 | class Transformer(pl.LightningModule): 86 | def __init__(self, seq_len, 87 | label_len, pred_len, output_attention, 88 | enc_in, dec_in, d_model, c_out, embed, freq, dropout, 89 | factor, n_heads, d_ff, activation, e_layers, d_layers, 90 | loss_train, loss_valid, loss_hypar, learning_rate, 91 | lr_decay, weight_decay, lr_decay_step_size, 92 | random_seed): 93 | super(Transformer, self).__init__() 94 | 95 | #------------------------ Model Attributes ------------------------# 96 | # Architecture parameters 97 | self.seq_len = seq_len 98 | self.label_len = label_len 99 | self.pred_len = pred_len 100 | self.output_attention = output_attention 101 | self.enc_in = enc_in 102 | self.dec_in = 
dec_in
103 |         self.d_model = d_model
104 |         self.c_out = c_out
105 |         self.embed = embed
106 |         self.freq = freq
107 |         self.dropout = dropout
108 |         self.factor = factor
109 |         self.n_heads = n_heads
110 |         self.d_ff = d_ff
111 |         self.activation = activation
112 |         self.e_layers = e_layers
113 |         self.d_layers = d_layers
114 |
115 |         # Loss functions
116 |         self.loss_train = loss_train
117 |         self.loss_hypar = loss_hypar
118 |         self.loss_valid = loss_valid
119 |         self.loss_fn_train = LossFunction(loss_train,
120 |                                           seasonality=self.loss_hypar)
121 |         self.loss_fn_valid = LossFunction(loss_valid,
122 |                                           seasonality=self.loss_hypar)
123 |
124 |         # Regularization and optimization parameters
125 |         self.learning_rate = learning_rate
126 |         self.lr_decay = lr_decay
127 |         self.weight_decay = weight_decay
128 |         self.lr_decay_step_size = lr_decay_step_size
129 |         self.random_seed = random_seed
130 |
131 |         self.model = _Transformer(pred_len, output_attention,
132 |                                   enc_in, dec_in, d_model, c_out,
133 |                                   embed, freq, dropout,
134 |                                   factor, n_heads, d_ff,
135 |                                   activation, e_layers,
136 |                                   d_layers)
137 |
138 |     def forward(self, batch):
139 |         """
140 |         Transformer needs a batch of shape (batch_size, time, series) for y
141 |         and (batch_size, time, exogenous) for x,
142 |         and doesn't need X for each time series.
143 |         Use the PyTorch DataLoader instead of TimeSeriesLoader.
144 |         """
145 |         Y = batch['Y'].permute(0, 2, 1)
146 |         X = batch['X'][:, 0, :, :].permute(0, 2, 1)
147 |         sample_mask = batch['sample_mask'].permute(0, 2, 1)
148 |         available_mask = batch['available_mask']
149 |
150 |         s_begin = 0
151 |         s_end = s_begin + self.seq_len
152 |         r_begin = s_end - self.label_len
153 |         r_end = r_begin + self.label_len + self.pred_len
154 |
155 |         batch_x = Y[:, s_begin:s_end, :]
156 |         batch_y = Y[:, r_begin:r_end, :]
157 |         batch_x_mark = X[:, s_begin:s_end, :]
158 |         batch_y_mark = X[:, r_begin:r_end, :]
159 |         outsample_mask = sample_mask[:, r_begin:r_end, :]
160 |
161 |         dec_inp = torch.zeros_like(batch_y[:, -self.pred_len:, :])
162 |         dec_inp = torch.cat([batch_y[:, :self.label_len, :], dec_inp], dim=1)
163 |
164 |         if self.output_attention:
165 |             forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
166 |         else:
167 |             forecast = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
168 |
169 |         batch_y = batch_y[:, -self.pred_len:, :]
170 |         outsample_mask = outsample_mask[:, -self.pred_len:, :]
171 |
172 |         return batch_y, forecast, outsample_mask, Y
173 |
174 |     def training_step(self, batch, batch_idx):
175 |
176 |         outsample_y, forecast, outsample_mask, Y = self(batch)
177 |
178 |         loss = self.loss_fn_train(y=outsample_y,
179 |                                   y_hat=forecast,
180 |                                   mask=outsample_mask,
181 |                                   y_insample=Y)
182 |
183 |         self.log('train_loss', loss, prog_bar=True, on_epoch=True)
184 |
185 |         return loss
186 |
187 |     def validation_step(self, batch, idx):
188 |
189 |         outsample_y, forecast, outsample_mask, Y = self(batch)
190 |
191 |         loss = self.loss_fn_valid(y=outsample_y,
192 |                                   y_hat=forecast,
193 |                                   mask=outsample_mask,
194 |                                   y_insample=Y)
195 |
196 |         self.log('val_loss', loss, prog_bar=True)
197 |
198 |         return loss
199 |
200 |     def on_fit_start(self):
201 |         torch.manual_seed(self.random_seed)
202 |         np.random.seed(self.random_seed)
203 |         random.seed(self.random_seed)
204 |
205 |     def configure_optimizers(self):
206 |         optimizer = optim.Adam(self.model.parameters(),
207 |                                lr=self.learning_rate,
208 |                                weight_decay=self.weight_decay)
209 |
210 |         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
211 |                                                  step_size=self.lr_decay_step_size,
212 |
gamma=self.lr_decay) 213 | 214 | return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler} --------------------------------------------------------------------------------
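
Note on the shared forward() windowing: Autoformer, Informer, and Transformer above slice their batches identically. The encoder sees the last seq_len steps, and the decoder input is the trailing label_len steps of known history followed by pred_len zero placeholders (Autoformer then reworks those placeholders into a trend/seasonal initialization via series_decomp, as in _Autoformer.forward). The sketch below reproduces only that index arithmetic on a dummy tensor; the sizes are made up for illustration and nothing is imported from this repository.

import torch

# Hypothetical window sizes, chosen only for illustration.
seq_len, label_len, pred_len = 96, 48, 24
B, n_series = 4, 7

# One training window holds seq_len + pred_len contiguous time steps.
Y = torch.randn(B, seq_len + pred_len, n_series)

s_begin, s_end = 0, seq_len                 # encoder (insample) window
r_begin = s_end - label_len                 # decoder window start
r_end = r_begin + label_len + pred_len      # decoder window end

batch_x = Y[:, s_begin:s_end, :]            # [B, seq_len, n_series]
batch_y = Y[:, r_begin:r_end, :]            # [B, label_len + pred_len, n_series]

# Decoder input: known history for label_len steps, zeros for the horizon.
dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])
dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)

assert batch_x.shape == (B, seq_len, n_series)
assert dec_inp.shape == (B, label_len + pred_len, n_series)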
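
A second note, on Informer's distil flag: when enabled, a ConvLayer from src/models/components/transformer.py is placed between encoder blocks, and its Conv1d (kernel 3, circular padding) followed by MaxPool1d (stride 2) roughly halves the temporal length at each stage. A minimal way to observe this, assuming the repository root is on PYTHONPATH and the package __init__ files stay import-light:

import torch

from src.models.components.transformer import ConvLayer

# ConvLayer takes [batch, length, d_model] and permutes internally for Conv1d.
layer = ConvLayer(c_in=512).eval()
x = torch.randn(4, 96, 512)

with torch.no_grad():
    out = layer(x)

print(out.shape)  # length shrinks from 96 to about half (49 with these settings)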