├── MAINTAINERS ├── pts ├── dataset │ ├── __init__.py │ ├── repository │ │ ├── __init__.py │ │ ├── datasets.py │ │ └── _m5.py │ └── loader.py ├── model │ ├── lstnet │ │ ├── __init__.py │ │ ├── lstnet_estimator.py │ │ └── lstnet_network.py │ ├── deepvar │ │ └── __init__.py │ ├── transformer │ │ └── __init__.py │ ├── transformer_tempflow │ │ └── __init__.py │ ├── __init__.py │ ├── n_beats │ │ ├── __init__.py │ │ └── n_beats_estimator.py │ ├── deepar │ │ └── __init__.py │ ├── tempflow │ │ ├── __init__.py │ │ └── tempflow_estimator.py │ ├── causal_deepar │ │ ├── __init__.py │ │ └── README.md │ ├── time_grad │ │ ├── __init__.py │ │ ├── epsilon_theta.py │ │ └── time_grad_estimator.py │ ├── tft │ │ ├── __init__.py │ │ ├── tft_output.py │ │ ├── tft_transform.py │ │ └── tft_modules.py │ ├── simple_feedforward │ │ ├── __init__.py │ │ ├── simple_feedforward_network.py │ │ └── simple_feedforward_estimator.py │ ├── utils.py │ └── estimator.py ├── feature │ ├── __init__.py │ ├── lags.py │ ├── fourier_date_feature.py │ └── holiday.py ├── distributions │ ├── __init__.py │ ├── utils.py │ ├── implicit_quantile.py │ ├── zero_inflated.py │ └── piecewise_linear.py ├── __init__.py ├── modules │ ├── __init__.py │ ├── iqn_modules.py │ ├── feature.py │ └── scaler.py └── trainer.py ├── examples └── images │ ├── readme_0.png │ └── readme_1.png ├── CITATION.cff ├── setup.py ├── LICENSE ├── .github └── workflows │ └── publish-to-pypi.yml ├── test ├── model │ ├── deepar │ │ ├── test_lags.py │ │ └── test_auxillary_outputs.py │ ├── test_lstnet.py │ ├── test_forecast.py │ └── test_deepvar.py ├── distributions │ ├── test_zero_inflated.py │ └── test_piecewise_linear.py ├── modules │ ├── test_scaler.py │ ├── test_implicit_quantile_distr_output.py │ └── test_feature.py └── feature │ └── test_holiday.py ├── .gitignore └── README.md /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Kashif Rasul 2 | -------------------------------------------------------------------------------- /pts/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import TransformedIterableDataset 2 | -------------------------------------------------------------------------------- /pts/dataset/repository/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import dataset_recipes 2 | -------------------------------------------------------------------------------- /pts/model/lstnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .lstnet_estimator import LSTNetEstimator 2 | -------------------------------------------------------------------------------- /pts/model/deepvar/__init__.py: -------------------------------------------------------------------------------- 1 | from .deepvar_estimator import DeepVAREstimator 2 | -------------------------------------------------------------------------------- /pts/model/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer_estimator import TransformerEstimator 2 | -------------------------------------------------------------------------------- /examples/images/readme_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zalandoresearch/pytorch-ts/HEAD/examples/images/readme_0.png 
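The package layout above pairs the estimator classes under `pts/model/` with the shared `Trainer` in `pts/trainer.py`. As a quick orientation, here is a minimal end-to-end sketch that wires them together the same way the test files later in this dump do; the artificial `constant_dataset` and the `input_size=15` value are taken from those tests and are only illustrative, not fixed requirements:

```python
from gluonts.dataset.artificial import constant_dataset
from gluonts.evaluation.backtest import make_evaluation_predictions

from pts import Trainer
from pts.model.deepar import DeepAREstimator
from pts.modules import StudentTOutput

# small synthetic dataset used throughout the test suite
ds_info, train_ds, test_ds = constant_dataset()

estimator = DeepAREstimator(
    freq=ds_info.metadata.freq,
    prediction_length=ds_info.prediction_length,
    input_size=15,  # value the tests use for this dataset; depends on features/lags
    distr_output=StudentTOutput(),
    trainer=Trainer(epochs=1, num_batches_per_epoch=1),
)

# train, then produce sample forecasts for evaluation
predictor = estimator.train(train_ds)
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds, predictor=predictor, num_samples=100
)
forecasts = list(forecast_it)
```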
-------------------------------------------------------------------------------- /examples/images/readme_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zalandoresearch/pytorch-ts/HEAD/examples/images/readme_1.png -------------------------------------------------------------------------------- /pts/model/transformer_tempflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer_tempflow_estimator import TransformerTempFlowEstimator 2 | -------------------------------------------------------------------------------- /pts/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import get_module_forward_input_names, weighted_average 2 | from .estimator import PyTorchEstimator 3 | -------------------------------------------------------------------------------- /pts/model/n_beats/__init__.py: -------------------------------------------------------------------------------- 1 | from .n_beats_ensemble import NBEATSEnsembleEstimator 2 | from .n_beats_estimator import NBEATSEstimator 3 | -------------------------------------------------------------------------------- /pts/model/deepar/__init__.py: -------------------------------------------------------------------------------- 1 | from .deepar_estimator import DeepAREstimator 2 | from .deepar_network import DeepARNetwork, DeepARTrainingNetwork 3 | -------------------------------------------------------------------------------- /pts/model/tempflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .tempflow_estimator import TempFlowEstimator 2 | from .tempflow_network import TempFlowTrainingNetwork, TempFlowPredictionNetwork 3 | -------------------------------------------------------------------------------- /pts/model/causal_deepar/__init__.py: -------------------------------------------------------------------------------- 1 | from .causal_deepar_estimator import CausalDeepAREstimator 2 | from .causal_deepar_network import CausalDeepARNetwork, CausalDeepARTrainingNetwork 3 | -------------------------------------------------------------------------------- /pts/model/time_grad/__init__.py: -------------------------------------------------------------------------------- 1 | from .time_grad_estimator import TimeGradEstimator 2 | from .time_grad_network import TimeGradTrainingNetwork, TimeGradPredictionNetwork 3 | from .epsilon_theta import EpsilonTheta 4 | -------------------------------------------------------------------------------- /pts/model/tft/__init__.py: -------------------------------------------------------------------------------- 1 | from .tft_estimator import TemporalFusionTransformerEstimator 2 | from .tft_network import ( 3 | TemporalFusionTransformerTrainingNetwork, 4 | TemporalFusionTransformerPredictionNetwork, 5 | ) 6 | -------------------------------------------------------------------------------- /pts/model/simple_feedforward/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_feedforward_estimator import SimpleFeedForwardEstimator 2 | from .simple_feedforward_network import ( 3 | SimpleFeedForwardTrainingNetwork, 4 | SimpleFeedForwardPredictionNetwork, 5 | ) 6 | -------------------------------------------------------------------------------- /pts/feature/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .holiday import ( 2 | CustomDateFeatureSet, 3 | CustomHolidayFeatureSet, 4 | ) 5 | from .fourier_date_feature import fourier_time_features_from_frequency 6 | from .lags import lags_for_fourier_time_features_from_frequency 7 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: If you use this software, please cite it using the following metadata 3 | title: PyTorchTS 4 | authors: 5 | - family-names: Rasul 6 | given-names: Kashif 7 | license: MIT 8 | repository-code: https://github.com/zalandoresearch/pytorch-ts 9 | version: 0.6.0 10 | -------------------------------------------------------------------------------- /pts/dataset/repository/datasets.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from gluonts.dataset.repository.datasets import dataset_recipes 4 | 5 | from ._m5 import generate_pts_m5_dataset 6 | 7 | dataset_recipes["pts_m5"] = partial( 8 | generate_pts_m5_dataset, pandas_freq="D", prediction_length=28 9 | ) 10 | -------------------------------------------------------------------------------- /pts/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import broadcast_shape 2 | from .zero_inflated import ( 3 | ZeroInflatedDistribution, 4 | ZeroInflatedPoisson, 5 | ZeroInflatedNegativeBinomial, 6 | ) 7 | from .piecewise_linear import PiecewiseLinear, TransformedPiecewiseLinear 8 | from .implicit_quantile import ImplicitQuantile, TransformedImplicitQuantile 9 | -------------------------------------------------------------------------------- /pts/__init__.py: -------------------------------------------------------------------------------- 1 | from pkgutil import extend_path 2 | 3 | from pkg_resources import get_distribution, DistributionNotFound 4 | 5 | from .trainer import Trainer 6 | 7 | __path__ = extend_path(__path__, __name__) # type: ignore 8 | 9 | try: 10 | __version__ = get_distribution(__name__).version 11 | except DistributionNotFound: 12 | __version__ = "0.0.0-unknown" -------------------------------------------------------------------------------- /pts/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .distribution_output import ( 2 | NormalOutput, 3 | StudentTOutput, 4 | BetaOutput, 5 | PoissonOutput, 6 | ZeroInflatedPoissonOutput, 7 | PiecewiseLinearOutput, 8 | NegativeBinomialOutput, 9 | ZeroInflatedNegativeBinomialOutput, 10 | NormalMixtureOutput, 11 | StudentTMixtureOutput, 12 | IndependentNormalOutput, 13 | LowRankMultivariateNormalOutput, 14 | MultivariateNormalOutput, 15 | FlowOutput, 16 | DiffusionOutput, 17 | ImplicitQuantileOutput, 18 | ) 19 | from .feature import FeatureEmbedder, FeatureAssembler 20 | from .flows import RealNVP, MAF 21 | from .scaler import MeanScaler, NOPScaler 22 | from .gaussian_diffusion import GaussianDiffusion 23 | -------------------------------------------------------------------------------- /pts/feature/lags.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from pandas.tseries.frequencies import to_offset 4 | 5 | 6 | def lags_for_fourier_time_features_from_frequency( 7 | freq_str: str, num_lags: 
Optional[int] = None 8 | ) -> List[int]: 9 | offset = to_offset(freq_str) 10 | multiple, granularity = offset.n, offset.name 11 | 12 | if granularity == "M": 13 | lags = [[1, 12]] 14 | elif granularity == "D": 15 | lags = [[1, 7, 14]] 16 | elif granularity == "B": 17 | lags = [[1, 2]] 18 | elif granularity == "H": 19 | lags = [[1, 24, 168]] 20 | elif granularity in ("T", "min"): 21 | lags = [[1, 4, 12, 24, 48]] 22 | else: 23 | lags = [[1]] 24 | 25 | # use less lags 26 | output_lags = list([int(lag) for sub_list in lags for lag in sub_list]) 27 | output_lags = sorted(list(set(output_lags))) 28 | return output_lags[:num_lags] 29 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="pytorchts", 5 | version="0.6.0", 6 | description="PyTorch Probabilistic Time Series Modeling framework", 7 | long_description=open("README.md").read(), 8 | long_description_content_type="text/markdown", 9 | author="Kashif Rasul", 10 | author_email="kashif.rasul@zalando.de", 11 | url="https://github.com/zalandoresearch/pytorch-ts", 12 | license="MIT", 13 | packages=find_packages(exclude=["tests"]), 14 | include_package_data=True, 15 | zip_safe=True, 16 | python_requires=">=3.6", 17 | install_requires=[ 18 | "torch>=1.8.0", 19 | "gluonts>=0.9.0", 20 | "holidays", 21 | "numpy~=1.16", 22 | "pandas~=1.1", 23 | "scipy", 24 | "tqdm", 25 | "matplotlib", 26 | "tensorboard", 27 | ], 28 | test_suite="tests", 29 | tests_require=["flake8", "pytest"], 30 | ) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Zalando SE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
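To make the lag table in `pts/feature/lags.py` above concrete, the checks below show what the helper returns for a few frequency strings; the chosen frequencies are arbitrary examples, and the expected values follow directly from the mapping in the function:

```python
from pts.feature import lags_for_fourier_time_features_from_frequency

# hourly data: lags at one hour, one day (24) and one week (168)
assert lags_for_fourier_time_features_from_frequency("H") == [1, 24, 168]

# daily data, truncated to the first two lags via num_lags
assert lags_for_fourier_time_features_from_frequency("D", num_lags=2) == [1, 7]

# granularities without an explicit entry fall back to a single lag of 1
assert lags_for_fourier_time_features_from_frequency("W") == [1]
```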
-------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@master 13 | - name: Set up Python 3.x 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: '3.x' 17 | - name: Install dependencies 18 | run: >- 19 | python -m 20 | pip install --upgrade 21 | pip setuptools twine 22 | --user 23 | - name: Build a binary wheel and a source tarball 24 | run: >- 25 | python setup.py sdist 26 | - name: Publish distribution 📦 to PyPI 27 | uses: pypa/gh-action-pypi-publish@master 28 | with: 29 | user: __token__ 30 | password: ${{ secrets.pypi_password }} 31 | - name: Publish distribution 📦 to Test PyPI 32 | uses: pypa/gh-action-pypi-publish@master 33 | with: 34 | user: __token__ 35 | password: ${{ secrets.test_pypi_password }} 36 | repository_url: https://test.pypi.org/legacy/ 37 | -------------------------------------------------------------------------------- /pts/distributions/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 Uber Technologies, Inc. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | 5 | def broadcast_shape(*shapes, **kwargs): 6 | """ 7 | Similar to ``np.broadcast()`` but for shapes. 8 | Equivalent to ``np.broadcast(*map(np.empty, shapes)).shape``. 9 | 10 | :param tuple shapes: shapes of tensors. 11 | :param bool strict: whether to use extend-but-not-resize broadcasting. 
12 | :returns: broadcasted shape 13 | :rtype: tuple 14 | :raises: ValueError 15 | """ 16 | strict = kwargs.pop("strict", False) 17 | reversed_shape = [] 18 | for shape in shapes: 19 | for i, size in enumerate(reversed(shape)): 20 | if i >= len(reversed_shape): 21 | reversed_shape.append(size) 22 | elif reversed_shape[i] == 1 and not strict: 23 | reversed_shape[i] = size 24 | elif reversed_shape[i] != size and (size != 1 or strict): 25 | raise ValueError( 26 | "shape mismatch: objects cannot be broadcast to a single shape: {}".format( 27 | " vs ".join(map(str, shapes)) 28 | ) 29 | ) 30 | return tuple(reversed(reversed_shape)) 31 | -------------------------------------------------------------------------------- /pts/dataset/loader.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import itertools 3 | 4 | from torch.utils.data import IterableDataset 5 | 6 | from gluonts.dataset.common import Dataset 7 | from gluonts.transform import Transformation, TransformedDataset 8 | from gluonts.itertools import Cyclic, PseudoShuffled, Cached 9 | 10 | 11 | class TransformedIterableDataset(IterableDataset): 12 | def __init__( 13 | self, 14 | dataset: Dataset, 15 | transform: Transformation, 16 | is_train: bool = True, 17 | shuffle_buffer_length: Optional[int] = None, 18 | cache_data: bool = False, 19 | ): 20 | super().__init__() 21 | self.shuffle_buffer_length = shuffle_buffer_length 22 | 23 | self.transformed_dataset = TransformedDataset( 24 | Cyclic(dataset) if not cache_data else Cached(Cyclic(dataset)), 25 | transform, 26 | is_train=is_train, 27 | ) 28 | 29 | def __iter__(self): 30 | if self.shuffle_buffer_length is None: 31 | return iter(self.transformed_dataset) 32 | else: 33 | shuffled = PseudoShuffled( 34 | self.transformed_dataset, 35 | shuffle_buffer_length=self.shuffle_buffer_length, 36 | ) 37 | return iter(shuffled) 38 | -------------------------------------------------------------------------------- /pts/model/utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Optional 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | def get_module_forward_input_names(module: nn.Module): 9 | params = inspect.signature(module.forward).parameters 10 | param_names = [k for k, v in params.items() if not str(v).startswith("*")] 11 | return param_names 12 | 13 | 14 | def weighted_average( 15 | x: torch.Tensor, weights: Optional[torch.Tensor] = None, dim=None 16 | ) -> torch.Tensor: 17 | """ 18 | Computes the weighted average of a given tensor across a given dim, masking 19 | values associated with weight zero, 20 | meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`. 21 | 22 | Parameters 23 | ---------- 24 | x 25 | Input tensor, of which the average must be computed. 26 | weights 27 | Weights tensor, of the same shape as `x`. 28 | dim 29 | The dim along which to average `x` 30 | 31 | Returns 32 | ------- 33 | Tensor: 34 | The tensor with values averaged along the specified `dim`. 
35 | """ 36 | if weights is not None: 37 | weighted_tensor = torch.where(weights != 0, x * weights, torch.zeros_like(x)) 38 | sum_weights = torch.clamp( 39 | weights.sum(dim=dim) if dim else weights.sum(), min=1.0 40 | ) 41 | return ( 42 | weighted_tensor.sum(dim=dim) if dim else weighted_tensor.sum() 43 | ) / sum_weights 44 | else: 45 | return x.mean(dim=dim) 46 | -------------------------------------------------------------------------------- /test/model/deepar/test_lags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | # Standard library imports 15 | import itertools 16 | 17 | # Third-party imports 18 | import torch 19 | 20 | # First-party imports 21 | from pts.model.deepar import DeepARTrainingNetwork 22 | 23 | 24 | def test_lagged_subsequences(): 25 | N = 8 26 | T = 96 27 | C = 2 28 | lags = [1, 2, 3, 24, 48] 29 | I = len(lags) 30 | sequence = torch.randn((N, T, C)) 31 | S = 48 32 | 33 | # (batch_size, sub_seq_len, target_dim, num_lags) 34 | lagged_subsequences = DeepARTrainingNetwork.get_lagged_subsequences( 35 | sequence=sequence, 36 | sequence_length=sequence.shape[1], 37 | indices=lags, 38 | subsequences_length=S, 39 | ) 40 | 41 | assert (N, S, C, I) == lagged_subsequences.shape 42 | 43 | # checks that lags value behave as described as in the get_lagged_subsequences contract 44 | for i, j, k in itertools.product(range(N), range(S), range(I)): 45 | assert ( 46 | (lagged_subsequences[i, j, :, k] == sequence[i, -lags[k] - S + j, :]) 47 | .numpy() 48 | .all() 49 | ) 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # other 107 | wandb/ 108 | .idea/ 109 | runs/ 110 | .vscode/ 111 | -------------------------------------------------------------------------------- /pts/modules/iqn_modules.py: -------------------------------------------------------------------------------- 1 | from math import pi 2 | 3 | import torch 4 | from torch import nn as nn 5 | 6 | 7 | class ImplicitQuantileModule(nn.Module): 8 | """See arXiv: 1806.06923 9 | This module, in combination with quantile loss, 10 | learns how to generate the quantile of the distribution of the target. 11 | A quantile value, tau, is randomly generated with a Uniform([0, 1])). 12 | This quantile value is embedded in this module and also passed to the quantile loss: 13 | this should force the model to learn the appropriate quantile. 14 | """ 15 | 16 | def __init__(self, in_features, output_domain_cls): 17 | super(ImplicitQuantileModule, self).__init__() 18 | self.in_features = in_features 19 | self.quantile_layer = QuantileLayer(in_features) 20 | self.output_layer = nn.Sequential( 21 | nn.Linear(in_features, in_features), 22 | nn.Softplus(), 23 | nn.Linear(in_features, 1), 24 | output_domain_cls(), 25 | ) 26 | 27 | def forward(self, input_data, tau): 28 | embedded_tau = self.quantile_layer(tau) 29 | new_input_data = input_data * (torch.ones_like(embedded_tau) + embedded_tau) 30 | return self.output_layer(new_input_data).squeeze(-1) 31 | 32 | 33 | class QuantileLayer(nn.Module): 34 | """Define quantile embedding layer, i.e. 
phi in the IQN paper (arXiv: 1806.06923).""" 35 | 36 | def __init__(self, num_output): 37 | super(QuantileLayer, self).__init__() 38 | self.n_cos_embedding = 64 39 | self.num_output = num_output 40 | self.output_layer = nn.Sequential( 41 | nn.Linear(self.n_cos_embedding, self.n_cos_embedding), 42 | nn.PReLU(), 43 | nn.Linear(self.n_cos_embedding, num_output), 44 | ) 45 | 46 | def forward(self, tau): 47 | cos_embedded_tau = self.cos_embed(tau) 48 | final_output = self.output_layer(cos_embedded_tau) 49 | return final_output 50 | 51 | def cos_embed(self, tau): 52 | integers = torch.repeat_interleave( 53 | torch.arange(0, self.n_cos_embedding).unsqueeze(dim=0), 54 | repeats=tau.shape[-1], 55 | dim=0, 56 | ).to(tau.device) 57 | return torch.cos(pi * tau.unsqueeze(dim=-1) * integers) 58 | -------------------------------------------------------------------------------- /pts/feature/fourier_date_feature.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | from typing import List 15 | 16 | import numpy as np 17 | import pandas as pd 18 | from pandas.tseries.frequencies import to_offset 19 | 20 | from gluonts.core.component import validated 21 | from gluonts.time_feature import TimeFeature, norm_freq_str 22 | 23 | 24 | class FourierDateFeatures(TimeFeature): 25 | @validated() 26 | def __init__(self, freq: str) -> None: 27 | super().__init__() 28 | # reocurring freq 29 | freqs = [ 30 | "month", 31 | "day", 32 | "hour", 33 | "minute", 34 | "weekofyear", 35 | "weekday", 36 | "dayofweek", 37 | "dayofyear", 38 | "daysinmonth", 39 | ] 40 | 41 | assert freq in freqs 42 | self.freq = freq 43 | 44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 45 | values = getattr(index, self.freq) 46 | num_values = max(values) + 1 47 | steps = [x * 2.0 * np.pi / num_values for x in values] 48 | return np.vstack([np.cos(steps), np.sin(steps)]) 49 | 50 | 51 | def fourier_time_features_from_frequency(freq_str: str) -> List[TimeFeature]: 52 | offset = to_offset(freq_str) 53 | granularity = norm_freq_str(offset.name) 54 | 55 | features = { 56 | "M": ["weekofyear"], 57 | "W": ["daysinmonth", "weekofyear"], 58 | "D": ["dayofweek"], 59 | "B": ["dayofweek", "dayofyear"], 60 | "H": ["hour", "dayofweek"], 61 | "min": ["minute", "hour", "dayofweek"], 62 | "T": ["minute", "hour", "dayofweek"], 63 | } 64 | 65 | assert granularity in features, f"freq {granularity} not supported" 66 | 67 | feature_classes: List[TimeFeature] = [ 68 | FourierDateFeatures(freq=freq) for freq in features[granularity] 69 | ] 70 | return feature_classes 71 | -------------------------------------------------------------------------------- /test/model/deepar/test_auxillary_outputs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | from itertools import islice 15 | 16 | import torch 17 | 18 | from gluonts.dataset.artificial import constant_dataset 19 | from gluonts.dataset.loader import TrainDataLoader 20 | from gluonts.torch.batchify import batchify 21 | 22 | from pts import Trainer 23 | from pts.model import get_module_forward_input_names 24 | from pts.model.deepar import DeepAREstimator 25 | from pts.modules import StudentTOutput 26 | 27 | ds_info, train_ds, test_ds = constant_dataset() 28 | freq = ds_info.metadata.freq 29 | prediction_length = ds_info.prediction_length 30 | 31 | 32 | def test_distribution(): 33 | """ 34 | Makes sure additional tensors can be accessed and have expected shapes 35 | """ 36 | prediction_length = ds_info.prediction_length 37 | estimator = DeepAREstimator( 38 | freq=freq, 39 | prediction_length=prediction_length, 40 | input_size=15, 41 | trainer=Trainer(epochs=1, num_batches_per_epoch=1), 42 | distr_output=StudentTOutput(), 43 | ) 44 | 45 | train_output = estimator.train_model(train_ds) 46 | 47 | # todo adapt loader to anomaly detection use-case 48 | batch_size = 2 49 | num_samples = 3 50 | 51 | training_data_loader = TrainDataLoader( 52 | train_ds, 53 | transform=train_output.transformation 54 | + estimator.create_instance_splitter("training"), 55 | batch_size=batch_size, 56 | num_batches_per_epoch=estimator.trainer.num_batches_per_epoch, 57 | stack_fn=batchify, 58 | ) 59 | 60 | seq_len = 2 * ds_info.prediction_length 61 | 62 | for data_entry in islice(training_data_loader, 1): 63 | input_names = get_module_forward_input_names(train_output.trained_net) 64 | 65 | distr = train_output.trained_net.distribution( 66 | *[data_entry[k] for k in input_names] 67 | ) 68 | 69 | assert distr.sample((num_samples,)).shape == ( 70 | num_samples, 71 | batch_size, 72 | seq_len, 73 | ) 74 | -------------------------------------------------------------------------------- /pts/distributions/implicit_quantile.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import Distribution, TransformedDistribution, AffineTransform 3 | 4 | 5 | class ImplicitQuantile(Distribution): 6 | arg_constraints = {} 7 | 8 | def __init__( 9 | self, 10 | implicit_quantile_function, 11 | taus, 12 | nn_output, 13 | predicted_quantiles, 14 | validate_args=None, 15 | ): 16 | self.predicted_quantiles = predicted_quantiles[0] 17 | self.taus = taus 18 | self.quantile_function = implicit_quantile_function 19 | self.input_data = nn_output 20 | 21 | super(ImplicitQuantile, self).__init__( 22 | batch_shape=self.predicted_quantiles.shape, validate_args=validate_args 23 | ) 24 | 25 | @torch.no_grad() 26 | def sample(self, sample_shape=torch.Size()): 27 | """See arXiv: 1806.06923 28 | Once the model has learned how to predict a given quantile tau, one can sample from the 29 | distribution of the target, by sampling tau values. 
30 | """ 31 | if len(sample_shape) == 0: 32 | num_parallel_samples = 1 33 | else: 34 | num_parallel_samples = sample_shape[0] 35 | input_data = torch.repeat_interleave( 36 | self.input_data, repeats=num_parallel_samples, dim=0 37 | ) 38 | batch_size = input_data.shape[0] 39 | forecast_length = input_data.shape[1] 40 | device = input_data.device 41 | 42 | taus = torch.rand(size=(batch_size, forecast_length), device=device) 43 | samples = self.quantile_function(input_data, taus) 44 | if len(sample_shape) == 0: 45 | return samples 46 | else: 47 | return samples.reshape((num_parallel_samples, -1, forecast_length)) 48 | 49 | def log_prob(self, value): 50 | # Assumes same distribution for all steps in the future, conditionally on the input data 51 | return -self.quantile_loss(self.predicted_quantiles, value, self.taus) 52 | 53 | @staticmethod 54 | def quantile_loss(quantile_forecast, target, tau): 55 | return torch.abs( 56 | (quantile_forecast - target) * ((target <= quantile_forecast).float() - tau) 57 | ) 58 | 59 | 60 | class TransformedImplicitQuantile(TransformedDistribution): 61 | def __init__(self, base_distribution, transforms): 62 | super().__init__(base_distribution, transforms) 63 | 64 | def log_prob(self, x): 65 | scale = 1.0 66 | for transform in reversed(self.transforms): 67 | assert isinstance(transform, AffineTransform), "Not an AffineTransform" 68 | x = transform.inv(x) 69 | scale *= transform.scale 70 | p = self.base_dist.log_prob(x) 71 | return p * scale 72 | -------------------------------------------------------------------------------- /pts/model/causal_deepar/README.md: -------------------------------------------------------------------------------- 1 | # Causal `DeepAR` 2 | 3 | Causal `DeepAR` model augments the `DeepAR` model by incorporating a causal structure via a `control` time-dependent signal. 4 | 5 | The main assumption of this model is that the `target` at time `t` depends not only on the covariates up till time `t` and but also on `control` till time `t`. Thus we encode this structure via a model which now adds a `control_output` distribution layer. The main assumption we have is that at training time our dataset of time series now have an additional `control` key with the corresponding 1-d control-variate values (with the array being as large as `target`). 6 | 7 | We translate this structure into the following schematics of Causal-`DeepAR` at time `t`: 8 | 9 | ``` 10 | ┌─────────┐ ┌────────┐ 11 | │control_t│──────┐ │target_t│ 12 | └─────────┘ │ └────────┘ 13 | ▲ │ ▲ 14 | │ │ │ 15 | log-prob/ │ log-prob/ 16 | sample │ sample 17 | │ ground-truth/ │ 18 | ┌────────┐ sample/ ┌───────┐ 19 | │control │ do │target │ 20 | │ dist │ └──────▶│ dist │ 21 | └────────┘ └───────┘ 22 | ▲ ▲ 23 | └────────────┬──────────┘ 24 | │ 25 | ┌────┐ 26 | ───h_t-1─▶│RNN │───h_t──▶ 27 | └────┘ 28 | ▲ 29 | ┌─────┴────┐ 30 | │target_t-1│ 31 | └─────┬────┘ 32 | ┌─────┴─────┐ 33 | │control_t-1│ 34 | └─────┬─────┘ 35 | ┌───┴──┐ 36 | │cov_t │ 37 | └──────┘ 38 | ``` 39 | 40 | 41 | The model is trained as per the DeepAR assumption, which mainly implies that the covariates and `control` are known for all the time points while training and only the covariates are known for all the time points that we wish to forecast for. 42 | 43 | In terms of the `control` at inference time, we now have two choices: 44 | 45 | 1. 
We can predict by setting the `control` array to `np.nan` for the duration of the prediction length, in which case the model will sample values from the `control_output.distribution` and feed them auto-regressively to the `target` head and RNN. 46 | 47 | 2. On the other hand, we can "do" an intervention in the prediction window by setting the values of `control` to some fixed value of our choosing for the time steps in the future we are interested in. In this case the model will "break" the causal connections and just use the supplied values, feeding those to the `target` head and RNN at the appropriate time points. 48 | 49 | ## Possible uses 50 | 51 | * `target` can be sales and `control` can be discounts 52 | * `target` can be sales and `control` can be the temperature 53 | 54 | -------------------------------------------------------------------------------- /pts/model/tft/tft_output.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Tuple 2 | import numpy as np 3 | 4 | import torch 5 | import torch.nn as nn 6 | from gluonts.core.component import validated 7 | 8 | 9 | class QuantileLoss(nn.Module): 10 | @validated() 11 | def __init__( 12 | self, 13 | quantiles: List[float], 14 | quantile_weights: Optional[List[float]] = None, 15 | ) -> None: 16 | super().__init__() 17 | 18 | self.quantiles = quantiles 19 | self.num_quantiles = len(quantiles) 20 | self.quantile_weights = ( 21 | [1.0 / self.num_quantiles for i in range(self.num_quantiles)] 22 | if not quantile_weights 23 | else quantile_weights 24 | ) 25 | 26 | def forward(self, y_true: torch.Tensor, y_pred: torch.Tensor, sample_weight=None): 27 | if self.num_quantiles > 1: 28 | y_pred_all = torch.chunk(y_pred, self.num_quantiles, dim=-1) 29 | else: 30 | y_pred_all = [y_pred] 31 | 32 | qt_loss = [] 33 | for i, y_pred_q in enumerate(y_pred_all): 34 | q = self.quantiles[i] 35 | weighted_qt = ( 36 | self.compute_quantile_loss(y_true, y_pred_q.squeeze(-1), q) 37 | * self.quantile_weights[i] 38 | ) 39 | qt_loss.append(weighted_qt) 40 | stacked_qt_losses = torch.stack(qt_loss, dim=-1) 41 | sum_qt_loss = torch.mean(stacked_qt_losses, dim=-1) 42 | if sample_weight is not None: 43 | return sample_weight * sum_qt_loss 44 | else: 45 | return sum_qt_loss 46 | 47 | @staticmethod 48 | def compute_quantile_loss( 49 | y_true: torch.Tensor, y_pred_p: torch.Tensor, p: float 50 | ) -> torch.Tensor: 51 | under_bias = p * torch.clamp(y_true - y_pred_p, min=0) 52 | over_bias = (1 - p) * torch.clamp(y_pred_p - y_true, min=0) 53 | 54 | qt_loss = 2 * (under_bias + over_bias) 55 | return qt_loss 56 | 57 | 58 | class ProjectParams(nn.Module): 59 | @validated() 60 | def __init__(self, in_features, num_quantiles): 61 | super().__init__() 62 | self.projection = nn.Linear(in_features=in_features, out_features=num_quantiles) 63 | 64 | def forward(self, x: torch.Tensor) -> torch.Tensor: 65 | return self.projection(x) 66 | 67 | 68 | class QuantileOutput: 69 | @validated() 70 | def __init__( 71 | self, 72 | input_size, 73 | quantiles: List[float], 74 | quantile_weights: Optional[List[float]] = None, 75 | ) -> None: 76 | self.input_size = input_size 77 | self.quantiles = quantiles 78 | self.quantile_weights = quantile_weights 79 | 80 | def get_loss(self) -> nn.Module: 81 | return QuantileLoss( 82 | quantiles=self.quantiles, quantile_weights=self.quantile_weights 83 | ) 84 | 85 | def get_quantile_proj(self) -> nn.Module: 86 | return ProjectParams( 87 | in_features=self.input_size, num_quantiles=len(self.quantiles)
88 | ) 89 | -------------------------------------------------------------------------------- /pts/modules/feature.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class FeatureEmbedder(nn.Module): 8 | def __init__( 9 | self, 10 | cardinalities: List[int], 11 | embedding_dims: List[int], 12 | ) -> None: 13 | super().__init__() 14 | 15 | self.__num_features = len(cardinalities) 16 | 17 | def create_embedding(c: int, d: int) -> nn.Embedding: 18 | embedding = nn.Embedding(c, d) 19 | return embedding 20 | 21 | self.__embedders = nn.ModuleList( 22 | [create_embedding(c, d) for c, d in zip(cardinalities, embedding_dims)] 23 | ) 24 | 25 | def forward(self, features: torch.Tensor) -> torch.Tensor: 26 | if self.__num_features > 1: 27 | # we slice the last dimension, giving an array of length 28 | # self.__num_features with shape (N,T) or (N) 29 | cat_feature_slices = torch.chunk(features, self.__num_features, dim=-1) 30 | else: 31 | cat_feature_slices = [features] 32 | 33 | return torch.cat( 34 | [ 35 | embed(cat_feature_slice.squeeze(-1)) 36 | for embed, cat_feature_slice in zip( 37 | self.__embedders, cat_feature_slices 38 | ) 39 | ], 40 | dim=-1, 41 | ) 42 | 43 | 44 | class FeatureAssembler(nn.Module): 45 | def __init__( 46 | self, 47 | T: int, 48 | embed_static: Optional[FeatureEmbedder] = None, 49 | embed_dynamic: Optional[FeatureEmbedder] = None, 50 | ) -> None: 51 | super().__init__() 52 | 53 | self.T = T 54 | self.embeddings = nn.ModuleDict( 55 | {"embed_static": embed_static, "embed_dynamic": embed_dynamic} 56 | ) 57 | 58 | def forward( 59 | self, 60 | feat_static_cat: torch.Tensor, 61 | feat_static_real: torch.Tensor, 62 | feat_dynamic_cat: torch.Tensor, 63 | feat_dynamic_real: torch.Tensor, 64 | ) -> torch.Tensor: 65 | processed_features = [ 66 | self.process_static_cat(feat_static_cat), 67 | self.process_static_real(feat_static_real), 68 | self.process_dynamic_cat(feat_dynamic_cat), 69 | self.process_dynamic_real(feat_dynamic_real), 70 | ] 71 | 72 | return torch.cat(processed_features, dim=-1) 73 | 74 | def process_static_cat(self, feature: torch.Tensor) -> torch.Tensor: 75 | if self.embeddings["embed_static"] is not None: 76 | feature = self.embeddings["embed_static"](feature) 77 | return feature.unsqueeze(1).expand(-1, self.T, -1).float() 78 | 79 | def process_dynamic_cat(self, feature: torch.Tensor) -> torch.Tensor: 80 | if self.embeddings["embed_dynamic"] is None: 81 | return feature.float() 82 | else: 83 | return self.embeddings["embed_dynamic"](feature) 84 | 85 | def process_static_real(self, feature: torch.Tensor) -> torch.Tensor: 86 | return feature.unsqueeze(1).expand(-1, self.T, -1) 87 | 88 | def process_dynamic_real(self, feature: torch.Tensor) -> torch.Tensor: 89 | return feature 90 | -------------------------------------------------------------------------------- /test/distributions/test_zero_inflated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 Uber Technologies, Inc. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import pytest 5 | import torch 6 | 7 | from torch.distributions import ( 8 | NegativeBinomial, 9 | Normal, 10 | Poisson, 11 | ) 12 | from pts.distributions import ( 13 | ZeroInflatedDistribution, 14 | ZeroInflatedNegativeBinomial, 15 | ZeroInflatedPoisson, 16 | broadcast_shape, 17 | ) 18 | 19 | from numpy.testing import assert_allclose as assert_close 20 | 21 | 22 | @pytest.mark.parametrize("gate_shape", [(), (2,), (3, 1), (3, 2)]) 23 | @pytest.mark.parametrize("base_shape", [(), (2,), (3, 1), (3, 2)]) 24 | def test_zid_shape(gate_shape, base_shape): 25 | gate = torch.rand(gate_shape) 26 | base_dist = Normal(torch.randn(base_shape), torch.randn(base_shape).exp()) 27 | 28 | d = ZeroInflatedDistribution(gate, base_dist) 29 | assert d.batch_shape == broadcast_shape(gate_shape, base_shape) 30 | assert d.support == base_dist.support 31 | 32 | d2 = d.expand([4, 3, 2]) 33 | assert d2.batch_shape == (4, 3, 2) 34 | 35 | 36 | @pytest.mark.parametrize("rate", [0.1, 0.5, 0.9, 1.0, 1.1, 2.0, 10.0]) 37 | def test_zip_0_gate(rate): 38 | # if gate is 0 ZIP is Poisson 39 | zip_ = ZeroInflatedPoisson(torch.zeros(1), torch.tensor(rate)) 40 | pois = Poisson(torch.tensor(rate)) 41 | s = pois.sample((20,)) 42 | zip_prob = zip_.log_prob(s) 43 | pois_prob = pois.log_prob(s) 44 | assert_close(zip_prob, pois_prob, atol=1e-06) 45 | 46 | 47 | @pytest.mark.parametrize("gate", [0.0, 0.25, 0.5, 0.75, 1.0]) 48 | @pytest.mark.parametrize("rate", [0.1, 0.5, 0.9, 1.0, 1.1, 2.0, 10.0]) 49 | def test_zip_mean_variance(gate, rate): 50 | num_samples = 1000000 51 | zip_ = ZeroInflatedPoisson(torch.tensor(gate), torch.tensor(rate)) 52 | s = zip_.sample((num_samples,)) 53 | expected_mean = zip_.mean 54 | estimated_mean = s.mean() 55 | expected_std = zip_.stddev 56 | estimated_std = s.std() 57 | assert_close(expected_mean, estimated_mean, atol=1e-02) 58 | assert_close(expected_std, estimated_std, atol=1e-02) 59 | 60 | 61 | @pytest.mark.parametrize("total_count", [0.1, 0.5, 0.9, 1.0, 1.1, 2.0, 10.0]) 62 | @pytest.mark.parametrize("probs", [0.1, 0.5, 0.9]) 63 | def test_zinb_0_gate(total_count, probs): 64 | # if gate is 0 ZINB is NegativeBinomial 65 | zinb_ = ZeroInflatedNegativeBinomial( 66 | torch.zeros(1), total_count=torch.tensor(total_count), probs=torch.tensor(probs) 67 | ) 68 | neg_bin = NegativeBinomial(torch.tensor(total_count), probs=torch.tensor(probs)) 69 | s = neg_bin.sample((20,)) 70 | zinb_prob = zinb_.log_prob(s) 71 | neg_bin_prob = neg_bin.log_prob(s) 72 | assert_close(zinb_prob, neg_bin_prob, atol=1e-06) 73 | 74 | 75 | @pytest.mark.parametrize("gate", [0.0, 0.25, 0.5, 0.75, 1.0]) 76 | @pytest.mark.parametrize("total_count", [0.1, 0.5, 0.9, 1.0, 1.1, 2.0, 10.0]) 77 | @pytest.mark.parametrize("logits", [-0.5, 0.5, -0.9, 1.9]) 78 | def test_zinb_mean_variance(gate, total_count, logits): 79 | num_samples = 1000000 80 | zinb_ = ZeroInflatedNegativeBinomial( 81 | torch.tensor(gate), 82 | total_count=torch.tensor(total_count), 83 | logits=torch.tensor(logits), 84 | ) 85 | s = zinb_.sample((num_samples,)) 86 | expected_mean = zinb_.mean 87 | estimated_mean = s.mean() 88 | expected_std = zinb_.stddev 89 | estimated_std = s.std() 90 | assert_close(expected_mean, estimated_mean, atol=1e-01) 91 | assert_close(expected_std, estimated_std, atol=1e-1) 92 | -------------------------------------------------------------------------------- /test/model/test_lstnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | # Third-party imports 15 | import pytest 16 | import numpy as np 17 | import pandas as pd 18 | 19 | # First-party imports 20 | from gluonts.dataset.artificial import constant_dataset 21 | from gluonts.dataset.common import TrainDatasets 22 | from gluonts.dataset.multivariate_grouper import MultivariateGrouper 23 | from gluonts.evaluation import MultivariateEvaluator 24 | from gluonts.evaluation.backtest import make_evaluation_predictions 25 | 26 | from pts.model.lstnet import LSTNetEstimator 27 | from pts import Trainer 28 | 29 | 30 | NUM_SERIES = 10 31 | NUM_SAMPLES = 5 32 | 33 | 34 | def load_multivariate_constant_dataset(): 35 | metadata, train_ds, test_ds = constant_dataset() 36 | grouper_train = MultivariateGrouper(max_target_dim=NUM_SERIES) 37 | grouper_test = MultivariateGrouper(max_target_dim=NUM_SERIES) 38 | return TrainDatasets( 39 | metadata=metadata, train=grouper_train(train_ds), test=grouper_test(test_ds), 40 | ) 41 | 42 | 43 | dataset = load_multivariate_constant_dataset() 44 | freq = dataset.metadata.metadata.freq 45 | prediction_length = dataset.metadata.prediction_length 46 | 47 | 48 | @pytest.mark.parametrize("skip_size", [1, 2]) 49 | @pytest.mark.parametrize("ar_window", [1, 2]) 50 | @pytest.mark.parametrize( 51 | "horizon, prediction_length", 52 | [[prediction_length, None], [None, prediction_length]], 53 | ) 54 | def test_lstnet(skip_size, ar_window, horizon, prediction_length): 55 | estimator = LSTNetEstimator( 56 | skip_size=skip_size, 57 | ar_window=ar_window, 58 | num_series=NUM_SERIES, 59 | channels=6, 60 | kernel_size=2, 61 | context_length=4, 62 | freq=freq, 63 | horizon=horizon, 64 | prediction_length=prediction_length, 65 | trainer=Trainer(epochs=3, batch_size=2, learning_rate=0.01,), 66 | ) 67 | 68 | predictor = estimator.train(dataset.train) 69 | forecast_it, ts_it = make_evaluation_predictions( 70 | dataset=dataset.test, predictor=predictor, num_samples=NUM_SAMPLES 71 | ) 72 | forecasts = list(forecast_it) 73 | tss = list(ts_it) 74 | assert len(forecasts) == len(tss) == len(dataset.test) 75 | test_ds = dataset.test.list_data[0] 76 | for fct in forecasts: 77 | assert fct.freq == freq 78 | if estimator.horizon: 79 | assert fct.samples.shape == (NUM_SAMPLES, 1, NUM_SERIES) 80 | else: 81 | assert fct.samples.shape == (NUM_SAMPLES, prediction_length, NUM_SERIES,) 82 | assert ( 83 | fct.start_date 84 | == pd.date_range( 85 | start=str(test_ds["start"]), 86 | periods=test_ds["target"].shape[1], # number of test periods 87 | freq=freq, 88 | closed="right", 89 | )[-(horizon or prediction_length)] 90 | ) 91 | 92 | evaluator = MultivariateEvaluator( 93 | quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 94 | ) 95 | agg_metrics, item_metrics = evaluator( 96 | iter(tss), iter(forecasts), num_series=len(dataset.test) 97 | ) 98 | assert agg_metrics["ND"] < 0.21 99 | -------------------------------------------------------------------------------- /test/model/test_forecast.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | # Third-party imports 15 | import numpy as np 16 | import pandas as pd 17 | import pytest 18 | import torch 19 | from torch.distributions import Uniform 20 | 21 | # First-party imports 22 | from gluonts.model.forecast import SampleForecast 23 | from gluonts.torch.model.forecast import DistributionForecast 24 | 25 | QUANTILES = np.arange(1, 100) / 100 26 | SAMPLES = np.arange(101).reshape(101, 1) / 100 27 | START_DATE = pd.Timestamp(2017, 1, 1, 12) 28 | FREQ = "1D" 29 | 30 | FORECASTS = { 31 | "SampleForecast": SampleForecast(samples=SAMPLES, start_date=START_DATE, freq=FREQ), 32 | "DistributionForecast": DistributionForecast( 33 | distribution=Uniform(low=torch.zeros(1), high=torch.ones(1)), 34 | start_date=START_DATE, 35 | freq=FREQ, 36 | ), 37 | } 38 | 39 | 40 | @pytest.mark.parametrize("name", FORECASTS.keys()) 41 | def test_Forecast(name): 42 | forecast = FORECASTS[name] 43 | 44 | def percentile(value): 45 | return f"p{int(round(value * 100)):02d}" 46 | 47 | num_samples, pred_length = SAMPLES.shape 48 | 49 | for quantile in QUANTILES: 50 | test_cases = [quantile, str(quantile), percentile(quantile)] 51 | for quant_pred in map(forecast.quantile, test_cases): 52 | assert np.isclose( 53 | quant_pred[0], quantile 54 | ), f"Expected {percentile(quantile)} quantile {quantile}. Obtained {quant_pred}." 55 | 56 | assert forecast.prediction_length == 1 57 | assert len(forecast.index) == pred_length 58 | assert forecast.index[0] == pd.Timestamp(START_DATE) 59 | 60 | 61 | def test_DistributionForecast(): 62 | forecast = DistributionForecast( 63 | distribution=Uniform( 64 | low=torch.tensor([0.0, 0.0]), high=torch.tensor([1.0, 2.0]) 65 | ), 66 | start_date=START_DATE, 67 | freq=FREQ, 68 | ) 69 | 70 | def percentile(value): 71 | return f"p{int(round(value * 100)):02d}" 72 | 73 | for quantile in QUANTILES: 74 | test_cases = [quantile, str(quantile), percentile(quantile)] 75 | for quant_pred in map(forecast.quantile, test_cases): 76 | expected = quantile * np.array([1.0, 2.0]) 77 | assert np.allclose( 78 | quant_pred, expected 79 | ), f"Expected {percentile(quantile)} quantile {quantile}. Obtained {quant_pred}." 
80 | 81 | pred_length = 2 82 | assert forecast.prediction_length == pred_length 83 | assert len(forecast.index) == pred_length 84 | assert forecast.index[0] == pd.Timestamp(START_DATE) 85 | 86 | 87 | @pytest.mark.parametrize( 88 | "forecast, exp_index", 89 | [ 90 | ( 91 | SampleForecast( 92 | samples=np.random.normal(size=(100, 7, 3)), 93 | start_date=pd.Timestamp("2020-01-01 00:00:00"), 94 | freq="1D", 95 | ), 96 | pd.date_range( 97 | start=pd.Timestamp("2020-01-01 00:00:00"), freq="1D", periods=7, 98 | ), 99 | ), 100 | ( 101 | DistributionForecast( 102 | Uniform(low=torch.zeros(size=(5, 2)), high=torch.ones(size=(5, 2)),), 103 | start_date=pd.Timestamp("2020-01-01 00:00:00"), 104 | freq="W", 105 | ), 106 | pd.date_range( 107 | start=pd.Timestamp("2020-01-01 00:00:00"), freq="W", periods=5, 108 | ), 109 | ), 110 | ], 111 | ) 112 | def test_forecast_multivariate(forecast, exp_index): 113 | assert forecast.prediction_length == len(exp_index) 114 | assert np.all(forecast.index == exp_index) 115 | -------------------------------------------------------------------------------- /test/model/test_deepvar.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 14 | # First-party imports 15 | import pytest 16 | 17 | from gluonts.dataset.common import TrainDatasets 18 | from gluonts.dataset.artificial import constant_dataset 19 | from gluonts.dataset.multivariate_grouper import MultivariateGrouper 20 | from gluonts.evaluation import MultivariateEvaluator 21 | from gluonts.evaluation.backtest import backtest_metrics 22 | from pts.model.deepvar import DeepVAREstimator 23 | from pts.modules import ( 24 | NormalOutput, 25 | LowRankMultivariateNormalOutput, 26 | MultivariateNormalOutput, 27 | ) 28 | from pts import Trainer 29 | 30 | 31 | def load_multivariate_constant_dataset(): 32 | dataset_info, train_ds, test_ds = constant_dataset() 33 | grouper_train = MultivariateGrouper(max_target_dim=10) 34 | grouper_test = MultivariateGrouper(num_test_dates=1, max_target_dim=10) 35 | metadata = dataset_info.metadata 36 | metadata.prediction_length = dataset_info.prediction_length 37 | return TrainDatasets( 38 | metadata=dataset_info.metadata, 39 | train=grouper_train(train_ds), 40 | test=grouper_test(test_ds), 41 | ) 42 | 43 | 44 | dataset = load_multivariate_constant_dataset() 45 | target_dim = int(dataset.metadata.feat_static_cat[0].cardinality) 46 | metadata = dataset.metadata 47 | estimator = DeepVAREstimator 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "distr_output, num_batches_per_epoch, Estimator, use_marginal_transformation", 52 | [ 53 | ( 54 | NormalOutput(dim=target_dim), 55 | 10, 56 | estimator, 57 | True, 58 | ), 59 | ( 60 | NormalOutput(dim=target_dim), 61 | 10, 62 | estimator, 63 | False, 64 | ), 65 | ( 66 | LowRankMultivariateNormalOutput(dim=target_dim, rank=2), 67 | 10, 68 | estimator, 69 | True, 70 | ), 71 | ( 72 | LowRankMultivariateNormalOutput(dim=target_dim, rank=2), 73 | 10, 74 | estimator, 75 | False, 76 | ), 77 | (None, 10, estimator, True), 78 | ( 79 | MultivariateNormalOutput(dim=target_dim), 80 | 10, 81 | estimator, 82 | True, 83 | ), 84 | ( 85 | MultivariateNormalOutput(dim=target_dim), 86 | 10, 87 | estimator, 88 | False, 89 | ), 90 | ], 91 | ) 92 | def test_deepvar( 93 | distr_output, 94 | num_batches_per_epoch, 95 | Estimator, 96 | use_marginal_transformation, 97 | ): 98 | 99 | estimator = Estimator( 100 | input_size=47, 101 | num_cells=20, 102 | num_layers=1, 103 | dropout_rate=0.0, 104 | pick_incomplete=True, 105 | target_dim=target_dim, 106 | prediction_length=metadata.prediction_length, 107 | freq=metadata.freq, 108 | distr_output=distr_output, 109 | scaling=False, 110 | use_marginal_transformation=use_marginal_transformation, 111 | trainer=Trainer( 112 | epochs=1, 113 | batch_size=8, 114 | learning_rate=1e-10, 115 | num_batches_per_epoch=num_batches_per_epoch, 116 | ), 117 | ) 118 | 119 | predictor = estimator.train(training_data=dataset.train) 120 | 121 | agg_metrics, _ = backtest_metrics( 122 | test_dataset=dataset.test, 123 | predictor=predictor, 124 | evaluator=MultivariateEvaluator( 125 | quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9) 126 | ), 127 | ) 128 | 129 | assert agg_metrics["ND"] < 1.5 130 | -------------------------------------------------------------------------------- /pts/modules/scaler.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Tuple 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from gluonts.core.component import validated 8 | 9 | 10 | class Scaler(ABC, nn.Module): 11 | def __init__(self, keepdim: bool = False, time_first: bool = True): 12 | super().__init__() 13 | self.keepdim = 
keepdim 14 | self.time_first = time_first 15 | 16 | @abstractmethod 17 | def compute_scale( 18 | self, data: torch.Tensor, observed_indicator: torch.Tensor 19 | ) -> torch.Tensor: 20 | pass 21 | 22 | def forward( 23 | self, data: torch.Tensor, observed_indicator: torch.Tensor 24 | ) -> Tuple[torch.Tensor, torch.Tensor]: 25 | """ 26 | Parameters 27 | ---------- 28 | data 29 | tensor of shape (N, T, C) if ``time_first == True`` or (N, C, T) 30 | if ``time_first == False`` containing the data to be scaled 31 | 32 | observed_indicator 33 | observed_indicator: binary tensor with the same shape as 34 | ``data``, that has 1 in correspondence of observed data points, 35 | and 0 in correspondence of missing data points. 36 | 37 | Returns 38 | ------- 39 | Tensor 40 | Tensor containing the "scaled" data, shape: (N, T, C) or (N, C, T). 41 | Tensor 42 | Tensor containing the scale, of shape (N, C) if ``keepdim == False``, 43 | and shape (N, 1, C) or (N, C, 1) if ``keepdim == True``. 44 | """ 45 | 46 | scale = self.compute_scale(data, observed_indicator) 47 | 48 | if self.time_first: 49 | dim = 1 50 | else: 51 | dim = 2 52 | if self.keepdim: 53 | scale = scale.unsqueeze(dim=dim) 54 | return data / scale, scale 55 | else: 56 | return data / scale.unsqueeze(dim=dim), scale 57 | 58 | 59 | class MeanScaler(Scaler): 60 | """ 61 | The ``MeanScaler`` computes a per-item scale according to the average 62 | absolute value over time of each item. The average is computed only among 63 | the observed values in the data tensor, as indicated by the second 64 | argument. Items with no observed data are assigned a scale based on the 65 | global average. 66 | 67 | Parameters 68 | ---------- 69 | minimum_scale 70 | default scale that is used if the time series has only zeros. 71 | """ 72 | 73 | @validated() 74 | def __init__(self, minimum_scale: float = 1e-10, *args, **kwargs): 75 | super().__init__(*args, **kwargs) 76 | self.register_buffer("minimum_scale", torch.tensor(minimum_scale)) 77 | 78 | def compute_scale( 79 | self, data: torch.Tensor, observed_indicator: torch.Tensor 80 | ) -> torch.Tensor: 81 | 82 | if self.time_first: 83 | dim = 1 84 | else: 85 | dim = 2 86 | 87 | # these will have shape (N, C) 88 | num_observed = observed_indicator.sum(dim=dim) 89 | sum_observed = (data.abs() * observed_indicator).sum(dim=dim) 90 | 91 | # first compute a global scale per-dimension 92 | total_observed = num_observed.sum(dim=0) 93 | denominator = torch.max(total_observed, torch.ones_like(total_observed)) 94 | default_scale = sum_observed.sum(dim=0) / denominator 95 | 96 | # then compute a per-item, per-dimension scale 97 | denominator = torch.max(num_observed, torch.ones_like(num_observed)) 98 | scale = sum_observed / denominator 99 | 100 | # use per-batch scale when no element is observed 101 | # or when the sequence contains only zeros 102 | scale = torch.where( 103 | sum_observed > torch.zeros_like(sum_observed), 104 | scale, 105 | default_scale * torch.ones_like(num_observed), 106 | ) 107 | 108 | return torch.max(scale, self.minimum_scale).detach() 109 | 110 | 111 | class NOPScaler(Scaler): 112 | """ 113 | The ``NOPScaler`` assigns a scale equals to 1 to each input item, i.e., 114 | no scaling is applied upon calling the ``NOPScaler``. 
115 | """ 116 | 117 | @validated() 118 | def __init__(self, *args, **kwargs): 119 | super().__init__(*args, **kwargs) 120 | 121 | def compute_scale( 122 | self, data: torch.Tensor, observed_indicator: torch.Tensor 123 | ) -> torch.Tensor: 124 | if self.time_first: 125 | dim = 1 126 | else: 127 | dim = 2 128 | return torch.ones_like(data).mean(dim=dim) 129 | -------------------------------------------------------------------------------- /pts/model/simple_feedforward/simple_feedforward_network.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.distributions import Distribution 6 | 7 | from gluonts.core.component import validated 8 | from gluonts.torch.modules.distribution_output import DistributionOutput 9 | from gluonts.torch.modules.lambda_layer import LambdaLayer 10 | from pts.modules import MeanScaler, NOPScaler 11 | 12 | 13 | class SimpleFeedForwardNetworkBase(nn.Module): 14 | """ 15 | Abstract base class to implement feed-forward networks for probabilistic 16 | time series prediction. 17 | 18 | This class does not implement hybrid_forward: this is delegated 19 | to the two subclasses SimpleFeedForwardTrainingNetwork and 20 | SimpleFeedForwardPredictionNetwork, that define respectively how to 21 | compute the loss and how to generate predictions. 22 | 23 | Parameters 24 | ---------- 25 | num_hidden_dimensions 26 | Number of hidden nodes in each layer. 27 | prediction_length 28 | Number of time units to predict. 29 | context_length 30 | Number of time units that condition the predictions. 31 | batch_normalization 32 | Whether to use batch normalization. 33 | mean_scaling 34 | Scale the network input by the data mean and the network output by 35 | its inverse. 36 | distr_output 37 | Distribution to fit. 
38 | kwargs 39 | """ 40 | 41 | @validated() 42 | def __init__( 43 | self, 44 | num_hidden_dimensions: List[int], 45 | prediction_length: int, 46 | context_length: int, 47 | batch_normalization: bool, 48 | mean_scaling: bool, 49 | distr_output: DistributionOutput, 50 | ) -> None: 51 | super().__init__() 52 | 53 | self.num_hidden_dimensions = num_hidden_dimensions 54 | self.prediction_length = prediction_length 55 | self.context_length = context_length 56 | self.batch_normalization = batch_normalization 57 | self.mean_scaling = mean_scaling 58 | self.distr_output = distr_output 59 | 60 | modules = [] 61 | dims = self.num_hidden_dimensions 62 | for i, units in enumerate(dims[:-1]): 63 | if i == 0: 64 | input_size = context_length 65 | else: 66 | input_size = dims[i - 1] 67 | modules += [nn.Linear(input_size, units), nn.ReLU()] 68 | if self.batch_normalization: 69 | modules.append(nn.BatchNorm1d(units)) 70 | if len(dims) == 1: 71 | modules.append(nn.Linear(context_length, dims[-1] * prediction_length)) 72 | else: 73 | modules.append(nn.Linear(dims[-2], dims[-1] * prediction_length)) 74 | modules.append( 75 | LambdaLayer(lambda o: torch.reshape(o, (-1, prediction_length, dims[-1]))) 76 | ) 77 | self.mlp = nn.Sequential(*modules) 78 | 79 | self.distr_args_proj = self.distr_output.get_args_proj(dims[-1]) 80 | 81 | self.scaler = MeanScaler() if mean_scaling else NOPScaler() 82 | 83 | def get_distr(self, past_target: torch.Tensor) -> Distribution: 84 | # (batch_size, seq_len, target_dim) and (batch_size, seq_len, target_dim) 85 | scaled_target, target_scale = self.scaler( 86 | past_target, 87 | torch.ones_like(past_target), # TODO: pass the actual observed here 88 | ) 89 | 90 | mlp_outputs = self.mlp(scaled_target) 91 | distr_args = self.distr_args_proj(mlp_outputs) 92 | return self.distr_output.distribution( 93 | distr_args, scale=target_scale.unsqueeze(1) 94 | ) 95 | 96 | 97 | class SimpleFeedForwardTrainingNetwork(SimpleFeedForwardNetworkBase): 98 | def forward( 99 | self, past_target: torch.Tensor, future_target: torch.Tensor 100 | ) -> torch.Tensor: 101 | distr = self.get_distr(past_target) 102 | 103 | # (batch_size, prediction_length, target_dim) 104 | loss = -distr.log_prob(future_target) 105 | 106 | return loss.mean() 107 | 108 | 109 | class SimpleFeedForwardPredictionNetwork(SimpleFeedForwardNetworkBase): 110 | @validated() 111 | def __init__(self, num_parallel_samples: int = 100, *args, **kwargs) -> None: 112 | super().__init__(*args, **kwargs) 113 | self.num_parallel_samples = num_parallel_samples 114 | 115 | def forward(self, past_target: torch.Tensor) -> torch.Tensor: 116 | distr = self.get_distr(past_target) 117 | 118 | # (num_samples, batch_size, prediction_length) 119 | samples = distr.sample((self.num_parallel_samples,)) 120 | 121 | return samples.permute(1, 0, 2) 122 | -------------------------------------------------------------------------------- /pts/trainer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import List, Optional, Union 3 | 4 | from tqdm.auto import tqdm 5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.optim import Adam 9 | from torch.optim.lr_scheduler import OneCycleLR 10 | from torch.utils.data import DataLoader 11 | 12 | from gluonts.core.component import validated 13 | 14 | 15 | class Trainer: 16 | @validated() 17 | def __init__( 18 | self, 19 | epochs: int = 100, 20 | batch_size: int = 32, 21 | num_batches_per_epoch: int = 50, 22 | learning_rate: float = 1e-3, 23 | 
weight_decay: float = 1e-6, 24 | maximum_learning_rate: float = 1e-2, 25 | clip_gradient: Optional[float] = None, 26 | device: Optional[Union[torch.device, str]] = None, 27 | **kwargs, 28 | ) -> None: 29 | self.epochs = epochs 30 | self.batch_size = batch_size 31 | self.num_batches_per_epoch = num_batches_per_epoch 32 | self.learning_rate = learning_rate 33 | self.weight_decay = weight_decay 34 | self.maximum_learning_rate = maximum_learning_rate 35 | self.clip_gradient = clip_gradient 36 | self.device = device 37 | 38 | def __call__( 39 | self, 40 | net: nn.Module, 41 | train_iter: DataLoader, 42 | validation_iter: Optional[DataLoader] = None, 43 | ) -> None: 44 | optimizer = Adam( 45 | net.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay 46 | ) 47 | 48 | lr_scheduler = OneCycleLR( 49 | optimizer, 50 | max_lr=self.maximum_learning_rate, 51 | steps_per_epoch=self.num_batches_per_epoch, 52 | epochs=self.epochs, 53 | ) 54 | 55 | for epoch_no in range(self.epochs): 56 | # mark epoch start time 57 | tic = time.time() 58 | cumm_epoch_loss = 0.0 59 | total = self.num_batches_per_epoch - 1 60 | 61 | # training loop 62 | with tqdm(train_iter, total=total) as it: 63 | for batch_no, data_entry in enumerate(it, start=1): 64 | optimizer.zero_grad() 65 | 66 | inputs = [v.to(self.device) for v in data_entry.values()] 67 | output = net(*inputs) 68 | 69 | if isinstance(output, (list, tuple)): 70 | loss = output[0] 71 | else: 72 | loss = output 73 | 74 | cumm_epoch_loss += loss.item() 75 | avg_epoch_loss = cumm_epoch_loss / batch_no 76 | it.set_postfix( 77 | { 78 | "epoch": f"{epoch_no + 1}/{self.epochs}", 79 | "avg_loss": avg_epoch_loss, 80 | }, 81 | refresh=False, 82 | ) 83 | 84 | loss.backward() 85 | if self.clip_gradient is not None: 86 | nn.utils.clip_grad_norm_(net.parameters(), self.clip_gradient) 87 | 88 | optimizer.step() 89 | lr_scheduler.step() 90 | 91 | if self.num_batches_per_epoch == batch_no: 92 | break 93 | it.close() 94 | 95 | # validation loop 96 | if validation_iter is not None: 97 | cumm_epoch_loss_val = 0.0 98 | with tqdm(validation_iter, total=total, colour="green") as it: 99 | 100 | for batch_no, data_entry in enumerate(it, start=1): 101 | inputs = [v.to(self.device) for v in data_entry.values()] 102 | with torch.no_grad(): 103 | output = net(*inputs) 104 | if isinstance(output, (list, tuple)): 105 | loss = output[0] 106 | else: 107 | loss = output 108 | 109 | cumm_epoch_loss_val += loss.item() 110 | avg_epoch_loss_val = cumm_epoch_loss_val / batch_no 111 | it.set_postfix( 112 | { 113 | "epoch": f"{epoch_no + 1}/{self.epochs}", 114 | "avg_loss": avg_epoch_loss, 115 | "avg_val_loss": avg_epoch_loss_val, 116 | }, 117 | refresh=False, 118 | ) 119 | 120 | if self.num_batches_per_epoch == batch_no: 121 | break 122 | 123 | it.close() 124 | 125 | # mark epoch end time and log time cost of current epoch 126 | toc = time.time() 127 | -------------------------------------------------------------------------------- /pts/model/time_grad/epsilon_theta.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class DiffusionEmbedding(nn.Module): 9 | def __init__(self, dim, proj_dim, max_steps=500): 10 | super().__init__() 11 | self.register_buffer( 12 | "embedding", self._build_embedding(dim, max_steps), persistent=False 13 | ) 14 | self.projection1 = nn.Linear(dim * 2, proj_dim) 15 | self.projection2 = nn.Linear(proj_dim, proj_dim) 16 | 17 | 
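    # `self.embedding` (built in `_build_embedding` below) is a fixed sinusoidal
    # table of shape [max_steps, dim * 2]; `forward` looks up the rows for the
    # given diffusion step indices and refines them with two SiLU-activated
    # linear projections, yielding a smooth encoding of the current noise level.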
def forward(self, diffusion_step): 18 | x = self.embedding[diffusion_step] 19 | x = self.projection1(x) 20 | x = F.silu(x) 21 | x = self.projection2(x) 22 | x = F.silu(x) 23 | return x 24 | 25 | def _build_embedding(self, dim, max_steps): 26 | steps = torch.arange(max_steps).unsqueeze(1) # [T,1] 27 | dims = torch.arange(dim).unsqueeze(0) # [1,dim] 28 | table = steps * 10.0 ** (dims * 4.0 / dim) # [T,dim] 29 | table = torch.cat([torch.sin(table), torch.cos(table)], dim=1) 30 | return table 31 | 32 | 33 | class ResidualBlock(nn.Module): 34 | def __init__(self, hidden_size, residual_channels, dilation): 35 | super().__init__() 36 | self.dilated_conv = nn.Conv1d( 37 | residual_channels, 38 | 2 * residual_channels, 39 | 3, 40 | padding=dilation, 41 | dilation=dilation, 42 | padding_mode="circular", 43 | ) 44 | self.diffusion_projection = nn.Linear(hidden_size, residual_channels) 45 | self.conditioner_projection = nn.Conv1d( 46 | 1, 2 * residual_channels, 1, padding=2, padding_mode="circular" 47 | ) 48 | self.output_projection = nn.Conv1d(residual_channels, 2 * residual_channels, 1) 49 | 50 | nn.init.kaiming_normal_(self.conditioner_projection.weight) 51 | nn.init.kaiming_normal_(self.output_projection.weight) 52 | 53 | def forward(self, x, conditioner, diffusion_step): 54 | diffusion_step = self.diffusion_projection(diffusion_step).unsqueeze(-1) 55 | conditioner = self.conditioner_projection(conditioner) 56 | 57 | y = x + diffusion_step 58 | y = self.dilated_conv(y) + conditioner 59 | 60 | gate, filter = torch.chunk(y, 2, dim=1) 61 | y = torch.sigmoid(gate) * torch.tanh(filter) 62 | 63 | y = self.output_projection(y) 64 | y = F.leaky_relu(y, 0.4) 65 | residual, skip = torch.chunk(y, 2, dim=1) 66 | return (x + residual) / math.sqrt(2.0), skip 67 | 68 | 69 | class CondUpsampler(nn.Module): 70 | def __init__(self, cond_length, target_dim): 71 | super().__init__() 72 | self.linear1 = nn.Linear(cond_length, target_dim // 2) 73 | self.linear2 = nn.Linear(target_dim // 2, target_dim) 74 | 75 | def forward(self, x): 76 | x = self.linear1(x) 77 | x = F.leaky_relu(x, 0.4) 78 | x = self.linear2(x) 79 | x = F.leaky_relu(x, 0.4) 80 | return x 81 | 82 | 83 | class EpsilonTheta(nn.Module): 84 | def __init__( 85 | self, 86 | target_dim, 87 | cond_length, 88 | time_emb_dim=16, 89 | residual_layers=8, 90 | residual_channels=8, 91 | dilation_cycle_length=2, 92 | residual_hidden=64, 93 | ): 94 | super().__init__() 95 | self.input_projection = nn.Conv1d( 96 | 1, residual_channels, 1, padding=2, padding_mode="circular" 97 | ) 98 | self.diffusion_embedding = DiffusionEmbedding( 99 | time_emb_dim, proj_dim=residual_hidden 100 | ) 101 | self.cond_upsampler = CondUpsampler( 102 | target_dim=target_dim, cond_length=cond_length 103 | ) 104 | self.residual_layers = nn.ModuleList( 105 | [ 106 | ResidualBlock( 107 | residual_channels=residual_channels, 108 | dilation=2 ** (i % dilation_cycle_length), 109 | hidden_size=residual_hidden, 110 | ) 111 | for i in range(residual_layers) 112 | ] 113 | ) 114 | self.skip_projection = nn.Conv1d(residual_channels, residual_channels, 3) 115 | self.output_projection = nn.Conv1d(residual_channels, 1, 3) 116 | 117 | nn.init.kaiming_normal_(self.input_projection.weight) 118 | nn.init.kaiming_normal_(self.skip_projection.weight) 119 | nn.init.zeros_(self.output_projection.weight) 120 | 121 | def forward(self, inputs, time, cond): 122 | x = self.input_projection(inputs) 123 | x = F.leaky_relu(x, 0.4) 124 | 125 | diffusion_step = self.diffusion_embedding(time) 126 | cond_up = 
self.cond_upsampler(cond) 127 | skip = [] 128 | for layer in self.residual_layers: 129 | x, skip_connection = layer(x, cond_up, diffusion_step) 130 | skip.append(skip_connection) 131 | 132 | x = torch.sum(torch.stack(skip), dim=0) / math.sqrt(len(self.residual_layers)) 133 | x = self.skip_projection(x) 134 | x = F.leaky_relu(x, 0.4) 135 | x = self.output_projection(x) 136 | return x 137 | -------------------------------------------------------------------------------- /pts/distributions/zero_inflated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 Uber Technologies, Inc. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | import torch 5 | from torch.distributions import constraints, NegativeBinomial, Poisson, Distribution 6 | from torch.distributions.utils import broadcast_all, lazy_property 7 | 8 | from .utils import broadcast_shape 9 | 10 | 11 | class ZeroInflatedDistribution(Distribution): 12 | """ 13 | Generic Zero Inflated distribution. 14 | 15 | This can be used directly or can be used as a base class as e.g. for 16 | :class:`ZeroInflatedPoisson` and :class:`ZeroInflatedNegativeBinomial`. 17 | 18 | :param torch.Tensor gate: probability of extra zeros given via a Bernoulli distribution. 19 | :param TorchDistribution base_dist: the base distribution. 20 | """ 21 | 22 | arg_constraints = {"gate": constraints.unit_interval} 23 | 24 | def __init__(self, gate, base_dist, validate_args=None): 25 | if base_dist.event_shape: 26 | raise ValueError( 27 | "ZeroInflatedDistribution expected empty " 28 | "base_dist.event_shape but got {}".format(base_dist.event_shape) 29 | ) 30 | batch_shape = broadcast_shape(gate.shape, base_dist.batch_shape) 31 | self.gate = gate.expand(batch_shape) 32 | self.base_dist = base_dist.expand(batch_shape) 33 | event_shape = torch.Size() 34 | 35 | super().__init__(batch_shape, event_shape, validate_args) 36 | 37 | @property 38 | def support(self): 39 | return self.base_dist.support 40 | 41 | def log_prob(self, value): 42 | if self._validate_args: 43 | self._validate_sample(value) 44 | 45 | gate, value = broadcast_all(self.gate, value) 46 | log_prob = (-gate).log1p() + self.base_dist.log_prob(value) 47 | log_prob = torch.where(value == 0, (gate + log_prob.exp()).log(), log_prob) 48 | return log_prob 49 | 50 | def sample(self, sample_shape=torch.Size()): 51 | shape = self._extended_shape(sample_shape) 52 | with torch.no_grad(): 53 | mask = torch.bernoulli(self.gate.expand(shape)).bool() 54 | samples = self.base_dist.expand(shape).sample() 55 | samples = torch.where(mask, samples.new_zeros(()), samples) 56 | return samples 57 | 58 | @lazy_property 59 | def mean(self): 60 | return (1 - self.gate) * self.base_dist.mean 61 | 62 | @lazy_property 63 | def variance(self): 64 | return (1 - self.gate) * ( 65 | self.base_dist.mean ** 2 + self.base_dist.variance 66 | ) - (self.mean) ** 2 67 | 68 | def expand(self, batch_shape, _instance=None): 69 | new = self._get_checked_instance(type(self), _instance) 70 | batch_shape = torch.Size(batch_shape) 71 | gate = self.gate.expand(batch_shape) 72 | base_dist = self.base_dist.expand(batch_shape) 73 | ZeroInflatedDistribution.__init__(new, gate, base_dist, validate_args=False) 74 | new._validate_args = self._validate_args 75 | return new 76 | 77 | 78 | class ZeroInflatedPoisson(ZeroInflatedDistribution): 79 | """ 80 | A Zero Inflated Poisson distribution. 81 | 82 | :param torch.Tensor gate: probability of extra zeros. 
83 | :param torch.Tensor rate: rate of poisson distribution. 84 | """ 85 | 86 | arg_constraints = {"gate": constraints.unit_interval, "rate": constraints.positive} 87 | support = constraints.nonnegative_integer 88 | 89 | def __init__(self, gate, rate, validate_args=None): 90 | base_dist = Poisson(rate=rate, validate_args=False) 91 | base_dist._validate_args = validate_args 92 | 93 | super().__init__(gate, base_dist, validate_args=validate_args) 94 | 95 | @property 96 | def rate(self): 97 | return self.base_dist.rate 98 | 99 | 100 | class ZeroInflatedNegativeBinomial(ZeroInflatedDistribution): 101 | """ 102 | A Zero Inflated Negative Binomial distribution. 103 | 104 | :param torch.Tensor gate: probability of extra zeros. 105 | :param total_count: non-negative number of negative Bernoulli trials. 106 | :type total_count: float or torch.Tensor 107 | :param torch.Tensor probs: Event probabilities of success in the half open interval [0, 1). 108 | :param torch.Tensor logits: Event log-odds for probabilities of success. 109 | """ 110 | 111 | arg_constraints = { 112 | "gate": constraints.unit_interval, 113 | "total_count": constraints.greater_than_eq(0), 114 | "probs": constraints.half_open_interval(0.0, 1.0), 115 | "logits": constraints.real, 116 | } 117 | support = constraints.nonnegative_integer 118 | 119 | def __init__(self, gate, total_count, probs=None, logits=None, validate_args=None): 120 | base_dist = NegativeBinomial( 121 | total_count=total_count, 122 | probs=probs, 123 | logits=logits, 124 | validate_args=False, 125 | ) 126 | base_dist._validate_args = validate_args 127 | 128 | super().__init__(gate, base_dist, validate_args=validate_args) 129 | 130 | @property 131 | def total_count(self): 132 | return self.base_dist.total_count 133 | 134 | @property 135 | def probs(self): 136 | return self.base_dist.probs 137 | 138 | @property 139 | def logits(self): 140 | return self.base_dist.logits 141 | -------------------------------------------------------------------------------- /pts/distributions/piecewise_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
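# This module implements a distribution parameterized directly through its
# quantile function, which is piecewise linear: `gamma` is the value at
# quantile level 0, `slopes` are the per-piece slopes, and `knot_spacings` are
# the gaps between consecutive quantile-level knots. Instead of an exact
# likelihood, training minimizes the CRPS in closed form (`log_prob` below
# simply returns `-crps`).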
13 | 14 | import torch 15 | import torch.nn.functional as F 16 | from torch.distributions import ( 17 | constraints, 18 | NegativeBinomial, 19 | Poisson, 20 | Distribution, 21 | TransformedDistribution, 22 | AffineTransform, 23 | ) 24 | from torch.distributions.utils import broadcast_all, lazy_property 25 | 26 | from .utils import broadcast_shape 27 | 28 | 29 | class PiecewiseLinear(Distribution): 30 | def __init__(self, gamma, slopes, knot_spacings, validate_args=None): 31 | self.gamma = gamma 32 | self.slopes = slopes 33 | self.knot_spacings = knot_spacings 34 | 35 | self.b, self.knot_positions = PiecewiseLinear._to_orig_params( 36 | slopes=slopes, knot_spacings=knot_spacings 37 | ) 38 | super(PiecewiseLinear, self).__init__( 39 | batch_shape=self.gamma.shape, validate_args=validate_args 40 | ) 41 | 42 | @staticmethod 43 | def _to_orig_params(slopes, knot_spacings): 44 | # b: the difference between slopes of consecutive pieces 45 | b = slopes[..., 1:] - slopes[..., 0:-1] 46 | 47 | # Add slope of first piece to b: b_0 = m_0 48 | m_0 = slopes[..., 0:1] 49 | b = torch.cat((m_0, b), dim=-1) 50 | 51 | # The actual position of the knots is obtained by cumulative sum of 52 | # the knot spacings. The first knot position is always 0 for quantile 53 | # functions. 54 | knot_positions = torch.cumsum(knot_spacings, dim=-1) - knot_spacings 55 | 56 | return b, knot_positions 57 | 58 | @torch.no_grad() 59 | def sample(self, sample_shape=torch.Size()): 60 | shape = self._extended_shape(sample_shape) 61 | u = torch.rand_like(self.gamma.expand(shape)) 62 | 63 | sample = self.quantile(u) 64 | 65 | if len(sample_shape) == 0: 66 | sample = sample.squeeze(0) 67 | 68 | return sample 69 | 70 | def quantile(self, level): 71 | return self.quantile_internal(level, dim=0) 72 | 73 | def quantile_internal(self, x, dim=None): 74 | if dim is not None: 75 | gamma = self.gamma.unsqueeze(dim=dim if dim == 0 else -1) 76 | knot_positions = self.knot_positions.unsqueeze(dim) 77 | b = self.b.unsqueeze(dim) 78 | else: 79 | gamma, knot_positions, b = self.gamma, self.knot_positions, self.b 80 | 81 | x_minus_knots = x.unsqueeze(-1) - knot_positions 82 | 83 | quantile = gamma + (b * F.relu(x_minus_knots)).sum(-1) 84 | 85 | return quantile 86 | 87 | def log_prob(self, value): 88 | return -self.crps(value) 89 | 90 | def cdf(self, x): 91 | gamma, b, knot_positions = self.gamma, self.b, self.knot_positions 92 | 93 | quantiles_at_knots = self.quantile_internal(knot_positions, dim=-2) 94 | 95 | # Mask to nullify the terms corresponding to knots larger than l_0, 96 | # which is the largest knot (quantile level) such that the quantile 97 | # at l_0, s(l_0) < x. 98 | mask = torch.le(quantiles_at_knots, x.unsqueeze(-1)) 99 | 100 | slope_l0 = (b * mask).sum(-1) 101 | 102 | # slope_l0 can be zero in which case a_tilde = 0. 
103 | # The following is to circumvent an issue where the 104 | # backward() returns nans when slope_l0 is zero in the where 105 | slope_l0_nz = torch.where(slope_l0 == 0.0, torch.ones_like(x), slope_l0) 106 | 107 | a_tilde = torch.where( 108 | slope_l0 == 0.0, 109 | torch.zeros_like(x), 110 | (x - gamma + (b * knot_positions * mask).sum(-1)) / slope_l0_nz, 111 | ) 112 | 113 | return torch.clamp(a_tilde, min=0.0, max=1.0) 114 | 115 | def crps(self, x): 116 | gamma, b, knot_positions = self.gamma, self.b, self.knot_positions 117 | 118 | a_tilde = self.cdf(x) 119 | 120 | max_a_tilde_knots = torch.max(a_tilde.unsqueeze(-1), knot_positions) 121 | 122 | knots_cubed = torch.pow(knot_positions, 3.0) 123 | coeff = ( 124 | (1.0 - knots_cubed) / 3.0 125 | - knot_positions 126 | - torch.square(max_a_tilde_knots) 127 | + 2 * max_a_tilde_knots * knot_positions 128 | ) 129 | 130 | return (2 * a_tilde - 1) * x + (1 - 2 * a_tilde) * gamma + (b * coeff).sum(-1) 131 | 132 | 133 | class TransformedPiecewiseLinear(TransformedDistribution): 134 | def __init__(self, base_distribution, transforms): 135 | super().__init__(base_distribution, transforms) 136 | 137 | def crps(self, x): 138 | scale = 1.0 139 | 140 | for transform in reversed(self.transforms): 141 | assert isinstance(transform, AffineTransform), "Not an AffineTransform" 142 | x = transform.inv(x) 143 | scale *= transform.scale 144 | 145 | p = self.base_dist.crps(x) 146 | return p * scale 147 | -------------------------------------------------------------------------------- /pts/model/tft/tft_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
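# Data transformations used by the Temporal Fusion Transformer estimator:
# `BroadcastTo` repeats a (typically static) feature along the time axis so it
# matches the target length, and `TFTInstanceSplitter` extends the GluonTS
# `InstanceSplitter` so that fields listed in `past_time_series_fields` are
# sliced over the context window only (no "future_" counterpart is emitted).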
13 | 14 | from collections import Counter 15 | from typing import Iterator, List, Optional 16 | 17 | import numpy as np 18 | 19 | from gluonts.core.component import validated 20 | from gluonts.dataset.common import DataEntry 21 | from gluonts.dataset.field_names import FieldName 22 | from gluonts.transform import ( 23 | InstanceSplitter, 24 | MapTransformation, 25 | shift_timestamp, 26 | target_transformation_length, 27 | ) 28 | from gluonts.transform.sampler import InstanceSampler 29 | 30 | 31 | class BroadcastTo(MapTransformation): 32 | @validated() 33 | def __init__( 34 | self, 35 | field: str, 36 | ext_length: int = 0, 37 | target_field: str = FieldName.TARGET, 38 | ) -> None: 39 | self.field = field 40 | self.ext_length = ext_length 41 | self.target_field = target_field 42 | 43 | def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry: 44 | length = target_transformation_length( 45 | data[self.target_field], self.ext_length, is_train 46 | ) 47 | data[self.field] = np.broadcast_to( 48 | data[self.field], 49 | (data[self.field].shape[:-1] + (length,)), 50 | ) 51 | return data 52 | 53 | 54 | class TFTInstanceSplitter(InstanceSplitter): 55 | @validated() 56 | def __init__( 57 | self, 58 | instance_sampler: InstanceSampler, 59 | past_length: int, 60 | future_length: int, 61 | target_field: str = FieldName.TARGET, 62 | is_pad_field: str = FieldName.IS_PAD, 63 | start_field: str = FieldName.START, 64 | forecast_start_field: str = FieldName.FORECAST_START, 65 | observed_value_field: str = FieldName.OBSERVED_VALUES, 66 | lead_time: int = 0, 67 | output_NTC: bool = True, 68 | time_series_fields: List[str] = [], 69 | past_time_series_fields: List[str] = [], 70 | dummy_value: float = 0.0, 71 | ) -> None: 72 | 73 | super().__init__( 74 | target_field=target_field, 75 | is_pad_field=is_pad_field, 76 | start_field=start_field, 77 | forecast_start_field=forecast_start_field, 78 | instance_sampler=instance_sampler, 79 | past_length=past_length, 80 | future_length=future_length, 81 | lead_time=lead_time, 82 | output_NTC=output_NTC, 83 | time_series_fields=time_series_fields, 84 | dummy_value=dummy_value, 85 | ) 86 | 87 | assert past_length > 0, "The value of `past_length` should be > 0" 88 | assert future_length > 0, "The value of `future_length` should be > 0" 89 | 90 | self.observed_value_field = observed_value_field 91 | self.past_ts_fields = past_time_series_fields 92 | 93 | def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]: 94 | pl = self.future_length 95 | lt = self.lead_time 96 | target = data[self.target_field] 97 | 98 | sampled_indices = self.instance_sampler(target) 99 | 100 | slice_cols = ( 101 | self.ts_fields 102 | + self.past_ts_fields 103 | + [self.target_field, self.observed_value_field] 104 | ) 105 | for i in sampled_indices: 106 | pad_length = max(self.past_length - i, 0) 107 | d = data.copy() 108 | 109 | for field in slice_cols: 110 | if i >= self.past_length: 111 | past_piece = d[field][..., i - self.past_length : i] 112 | else: 113 | pad_block = np.full( 114 | shape=d[field].shape[:-1] + (pad_length,), 115 | fill_value=self.dummy_value, 116 | dtype=d[field].dtype, 117 | ) 118 | past_piece = np.concatenate([pad_block, d[field][..., :i]], axis=-1) 119 | future_piece = d[field][..., (i + lt) : (i + lt + pl)] 120 | if field in self.ts_fields: 121 | piece = np.concatenate([past_piece, future_piece], axis=-1) 122 | if self.output_NTC: 123 | piece = piece.transpose() 124 | d[field] = piece 125 | else: 126 | if self.output_NTC: 127 | 
past_piece = past_piece.transpose() 128 | future_piece = future_piece.transpose() 129 | if field not in self.past_ts_fields: 130 | d[self._past(field)] = past_piece 131 | d[self._future(field)] = future_piece 132 | del d[field] 133 | else: 134 | d[field] = past_piece 135 | pad_indicator = np.zeros(self.past_length) 136 | if pad_length > 0: 137 | pad_indicator[:pad_length] = 1 138 | d[self._past(self.is_pad_field)] = pad_indicator 139 | d[self.forecast_start_field] = shift_timestamp(d[self.start_field], i + lt) 140 | yield d 141 | -------------------------------------------------------------------------------- /test/modules/test_scaler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | import numpy as np 15 | import pytest 16 | import torch 17 | 18 | from pts.modules import MeanScaler, NOPScaler 19 | 20 | test_cases = [ 21 | ( 22 | MeanScaler(), 23 | torch.tensor( 24 | [ 25 | [1.0] * 50, 26 | [0.0] * 25 + [3.0] * 25, 27 | [2.0] * 49 + [1.5] * 1, 28 | [0.0] * 50, 29 | [1.0] * 50, 30 | ] 31 | ), 32 | torch.tensor( 33 | [ 34 | [1.0] * 50, 35 | [0.0] * 25 + [1.0] * 25, 36 | [0.0] * 49 + [1.0] * 1, 37 | [1.0] * 50, 38 | [0.0] * 50, 39 | ] 40 | ), 41 | torch.tensor([1.0, 3.0, 1.5, 1.00396824, 1.00396824]), 42 | ), 43 | ( 44 | MeanScaler(keepdim=True), 45 | torch.tensor( 46 | [ 47 | [1.0] * 50, 48 | [0.0] * 25 + [3.0] * 25, 49 | [2.0] * 49 + [1.5] * 1, 50 | [0.0] * 50, 51 | [1.0] * 50, 52 | ] 53 | ), 54 | torch.tensor( 55 | [ 56 | [1.0] * 50, 57 | [0.0] * 25 + [1.0] * 25, 58 | [0.0] * 49 + [1.0] * 1, 59 | [1.0] * 50, 60 | [0.0] * 50, 61 | ] 62 | ), 63 | torch.tensor([1.0, 3.0, 1.5, 1.00396824, 1.00396824]).unsqueeze(1), 64 | ), 65 | ( 66 | MeanScaler(), 67 | torch.tensor( 68 | [ 69 | [[1.0]] * 50, 70 | [[0.0]] * 25 + [[3.0]] * 25, 71 | [[2.0]] * 49 + [[1.5]] * 1, 72 | [[0.0]] * 50, 73 | [[1.0]] * 50, 74 | ] 75 | ), 76 | torch.tensor( 77 | [ 78 | [[1.0]] * 50, 79 | [[0.0]] * 25 + [[1.0]] * 25, 80 | [[0.0]] * 49 + [[1.0]] * 1, 81 | [[1.0]] * 50, 82 | [[0.0]] * 50, 83 | ] 84 | ), 85 | torch.tensor([1.0, 3.0, 1.5, 1.00396824, 1.00396824]).unsqueeze(1), 86 | ), 87 | ( 88 | MeanScaler(minimum_scale=1e-8), 89 | torch.tensor( 90 | [ 91 | [[1.0, 2.0]] * 50, 92 | [[0.0, 0.0]] * 25 + [[3.0, 6.0]] * 25, 93 | [[2.0, 4.0]] * 49 + [[1.5, 3.0]] * 1, 94 | [[0.0, 0.0]] * 50, 95 | [[1.0, 2.0]] * 50, 96 | ] 97 | ), 98 | torch.tensor( 99 | [ 100 | [[1.0, 1.0]] * 50, 101 | [[0.0, 1.0]] * 25 + [[1.0, 0.0]] * 25, 102 | [[1.0, 0.0]] * 49 + [[0.0, 1.0]] * 1, 103 | [[1.0, 0.0]] * 50, 104 | [[0.0, 1.0]] * 50, 105 | ] 106 | ), 107 | torch.tensor( 108 | [ 109 | [1.0, 2.0], 110 | [3.0, 1.61111116], 111 | [2.0, 3.0], 112 | [1.28160918, 1.61111116], 113 | [1.28160918, 2.0], 114 | ] 115 | ), 116 | ), 117 | ( 118 | MeanScaler(), 119 | torch.tensor( 120 | [ 121 | [120.0] * 25 + [150.0] * 25, 122 | [0.0] * 10 + [3.0] * 20 + [61.0] * 20, 123 | [0.0] * 50, 124 | [2e-2] * 10 + [0.0] 
* 30 + [3e-2] * 10, 125 | ] 126 | ), 127 | torch.tensor( 128 | [ 129 | [1.0] * 25 + [1.0] * 25, 130 | [0.0] * 10 + [1.0] * 20 + [1.0] * 20, 131 | [0.0] * 50, 132 | [1.0] * 10 + [0.0] * 30 + [1.0] * 10, 133 | ] 134 | ), 135 | torch.tensor([135.0, 32.0, 73.00454712, 2.5e-2]), 136 | ), 137 | ( 138 | MeanScaler(), 139 | torch.randn((5, 30)), 140 | torch.zeros((5, 30)), 141 | 1e-10 * torch.ones((5,)), 142 | ), 143 | ( 144 | MeanScaler(minimum_scale=1e-6), 145 | torch.randn((5, 30, 1)), 146 | torch.zeros((5, 30, 1)), 147 | 1e-6 * torch.ones((5, 1)), 148 | ), 149 | ( 150 | MeanScaler(minimum_scale=1e-12), 151 | torch.randn((5, 30, 3)), 152 | torch.zeros((5, 30, 3)), 153 | 1e-12 * torch.ones((5, 3)), 154 | ), 155 | ( 156 | NOPScaler(), 157 | torch.randn((10, 20, 30)), 158 | torch.randn((10, 20, 30)) > 0, 159 | torch.ones((10, 30)), 160 | ), 161 | ( 162 | NOPScaler(), 163 | torch.randn((10, 20, 30)), 164 | torch.ones((10, 20, 30)), 165 | torch.ones((10, 30)), 166 | ), 167 | ( 168 | NOPScaler(), 169 | torch.randn((10, 20, 30)), 170 | torch.zeros((10, 20, 30)), 171 | torch.ones((10, 30)), 172 | ), 173 | ] 174 | 175 | 176 | @pytest.mark.parametrize("s, target, observed, expected_scale", test_cases) 177 | def test_scaler(s, target, observed, expected_scale): 178 | target_scaled, scale = s(target, observed) 179 | 180 | assert np.allclose( 181 | expected_scale.numpy(), scale.numpy() 182 | ), "mismatch in the scale computation" 183 | 184 | if s.keepdim: 185 | expected_target_scaled = target / expected_scale 186 | else: 187 | expected_target_scaled = target / expected_scale.unsqueeze(1) 188 | 189 | assert np.allclose( 190 | expected_target_scaled.numpy(), target_scaled.numpy() 191 | ), "mismatch in the scaled target computation" 192 | 193 | 194 | @pytest.mark.parametrize("target, observed", []) 195 | def test_nopscaler(target, observed): 196 | s = NOPScaler() 197 | target_scaled, scale = s(target, observed) 198 | 199 | assert torch.norm(target - target_scaled) == 0 200 | assert torch.norm(torch.ones_like(target).mean(dim=1) - scale) == 0 201 | -------------------------------------------------------------------------------- /pts/model/lstnet/lstnet_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from gluonts.core.component import validated 8 | from gluonts.dataset.field_names import FieldName 9 | from gluonts.torch.util import copy_parameters 10 | from gluonts.torch.model.predictor import PyTorchPredictor 11 | from gluonts.model.predictor import Predictor 12 | from gluonts.transform import ( 13 | InstanceSplitter, 14 | ValidationSplitSampler, 15 | TestSplitSampler, 16 | Transformation, 17 | Chain, 18 | ExpectedNumInstanceSampler, 19 | AddObservedValuesIndicator, 20 | AsNumpyArray, 21 | ) 22 | 23 | from pts.model import PyTorchEstimator 24 | from pts import Trainer 25 | from pts.model.utils import get_module_forward_input_names 26 | 27 | from .lstnet_network import LSTNetTrain, LSTNetPredict 28 | 29 | 30 | class LSTNetEstimator(PyTorchEstimator): 31 | @validated() 32 | def __init__( 33 | self, 34 | freq: str, 35 | prediction_length: Optional[int], 36 | context_length: int, 37 | num_series: int, 38 | ar_window: int = 24, 39 | skip_size: int = 24, 40 | channels: int = 100, 41 | kernel_size: int = 6, 42 | horizon: Optional[int] = None, 43 | trainer: Trainer = Trainer(), 44 | dropout_rate: Optional[float] = 0.2, 45 | output_activation: Optional[str] = 
None, 46 | rnn_cell_type: str = "GRU", 47 | rnn_num_cells: int = 100, 48 | skip_rnn_cell_type: str = "GRU", 49 | skip_rnn_num_cells: int = 5, 50 | scaling: bool = True, 51 | dtype: np.dtype = np.float32, 52 | ): 53 | super().__init__(trainer, dtype=dtype) 54 | 55 | self.freq = freq 56 | self.num_series = num_series 57 | self.skip_size = skip_size 58 | self.ar_window = ar_window 59 | self.horizon = horizon 60 | self.prediction_length = prediction_length 61 | 62 | self.future_length = horizon if horizon is not None else prediction_length 63 | self.context_length = context_length 64 | self.channels = channels 65 | self.kernel_size = kernel_size 66 | self.dropout_rate = dropout_rate 67 | self.output_activation = output_activation 68 | self.rnn_cell_type = rnn_cell_type 69 | self.rnn_num_cells = rnn_num_cells 70 | self.skip_rnn_cell_type = skip_rnn_cell_type 71 | self.skip_rnn_num_cells = skip_rnn_num_cells 72 | self.scaling = scaling 73 | 74 | self.train_sampler = ExpectedNumInstanceSampler( 75 | num_instances=1.0, min_future=self.future_length 76 | ) 77 | self.validation_sampler = ValidationSplitSampler(min_future=self.future_length) 78 | 79 | self.dtype = dtype 80 | 81 | def create_transformation(self) -> Transformation: 82 | return Chain( 83 | trans=[ 84 | AsNumpyArray(field=FieldName.TARGET, expected_ndim=2, dtype=self.dtype), 85 | AddObservedValuesIndicator( 86 | target_field=FieldName.TARGET, 87 | output_field=FieldName.OBSERVED_VALUES, 88 | dtype=self.dtype, 89 | ), 90 | ] 91 | ) 92 | 93 | def create_instance_splitter(self, mode: str): 94 | assert mode in ["training", "validation", "test"] 95 | 96 | instance_sampler = { 97 | "training": self.train_sampler, 98 | "validation": self.validation_sampler, 99 | "test": TestSplitSampler(), 100 | }[mode] 101 | 102 | return InstanceSplitter( 103 | target_field=FieldName.TARGET, 104 | is_pad_field=FieldName.IS_PAD, 105 | start_field=FieldName.START, 106 | forecast_start_field=FieldName.FORECAST_START, 107 | instance_sampler=instance_sampler, 108 | time_series_fields=[FieldName.OBSERVED_VALUES], 109 | past_length=self.context_length, 110 | future_length=self.future_length, 111 | output_NTC=False, 112 | ) 113 | 114 | def create_training_network(self, device: torch.device) -> LSTNetTrain: 115 | return LSTNetTrain( 116 | num_series=self.num_series, 117 | channels=self.channels, 118 | kernel_size=self.kernel_size, 119 | rnn_cell_type=self.rnn_cell_type, 120 | rnn_num_cells=self.rnn_num_cells, 121 | skip_rnn_cell_type=self.skip_rnn_cell_type, 122 | skip_rnn_num_cells=self.skip_rnn_num_cells, 123 | skip_size=self.skip_size, 124 | ar_window=self.ar_window, 125 | context_length=self.context_length, 126 | horizon=self.horizon, 127 | prediction_length=self.prediction_length, 128 | dropout_rate=self.dropout_rate, 129 | output_activation=self.output_activation, 130 | scaling=self.scaling, 131 | ).to(device) 132 | 133 | def create_predictor( 134 | self, 135 | transformation: Transformation, 136 | trained_network: LSTNetTrain, 137 | device: torch.device, 138 | ) -> PyTorchPredictor: 139 | prediction_network = LSTNetPredict( 140 | num_series=self.num_series, 141 | channels=self.channels, 142 | kernel_size=self.kernel_size, 143 | rnn_cell_type=self.rnn_cell_type, 144 | rnn_num_cells=self.rnn_num_cells, 145 | skip_rnn_cell_type=self.skip_rnn_cell_type, 146 | skip_rnn_num_cells=self.skip_rnn_num_cells, 147 | skip_size=self.skip_size, 148 | ar_window=self.ar_window, 149 | context_length=self.context_length, 150 | horizon=self.horizon, 151 | 
prediction_length=self.prediction_length, 152 | dropout_rate=self.dropout_rate, 153 | output_activation=self.output_activation, 154 | scaling=self.scaling, 155 | ).to(device) 156 | 157 | copy_parameters(trained_network, prediction_network) 158 | input_names = get_module_forward_input_names(prediction_network) 159 | prediction_splitter = self.create_instance_splitter("test") 160 | 161 | return PyTorchPredictor( 162 | input_transform=transformation + prediction_splitter, 163 | input_names=input_names, 164 | prediction_net=prediction_network, 165 | batch_size=self.trainer.batch_size, 166 | freq=self.freq, 167 | prediction_length=self.horizon or self.prediction_length, 168 | device=device, 169 | ) 170 | -------------------------------------------------------------------------------- /pts/model/estimator.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple, Optional 2 | from functools import partial 3 | 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.utils import data 9 | from torch.utils.data import DataLoader 10 | 11 | from gluonts.env import env 12 | from gluonts.core.component import validated 13 | from gluonts.dataset.common import Dataset 14 | from gluonts.model.estimator import Estimator 15 | from gluonts.torch.model.predictor import PyTorchPredictor 16 | from gluonts.transform import SelectFields, Transformation 17 | from gluonts.itertools import maybe_len 18 | 19 | from pts import Trainer 20 | from pts.model import get_module_forward_input_names 21 | from pts.dataset.loader import TransformedIterableDataset 22 | 23 | 24 | class TrainOutput(NamedTuple): 25 | transformation: Transformation 26 | trained_net: nn.Module 27 | predictor: PyTorchPredictor 28 | 29 | 30 | class PyTorchEstimator(Estimator): 31 | @validated() 32 | def __init__( 33 | self, trainer: Trainer, lead_time: int = 0, dtype: np.dtype = np.float32 34 | ) -> None: 35 | super().__init__(lead_time=lead_time) 36 | self.trainer = trainer 37 | self.dtype = dtype 38 | 39 | def create_transformation(self) -> Transformation: 40 | """ 41 | Create and return the transformation needed for training and inference. 42 | 43 | Returns 44 | ------- 45 | Transformation 46 | The transformation that will be applied entry-wise to datasets, 47 | at training and inference time. 48 | """ 49 | raise NotImplementedError 50 | 51 | def create_instance_splitter(self, mode: str) -> Transformation: 52 | """ 53 | Create and return the instance splitter needed for training, validation or testing. 54 | 55 | Returns 56 | ------- 57 | Transformation 58 | The InstanceSplitter that will be applied entry-wise to datasets, 59 | at training, validation and inference time based on mode. 60 | """ 61 | raise NotImplementedError 62 | 63 | def create_training_network(self, device: torch.device) -> nn.Module: 64 | """ 65 | Create and return the network used for training (i.e., computing the 66 | loss). 67 | 68 | Returns 69 | ------- 70 | nn.Module 71 | The network that computes the loss given input data. 72 | """ 73 | raise NotImplementedError 74 | 75 | def create_predictor( 76 | self, 77 | transformation: Transformation, 78 | trained_network: nn.Module, 79 | device: torch.device, 80 | ) -> PyTorchPredictor: 81 | """ 82 | Create and return a predictor object. 83 | 84 | Returns 85 | ------- 86 | Predictor 87 | A predictor wrapping a `nn.Module` used for inference. 
88 | """ 89 | raise NotImplementedError 90 | 91 | def train_model( 92 | self, 93 | training_data: Dataset, 94 | validation_data: Optional[Dataset] = None, 95 | num_workers: int = 0, 96 | prefetch_factor: int = 2, 97 | shuffle_buffer_length: Optional[int] = None, 98 | cache_data: bool = False, 99 | **kwargs, 100 | ) -> TrainOutput: 101 | transformation = self.create_transformation() 102 | 103 | trained_net = self.create_training_network(self.trainer.device) 104 | 105 | input_names = get_module_forward_input_names(trained_net) 106 | 107 | with env._let(max_idle_transforms=maybe_len(training_data) or 0): 108 | training_instance_splitter = self.create_instance_splitter("training") 109 | training_iter_dataset = TransformedIterableDataset( 110 | dataset=training_data, 111 | transform=transformation 112 | + training_instance_splitter 113 | + SelectFields(input_names), 114 | is_train=True, 115 | shuffle_buffer_length=shuffle_buffer_length, 116 | cache_data=cache_data, 117 | ) 118 | 119 | training_data_loader = DataLoader( 120 | training_iter_dataset, 121 | batch_size=self.trainer.batch_size, 122 | num_workers=num_workers, 123 | prefetch_factor=prefetch_factor, 124 | pin_memory=True, 125 | worker_init_fn=self._worker_init_fn, 126 | **kwargs, 127 | ) 128 | 129 | validation_data_loader = None 130 | if validation_data is not None: 131 | with env._let(max_idle_transforms=maybe_len(validation_data) or 0): 132 | validation_instance_splitter = self.create_instance_splitter("validation") 133 | validation_iter_dataset = TransformedIterableDataset( 134 | dataset=validation_data, 135 | transform=transformation 136 | + validation_instance_splitter 137 | + SelectFields(input_names), 138 | is_train=True, 139 | cache_data=cache_data, 140 | ) 141 | validation_data_loader = DataLoader( 142 | validation_iter_dataset, 143 | batch_size=self.trainer.batch_size, 144 | num_workers=num_workers, 145 | prefetch_factor=prefetch_factor, 146 | pin_memory=True, 147 | worker_init_fn=self._worker_init_fn, 148 | **kwargs, 149 | ) 150 | 151 | self.trainer( 152 | net=trained_net, 153 | train_iter=training_data_loader, 154 | validation_iter=validation_data_loader, 155 | ) 156 | 157 | return TrainOutput( 158 | transformation=transformation, 159 | trained_net=trained_net, 160 | predictor=self.create_predictor( 161 | transformation, trained_net, self.trainer.device 162 | ), 163 | ) 164 | 165 | @staticmethod 166 | def _worker_init_fn(worker_id): 167 | np.random.seed(np.random.get_state()[1][0] + worker_id) 168 | 169 | def train( 170 | self, 171 | training_data: Dataset, 172 | validation_data: Optional[Dataset] = None, 173 | num_workers: int = 0, 174 | prefetch_factor: int = 2, 175 | shuffle_buffer_length: Optional[int] = None, 176 | cache_data: bool = False, 177 | **kwargs, 178 | ) -> PyTorchPredictor: 179 | return self.train_model( 180 | training_data, 181 | validation_data, 182 | num_workers=num_workers, 183 | prefetch_factor=prefetch_factor, 184 | shuffle_buffer_length=shuffle_buffer_length, 185 | cache_data=cache_data, 186 | **kwargs, 187 | ).predictor 188 | -------------------------------------------------------------------------------- /pts/feature/holiday.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, List 2 | import numpy as np 3 | import pandas as pd 4 | from pandas.tseries.holiday import Holiday 5 | 6 | from gluonts.time_feature.holiday import indicator, distance_to_holiday 7 | 8 | 9 | class CustomDateFeatureSet: 10 | """ 11 | Implements calculation of date 
features. The CustomDateFeatureSet is 12 | applied on a pandas Series with Datetimeindex and returns a 1D array of 13 | the shape (1, len(date_indices)). 14 | 15 | Note that for lower than daily granularity the distance to the holiday is 16 | still computed on a per-day basis. 17 | 18 | Example use: 19 | 20 | >>> import pandas as pd 21 | >>> cfs = CustomDateFeatureSet([pd.to_datetime('20191129', format='%Y%m%d'), 22 | ... pd.to_datetime('20200101', format='%Y%m%d')]) 23 | >>> date_indices = pd.date_range( 24 | ... start="2019-11-24", 25 | ... end="2019-12-31", 26 | ... freq='D' 27 | ... ) 28 | >>> cfs(date_indices) 29 | array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 30 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 31 | 0., 0., 0., 0., 0., 0.]]) 32 | 33 | Example use for using a squared exponential kernel: 34 | 35 | >>> kernel = squared_exponential_kernel(alpha=0.5) 36 | >>> cfs = CustomDateFeatureSet([pd.to_datetime('20191129', format='%Y%m%d'), 37 | ... pd.to_datetime('20200101', format='%Y%m%d')], kernel) 38 | >>> cfs(date_indices) 39 | array([[3.72665317e-06, 3.35462628e-04, 1.11089965e-02, 1.35335283e-01, 40 | 6.06530660e-01, 1.00000000e+00, 6.06530660e-01, 1.35335283e-01, 41 | 1.11089965e-02, 3.35462628e-04, 3.72665317e-06, 1.52299797e-08, 42 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 43 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 44 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 45 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 46 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 47 | 1.52299797e-08, 3.72665317e-06, 3.35462628e-04, 1.11089965e-02, 48 | 1.35335283e-01, 6.06530660e-01]]) 49 | """ 50 | 51 | def __init__( 52 | self, 53 | reference_dates: List[pd.Timestamp], 54 | kernel_function: Callable[[int], int] = indicator, 55 | ): 56 | """ 57 | Parameters 58 | ---------- 59 | reference_dates 60 | list of panda timestamps for which features should be created. 61 | kernel_function 62 | kernel function to pass the feature value based 63 | on distance in days. Can be indicator function (default), 64 | exponential_kernel, squared_exponential_kernel or user defined. 65 | """ 66 | self.reference_dates = reference_dates 67 | self.kernel_function = kernel_function 68 | 69 | def __call__(self, dates): 70 | """ 71 | Transform a pandas series with timestamps to holiday features. 72 | 73 | Parameters 74 | ---------- 75 | dates 76 | Pandas series with Datetimeindex timestamps. 77 | """ 78 | return np.vstack( 79 | [ 80 | np.hstack( 81 | [self.kernel_function((index - ref_date).days) for index in dates] 82 | ) 83 | for ref_date in self.reference_dates 84 | ] 85 | ).sum(0, keepdims=True) 86 | 87 | 88 | class CustomHolidayFeatureSet: 89 | """ 90 | Implements calculation of holiday features. The CustomHolidayFeatureSet is 91 | applied on a pandas Series with Datetimeindex and returns a 2D array of 92 | the shape (len(dates), num_features), where num_features are the number 93 | of holidays. 94 | 95 | Note that for lower than daily granularity the distance to the holiday is 96 | still computed on a per-day basis. 97 | 98 | Example use: 99 | 100 | >>> from pts.features import ( 101 | ... squared_exponential_kernel, 102 | ... SpecialDateFeatureSet, 103 | ... CHRISTMAS_DAY, 104 | ... CHRISTMAS_EVE 105 | ... 
) 106 | >>> import pandas as pd 107 | >>> from pandas.tseries.holiday import Holiday 108 | >>> cfs = CustomHolidayFeatureSet([Holiday("New Years Day", month=1, day=1), Holiday("Christmas Day", month=12, day=25)]) 109 | >>> date_indices = pd.date_range( 110 | ... start="2016-12-24", 111 | ... end="2016-12-31", 112 | ... freq='D' 113 | ... ) 114 | >>> cfs(date_indices) 115 | array([[1., 0., 0., 0., 0., 0., 0., 0.], 116 | [0., 1., 0., 0., 0., 0., 0., 0.]]) 117 | 118 | Example use for using a squared exponential kernel: 119 | 120 | >>> kernel = squared_exponential_kernel(alpha=1.0) 121 | >>> sfs = SpecialDateFeatureSet([Holiday("New Years Day", month=1, day=1), Holiday("Christmas Day", month=12, day=25)], kernel) 122 | >>> sfs(date_indices) 123 | array([[1.00000000e+00, 3.67879441e-01, 1.83156389e-02, 1.23409804e-04, 124 | 1.12535175e-07, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00], 125 | [3.67879441e-01, 1.00000000e+00, 3.67879441e-01, 1.83156389e-02, 126 | 1.23409804e-04, 1.12535175e-07, 0.00000000e+00, 0.00000000e+00]]) 127 | 128 | """ 129 | 130 | def __init__( 131 | self, 132 | custom_holidays: List[Holiday], 133 | kernel_function: Callable[[int], int] = indicator, 134 | ): 135 | """ 136 | Parameters 137 | ---------- 138 | feature_names 139 | list of strings with holiday names for which features should be created. 140 | kernel_function 141 | kernel function to pass the feature value based 142 | on distance in days. Can be indicator function (default), 143 | exponential_kernel, squared_exponential_kernel or user defined. 144 | """ 145 | self.custom_holidays = custom_holidays 146 | self.kernel_function = kernel_function 147 | 148 | def __call__(self, dates): 149 | """ 150 | Transform a pandas series with timestamps to holiday features. 151 | 152 | Parameters 153 | ---------- 154 | dates 155 | Pandas series with Datetimeindex timestamps. 
156 | """ 157 | return np.vstack( 158 | [ 159 | np.hstack( 160 | [ 161 | self.kernel_function(distance_to_holiday(custom_holiday)(index)) 162 | for index in dates 163 | ] 164 | ) 165 | for custom_holiday in self.custom_holidays 166 | ] 167 | ) 168 | -------------------------------------------------------------------------------- /pts/model/lstnet/lstnet_network.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Optional 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from pts.modules import MeanScaler, NOPScaler 9 | 10 | 11 | class LSTNetBase(nn.Module): 12 | def __init__( 13 | self, 14 | num_series: int, 15 | channels: int, 16 | kernel_size: int, 17 | rnn_cell_type: str, 18 | rnn_num_cells: int, 19 | skip_rnn_cell_type: str, 20 | skip_rnn_num_cells: int, 21 | skip_size: int, 22 | ar_window: int, 23 | context_length: int, 24 | horizon: Optional[int], 25 | prediction_length: Optional[int], 26 | dropout_rate: float, 27 | output_activation: Optional[str], 28 | scaling: bool, 29 | *args, 30 | **kwargs, 31 | ) -> None: 32 | super().__init__(*args, **kwargs) 33 | 34 | self.num_series = num_series 35 | self.channels = channels 36 | assert ( 37 | channels % skip_size == 0 38 | ), "number of conv1d `channels` must be divisible by the `skip_size`" 39 | self.skip_size = skip_size 40 | assert ar_window > 0, "auto-regressive window must be a positive integer" 41 | self.ar_window = ar_window 42 | assert not ((horizon is None)) == ( 43 | prediction_length is None 44 | ), "Exactly one of `horizon` and `prediction_length` must be set at a time" 45 | assert horizon is None or horizon > 0, "`horizon` must be greater than zero" 46 | assert ( 47 | prediction_length is None or prediction_length > 0 48 | ), "`prediction_length` must be greater than zero" 49 | self.prediction_length = prediction_length 50 | self.horizon = horizon 51 | assert context_length > 0, "`context_length` must be greater than zero" 52 | self.context_length = context_length 53 | if output_activation is not None: 54 | assert output_activation in [ 55 | "sigmoid", 56 | "tanh", 57 | ], "`output_activation` must be either 'sigmiod' or 'tanh' " 58 | self.output_activation = output_activation 59 | assert rnn_cell_type in [ 60 | "GRU", 61 | "LSTM", 62 | ], "`rnn_cell_type` must be either 'GRU' or 'LSTM' " 63 | assert skip_rnn_cell_type in [ 64 | "GRU", 65 | "LSTM", 66 | ], "`skip_rnn_cell_type` must be either 'GRU' or 'LSTM' " 67 | 68 | conv_out = context_length - kernel_size 69 | self.conv_skip = conv_out // skip_size 70 | assert self.conv_skip > 0, ( 71 | "conv1d output size must be greater than or equal to `skip_size`\n" 72 | "Choose a smaller `kernel_size` or bigger `context_length`" 73 | ) 74 | 75 | self.cnn = nn.Conv2d( 76 | in_channels=1, out_channels=channels, kernel_size=(num_series, kernel_size) 77 | ) 78 | 79 | self.dropout = nn.Dropout(p=dropout_rate) 80 | 81 | rnn = {"LSTM": nn.LSTM, "GRU": nn.GRU}[rnn_cell_type] 82 | self.rnn = rnn( 83 | input_size=channels, 84 | hidden_size=rnn_num_cells, 85 | # dropout=dropout_rate, 86 | ) 87 | 88 | skip_rnn = {"LSTM": nn.LSTM, "GRU": nn.GRU}[skip_rnn_cell_type] 89 | self.skip_rnn_num_cells = skip_rnn_num_cells 90 | self.skip_rnn = skip_rnn( 91 | input_size=channels, 92 | hidden_size=skip_rnn_num_cells, 93 | # dropout=dropout_rate, 94 | ) 95 | 96 | self.fc = nn.Linear(rnn_num_cells + skip_size * skip_rnn_num_cells, num_series) 97 | 98 | if self.horizon: 99 | 
self.ar_fc = nn.Linear(ar_window, 1) 100 | else: 101 | self.ar_fc = nn.Linear(ar_window, prediction_length) 102 | 103 | if scaling: 104 | self.scaler = MeanScaler(keepdim=True, time_first=False) 105 | else: 106 | self.scaler = NOPScaler(keepdim=True, time_first=False) 107 | 108 | def forward( 109 | self, past_target: torch.Tensor, past_observed_values: torch.Tensor 110 | ) -> torch.Tensor: 111 | scaled_past_target, scale = self.scaler( 112 | past_target[..., -self.context_length :], # [B, C, T] 113 | past_observed_values[..., -self.context_length :], # [B, C, T] 114 | ) 115 | 116 | # CNN 117 | c = F.relu(self.cnn(scaled_past_target.unsqueeze(1))) 118 | c = self.dropout(c) 119 | c = c.squeeze(2) # [B, C, T] 120 | 121 | # RNN 122 | r = c.permute(2, 0, 1) # [F (T), B, C] 123 | _, r = self.rnn(r) # [1, B, H] 124 | r = self.dropout(r.squeeze(0)) # [B, H] 125 | 126 | # Skip-RNN 127 | skip_c = c[..., -self.conv_skip * self.skip_size :] 128 | skip_c = skip_c.reshape(-1, self.channels, self.conv_skip, self.skip_size) 129 | skip_c = skip_c.permute(2, 0, 3, 1) 130 | skip_c = skip_c.reshape((self.conv_skip, -1, self.channels)) 131 | _, skip_c = self.skip_rnn(skip_c) 132 | skip_c = skip_c.reshape((-1, self.skip_size * self.skip_rnn_num_cells)) 133 | skip_c = self.dropout(skip_c) 134 | 135 | res = self.fc(torch.cat((r, skip_c), 1)).unsqueeze(-1) 136 | 137 | # Highway 138 | ar_x = scaled_past_target[..., -self.ar_window :] 139 | ar_x = ar_x.reshape(-1, self.ar_window) 140 | 141 | ar_x = self.ar_fc(ar_x) 142 | if self.horizon: 143 | ar_x = ar_x.reshape(-1, self.num_series, 1) 144 | else: 145 | ar_x = ar_x.reshape(-1, self.num_series, self.prediction_length) 146 | out = res + ar_x 147 | 148 | if self.output_activation is None: 149 | return out, scale 150 | 151 | return ( 152 | ( 153 | torch.sigmoid(out) 154 | if self.output_activation == "sigmoid" 155 | else torch.tanh(out) 156 | ), 157 | scale, 158 | ) 159 | 160 | 161 | class LSTNetTrain(LSTNetBase): 162 | def __init__(self, *args, **kwargs) -> None: 163 | super().__init__(*args, **kwargs) 164 | self.loss_fn = nn.L1Loss() 165 | 166 | def forward( 167 | self, 168 | past_target: torch.Tensor, 169 | past_observed_values: torch.Tensor, 170 | future_target: torch.Tensor, 171 | ) -> torch.Tensor: 172 | ret, scale = super().forward(past_target, past_observed_values) 173 | 174 | if self.horizon: 175 | future_target = future_target[..., -1:] 176 | 177 | loss = self.loss_fn(ret * scale, future_target) 178 | return loss 179 | 180 | 181 | class LSTNetPredict(LSTNetBase): 182 | def forward( 183 | self, past_target: torch.Tensor, past_observed_values: torch.Tensor 184 | ) -> torch.Tensor: 185 | ret, scale = super().forward(past_target, past_observed_values) 186 | ret = (ret * scale).permute(0, 2, 1) 187 | 188 | return ret.unsqueeze(1) 189 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorchTS 2 | 3 | PyTorchTS is a [PyTorch](https://github.com/pytorch/pytorch) Probabilistic Time Series forecasting framework which provides state of the art PyTorch time series models by utilizing [GluonTS](https://github.com/awslabs/gluon-ts) as its back-end API and for loading, transforming and back-testing time series data sets. 4 | 5 | ## Installation 6 | 7 | ``` 8 | $ pip3 install pytorchts 9 | ``` 10 | 11 | ## Quick start 12 | 13 | Here we highlight the the API changes via the GluonTS README. 
14 | 15 | ```python 16 | import matplotlib.pyplot as plt 17 | import pandas as pd 18 | import torch 19 | 20 | from gluonts.dataset.common import ListDataset 21 | from gluonts.dataset.util import to_pandas 22 | 23 | from pts.model.deepar import DeepAREstimator 24 | from pts import Trainer 25 | ``` 26 | 27 | This simple example illustrates how to train a model on some data, and then use it to make predictions. As a first step, we need to collect some data: in this example we will use the volume of tweets mentioning the AMZN ticker symbol. 28 | 29 | ```python 30 | url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv" 31 | df = pd.read_csv(url, header=0, index_col=0, parse_dates=True) 32 | ``` 33 | 34 | The first 100 data points look as follows: 35 | 36 | ```python 37 | df[:100].plot(linewidth=2) 38 | plt.grid(which='both') 39 | plt.show() 40 | ``` 41 | 42 | ![png](https://github.com/zalandoresearch/pytorch-ts/blob/master/examples/images/readme_0.png?raw=true) 43 | 44 | 45 | We can now prepare a training dataset for our model to train on. Datasets are essentially iterable collections of dictionaries: each dictionary represents a time series with possibly associated features. For this example, we only have one entry, specified by the `"start"` field, which is the timestamp of the first data point, and the `"target"` field containing the time series data. For training, we will use data up to midnight on April 5th, 2015. 46 | 47 | 48 | ```python 49 | training_data = ListDataset( 50 | [{"start": df.index[0], "target": df.value[:"2015-04-05 00:00:00"]}], 51 | freq = "5min" 52 | ) 53 | ``` 54 | 55 | A forecasting model is a *predictor* object. One way of obtaining predictors is by training a corresponding estimator. Instantiating an estimator requires specifying the frequency of the time series that it will handle, as well as the number of time steps to predict. In our example we're using 5-minute data, so `freq="5min"`, and we will train a model to predict the next hour, so `prediction_length=12`. The input to the model will be a vector of size `input_size=19` at each time point. We also specify some minimal training options, in particular training on a `device` for `epochs=10`. 56 | 57 | 58 | ```python 59 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 60 | 61 | estimator = DeepAREstimator(freq="5min", 62 | prediction_length=12, 63 | input_size=19, 64 | trainer=Trainer(epochs=10, 65 | device=device)) 66 | predictor = estimator.train(training_data=training_data, num_workers=4) 67 | ``` 68 | ``` 69 | 45it [00:01, 37.60it/s, avg_epoch_loss=4.64, epoch=0] 70 | 48it [00:01, 39.56it/s, avg_epoch_loss=4.2, epoch=1] 71 | 45it [00:01, 38.11it/s, avg_epoch_loss=4.1, epoch=2] 72 | 43it [00:01, 36.29it/s, avg_epoch_loss=4.05, epoch=3] 73 | 44it [00:01, 35.98it/s, avg_epoch_loss=4.03, epoch=4] 74 | 48it [00:01, 39.48it/s, avg_epoch_loss=4.01, epoch=5] 75 | 48it [00:01, 38.65it/s, avg_epoch_loss=4, epoch=6] 76 | 46it [00:01, 37.12it/s, avg_epoch_loss=3.99, epoch=7] 77 | 48it [00:01, 38.86it/s, avg_epoch_loss=3.98, epoch=8] 78 | 48it [00:01, 39.49it/s, avg_epoch_loss=3.97, epoch=9] 79 | ``` 80 | 81 | During training, useful information about the progress will be displayed. To get a full overview of the available options, please refer to the source code of `DeepAREstimator` (or other estimators) and `Trainer`. 82 | 83 | We're now ready to make predictions: we will forecast the hour following midnight on April 15th, 2015.
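Forecast accuracy can also be checked quantitatively rather than only by eye. A minimal backtesting sketch using GluonTS's evaluation helpers, assuming the `predictor` trained above and the `test_data` `ListDataset` defined just below:

```python
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions

# generate sample-path forecasts and the matching ground-truth series
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,    # the test dataset defined below
    predictor=predictor,  # the predictor trained above
    num_samples=100,      # number of sample paths per series
)
forecasts = list(forecast_it)
tss = list(ts_it)

# aggregate and per-series metrics (MSE, quantile losses, coverage, ...)
agg_metrics, item_metrics = Evaluator(num_workers=0)(
    iter(tss), iter(forecasts), num_series=len(test_data)
)
print(agg_metrics["MSE"])
```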
84 | 85 | 86 | ```python 87 | test_data = ListDataset( 88 | [{"start": df.index[0], "target": df.value[:"2015-04-15 00:00:00"]}], 89 | freq = "5min" 90 | ) 91 | ``` 92 | 93 | 94 | ```python 95 | for test_entry, forecast in zip(test_data, predictor.predict(test_data)): 96 | to_pandas(test_entry)[-60:].plot(linewidth=2) 97 | forecast.plot(color='g', prediction_intervals=[50.0, 90.0]) 98 | plt.grid(which='both') 99 | ``` 100 | 101 | ![png](https://github.com/zalandoresearch/pytorch-ts/blob/master/examples/images/readme_1.png?raw=true) 102 | 103 | 104 | Note that the forecast is displayed in terms of a probability distribution: the shaded areas represent the 50% and 90% prediction intervals, respectively, centered around the median (dark green line). 105 | 106 | 107 | ## Development 108 | 109 | ``` 110 | pip install -e . 111 | pytest test 112 | ``` 113 | 114 | ## Citing 115 | 116 | To cite this repository: 117 | 118 | ```tex 119 | @software{pytorchgithub, 120 | author = {Kashif Rasul}, 121 | title = {{P}yTorch{TS}}, 122 | url = {https://github.com/zalandoresearch/pytorch-ts}, 123 | version = {0.6.x}, 124 | year = {2021}, 125 | } 126 | ``` 127 | 128 | ## Scientific Article 129 | 130 | We have implemented the following model using this framework: 131 | 132 | * [Multi-variate Probabilistic Time Series Forecasting via Conditioned Normalizing Flows](https://arxiv.org/abs/2002.06103) 133 | ```tex 134 | @INPROCEEDINGS{rasul2020tempflow, 135 | author = {Kashif Rasul and Abdul-Saboor Sheikh and Ingmar Schuster and Urs Bergmann and Roland Vollgraf}, 136 | title = {{M}ultivariate {P}robabilistic {T}ime {S}eries {F}orecasting via {C}onditioned {N}ormalizing {F}lows}, 137 | year = {2021}, 138 | url = {https://openreview.net/forum?id=WiGQBFuVRv}, 139 | booktitle = {International Conference on Learning Representations 2021}, 140 | } 141 | ``` 142 | 143 | * [Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting 144 | ](http://proceedings.mlr.press/v139/rasul21a.html) 145 | ```tex 146 | @InProceedings{pmlr-v139-rasul21a, 147 | title = {{A}utoregressive {D}enoising {D}iffusion {M}odels for {M}ultivariate {P}robabilistic {T}ime {S}eries {F}orecasting}, 148 | author = {Rasul, Kashif and Seward, Calvin and Schuster, Ingmar and Vollgraf, Roland}, 149 | booktitle = {Proceedings of the 38th International Conference on Machine Learning}, 150 | pages = {8857--8868}, 151 | year = {2021}, 152 | editor = {Meila, Marina and Zhang, Tong}, 153 | volume = {139}, 154 | series = {Proceedings of Machine Learning Research}, 155 | month = {18--24 Jul}, 156 | publisher = {PMLR}, 157 | pdf = {http://proceedings.mlr.press/v139/rasul21a/rasul21a.pdf}, 158 | url = {http://proceedings.mlr.press/v139/rasul21a.html}, 159 | } 160 | ``` 161 | 162 | * [Probabilistic Time Series Forecasting with Implicit Quantile Networks](https://arxiv.org/abs/2107.03743) 163 | ```tex 164 | @misc{gouttes2021probabilistic, 165 | title={{P}robabilistic {T}ime {S}eries {F}orecasting with {I}mplicit {Q}uantile {N}etworks}, 166 | author={Adèle Gouttes and Kashif Rasul and Mateusz Koren and Johannes Stephan and Tofigh Naghibi}, 167 | year={2021}, 168 | eprint={2107.03743}, 169 | archivePrefix={arXiv}, 170 | primaryClass={cs.LG} 171 | } 172 | ``` 173 | -------------------------------------------------------------------------------- /test/distributions/test_piecewise_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | from typing import Tuple, List 15 | import pytest 16 | 17 | import torch 18 | import numpy as np 19 | 20 | from pts.distributions import PiecewiseLinear 21 | from pts.modules import PiecewiseLinearOutput 22 | 23 | 24 | def empirical_cdf( 25 | samples: np.ndarray, num_bins: int = 100 26 | ) -> Tuple[np.ndarray, np.ndarray]: 27 | """ 28 | Calculate the empirical cdf from the given samples. 29 | Parameters 30 | ---------- 31 | samples 32 | Tensor of samples of shape (num_samples, batch_shape) 33 | Returns 34 | ------- 35 | Tensor 36 | Empirically calculated cdf values. shape (num_bins, batch_shape) 37 | Tensor 38 | Bin edges corresponding to the cdf values. shape (num_bins + 1, batch_shape) 39 | """ 40 | 41 | # calculate histogram separately for each dimension in the batch size 42 | cdfs = [] 43 | edges = [] 44 | 45 | batch_shape = samples.shape[1:] 46 | agg_batch_dim = np.prod(batch_shape, dtype=np.int) 47 | 48 | samples = samples.reshape((samples.shape[0], -1)) 49 | 50 | for i in range(agg_batch_dim): 51 | s = samples[:, i] 52 | bins = np.linspace(s.min(), s.max(), num_bins + 1) 53 | hist, edge = np.histogram(s, bins=bins) 54 | cdfs.append(np.cumsum(hist / len(s))) 55 | edges.append(edge) 56 | 57 | empirical_cdf = np.stack(cdfs, axis=-1).reshape(num_bins, *batch_shape) 58 | edges = np.stack(edges, axis=-1).reshape(num_bins + 1, *batch_shape) 59 | return empirical_cdf, edges 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "distr, target, expected_target_cdf, expected_target_crps", 64 | [ 65 | ( 66 | PiecewiseLinear( 67 | gamma=torch.ones(size=(1,)), 68 | slopes=torch.Tensor([2, 3, 1]).reshape(shape=(1, 3)), 69 | knot_spacings=torch.Tensor([0.3, 0.4, 0.3]).reshape(shape=(1, 3)), 70 | ), 71 | [2.2], 72 | [0.5], 73 | [0.223000], 74 | ), 75 | ( 76 | PiecewiseLinear( 77 | gamma=torch.ones(size=(2,)), 78 | slopes=torch.Tensor([[1, 1], [1, 2]]).reshape(shape=(2, 2)), 79 | knot_spacings=torch.Tensor([[0.4, 0.6], [0.4, 0.6]]).reshape( 80 | shape=(2, 2) 81 | ), 82 | ), 83 | [1.5, 1.6], 84 | [0.5, 0.5], 85 | [0.083333, 0.145333], 86 | ), 87 | ], 88 | ) 89 | def test_values( 90 | distr: PiecewiseLinear, 91 | target: List[float], 92 | expected_target_cdf: List[float], 93 | expected_target_crps: List[float], 94 | ): 95 | target = torch.Tensor(target).reshape(shape=(len(target),)) 96 | expected_target_cdf = np.array(expected_target_cdf).reshape( 97 | (len(expected_target_cdf),) 98 | ) 99 | expected_target_crps = np.array(expected_target_crps).reshape( 100 | (len(expected_target_crps),) 101 | ) 102 | 103 | assert all(np.isclose(distr.cdf(target).numpy(), expected_target_cdf)) 104 | assert all(np.isclose(distr.crps(target).numpy(), expected_target_crps)) 105 | 106 | # compare with empirical cdf from samples 107 | num_samples = 100_000 108 | samples = distr.sample((num_samples,)).numpy() 109 | assert np.isfinite(samples).all() 110 | 111 | emp_cdf, edges = empirical_cdf(samples) 112 | calc_cdf = distr.cdf(torch.Tensor(edges)).numpy() 113 | assert 
np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2) 114 | 115 | 116 | @pytest.mark.parametrize( 117 | "batch_shape, num_pieces, num_samples", 118 | [((3, 4, 5), 10, 100), ((1,), 2, 1), ((10,), 10, 10), ((10, 5), 2, 1)], 119 | ) 120 | def test_shapes(batch_shape: Tuple, num_pieces: int, num_samples: int): 121 | gamma = torch.ones(size=(*batch_shape,)) 122 | slopes = torch.ones(size=(*batch_shape, num_pieces)) # all positive 123 | knot_spacings = ( 124 | torch.ones(size=(*batch_shape, num_pieces)) / num_pieces 125 | ) # positive and sum to 1 126 | target = torch.ones(size=batch_shape) # shape of gamma 127 | 128 | distr = PiecewiseLinear(gamma=gamma, slopes=slopes, knot_spacings=knot_spacings) 129 | 130 | # assert that the parameters and target have proper shapes 131 | assert gamma.shape == target.shape 132 | assert knot_spacings.shape == slopes.shape 133 | assert len(gamma.shape) + 1 == len(knot_spacings.shape) 134 | 135 | # assert that batch_shape is computed properly 136 | assert distr.batch_shape == batch_shape 137 | 138 | # assert that shapes of original parameters are correct 139 | assert distr.b.shape == slopes.shape 140 | assert distr.knot_positions.shape == knot_spacings.shape 141 | 142 | # assert that the shape of crps is correct 143 | assert distr.crps(target).shape == batch_shape 144 | 145 | # assert that the quantile shape is correct when computing the 146 | # quantile values at knot positions - used for a_tilde 147 | assert distr.quantile_internal(knot_spacings, dim=-2).shape == ( 148 | *batch_shape, 149 | num_pieces, 150 | ) 151 | 152 | # assert that the samples and the quantile values shape when num_samples 153 | # is None is correct 154 | samples = distr.sample() 155 | assert samples.shape == batch_shape 156 | assert distr.quantile_internal(samples).shape == batch_shape 157 | 158 | # assert that the samples and the quantile values shape when num_samples 159 | # is not None is correct 160 | samples = distr.sample((num_samples,)) 161 | assert samples.shape == (num_samples, *batch_shape) 162 | assert distr.quantile_internal(samples, dim=0).shape == (num_samples, *batch_shape,) 163 | 164 | 165 | def test_simple_symmetric(): 166 | gamma = torch.Tensor([-1.0]) 167 | slopes = torch.Tensor([[2.0, 2.0]]) 168 | knot_spacings = torch.Tensor([[0.5, 0.5]]) 169 | 170 | distr = PiecewiseLinear(gamma=gamma, slopes=slopes, knot_spacings=knot_spacings) 171 | 172 | assert distr.cdf(torch.Tensor([-2.0])).numpy().item() == 0.0 173 | assert distr.cdf(torch.Tensor([+2.0])).numpy().item() == 1.0 174 | 175 | expected_crps = np.array([1.0 + 2.0 / 3.0]) 176 | 177 | assert np.allclose(distr.crps(torch.Tensor([-2.0])).numpy(), expected_crps) 178 | 179 | assert np.allclose(distr.crps(torch.Tensor([2.0])).numpy(), expected_crps) 180 | 181 | 182 | def test_robustness(): 183 | distr_out = PiecewiseLinearOutput(num_pieces=10) 184 | args_proj = distr_out.get_args_proj(in_features=30) 185 | 186 | net_out = torch.normal(mean=0.0, size=(1000, 30), std=1e2) 187 | gamma, slopes, knot_spacings = args_proj(net_out) 188 | distr = distr_out.distribution((gamma, slopes, knot_spacings)) 189 | 190 | # compute the 1-quantile (the 0-quantile is gamma) 191 | sup_support = gamma + (slopes * knot_spacings).sum(-1) 192 | 193 | assert torch.le(gamma, sup_support).numpy().all() 194 | 195 | width = sup_support - gamma 196 | x = torch.from_numpy( 197 | np.random.uniform( 198 | low=(gamma - width).detach().numpy(), 199 | high=(sup_support + width).detach().numpy(), 200 | ).astype(np.float32), 201 | ) 202 | 203 | # check that 0 < cdf < 
1 204 | cdf_x = distr.cdf(x) 205 | assert torch.min(cdf_x).item() >= 0.0 and torch.max(cdf_x).item() <= 1.0 206 | 207 | # check that 0 <= crps 208 | crps_x = distr.crps(x) 209 | assert torch.min(crps_x).item() >= 0.0 210 | -------------------------------------------------------------------------------- /pts/model/simple_feedforward/simple_feedforward_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from gluonts.core.component import validated 7 | from gluonts.torch.util import copy_parameters 8 | from gluonts.torch.model.predictor import PyTorchPredictor 9 | from gluonts.torch.modules.distribution_output import DistributionOutput 10 | from gluonts.model.predictor import Predictor 11 | from gluonts.dataset.field_names import FieldName 12 | from gluonts.transform import ( 13 | Transformation, 14 | Chain, 15 | InstanceSplitter, 16 | ExpectedNumInstanceSampler, 17 | ValidationSplitSampler, 18 | TestSplitSampler, 19 | ) 20 | 21 | from pts.model.utils import get_module_forward_input_names 22 | from pts import Trainer 23 | from pts.model import PyTorchEstimator 24 | from pts.modules import StudentTOutput 25 | 26 | from .simple_feedforward_network import ( 27 | SimpleFeedForwardTrainingNetwork, 28 | SimpleFeedForwardPredictionNetwork, 29 | ) 30 | 31 | 32 | class SimpleFeedForwardEstimator(PyTorchEstimator): 33 | """ 34 | SimpleFeedForwardEstimator shows how to build a simple MLP model predicting 35 | the next target time-steps given the previous ones. 36 | 37 | Given that we want to define a pytorch model trainable by SGD, we inherit the 38 | parent class `PyTorchEstimator` that handles most of the logic for fitting a 39 | neural-network. 40 | 41 | We thus only have to define: 42 | 43 | 1. How the data is transformed before being fed to our model:: 44 | 45 | def create_transformation(self) -> Transformation 46 | 47 | 2. How the training happens:: 48 | 49 | def create_training_network(self) -> nn.Module 50 | 51 | 3. how the predictions can be made for a batch given a trained network:: 52 | 53 | def create_predictor( 54 | self, 55 | transformation: Transformation, 56 | trained_net: nn.Module, 57 | ) -> Predictor 58 | 59 | 60 | Parameters 61 | ---------- 62 | freq 63 | Time time granularity of the data 64 | prediction_length 65 | Length of the prediction horizon 66 | trainer 67 | Trainer object to be used (default: Trainer()) 68 | num_hidden_dimensions 69 | Number of hidden nodes in each layer (default: [40, 40]) 70 | context_length 71 | Number of time units that condition the predictions 72 | (default: None, in which case context_length = prediction_length) 73 | distr_output 74 | Distribution to fit (default: StudentTOutput()) 75 | batch_normalization 76 | Whether to use batch normalization (default: False) 77 | mean_scaling 78 | Scale the network input by the data mean and the network output by 79 | its inverse (default: True) 80 | num_parallel_samples 81 | Number of evaluation samples per time series to increase parallelism during inference. 
82 | This is a model optimization that does not affect the accuracy (default: 100) 83 | """ 84 | 85 | @validated() 86 | def __init__( 87 | self, 88 | freq: str, 89 | prediction_length: int, 90 | trainer: Trainer = Trainer(), 91 | num_hidden_dimensions: Optional[List[int]] = None, 92 | context_length: Optional[int] = None, 93 | distr_output: DistributionOutput = StudentTOutput(), 94 | batch_normalization: bool = False, 95 | mean_scaling: bool = True, 96 | num_parallel_samples: int = 100, 97 | ) -> None: 98 | """ 99 | Defines an estimator. All parameters should be serializable. 100 | """ 101 | super().__init__(trainer=trainer) 102 | 103 | self.num_hidden_dimensions = ( 104 | num_hidden_dimensions 105 | if num_hidden_dimensions is not None 106 | else list([40, 40]) 107 | ) 108 | self.prediction_length = prediction_length 109 | self.context_length = ( 110 | context_length if context_length is not None else prediction_length 111 | ) 112 | self.freq = freq 113 | self.distr_output = distr_output 114 | self.batch_normalization = batch_normalization 115 | self.mean_scaling = mean_scaling 116 | self.num_parallel_samples = num_parallel_samples 117 | 118 | self.train_sampler = ExpectedNumInstanceSampler( 119 | num_instances=1, min_future=prediction_length 120 | ) 121 | self.validation_sampler = ValidationSplitSampler(min_future=prediction_length) 122 | 123 | # here we do only a simple operation to convert the input data to a form 124 | # that can be digested by our model by only splitting the target in two, a 125 | # conditioning part and a to-predict part, for each training example. 126 | # For a more complex transformation example, see the `pts.model.deepar` 127 | # transformation that includes time features, age feature, observed values 128 | # indicator, etc. 129 | def create_transformation(self) -> Transformation: 130 | return Chain([]) 131 | 132 | def create_instance_splitter(self, mode: str): 133 | assert mode in ["training", "validation", "test"] 134 | instance_sampler = { 135 | "training": self.train_sampler, 136 | "validation": self.validation_sampler, 137 | "test": TestSplitSampler(), 138 | }[mode] 139 | 140 | return InstanceSplitter( 141 | target_field=FieldName.TARGET, 142 | is_pad_field=FieldName.IS_PAD, 143 | start_field=FieldName.START, 144 | forecast_start_field=FieldName.FORECAST_START, 145 | instance_sampler=instance_sampler, 146 | past_length=self.context_length, 147 | future_length=self.prediction_length, 148 | time_series_fields=[], # [FieldName.FEAT_DYNAMIC_REAL] 149 | ) 150 | 151 | # defines the network, we get to see one batch to initialize it. 152 | # the network should return at least one tensor that is used as a loss to minimize in the training loop. 153 | # several tensors can be returned for instance for analysis, see DeepARTrainingNetwork for an example. 154 | def create_training_network( 155 | self, device: torch.device 156 | ) -> SimpleFeedForwardTrainingNetwork: 157 | return SimpleFeedForwardTrainingNetwork( 158 | num_hidden_dimensions=self.num_hidden_dimensions, 159 | prediction_length=self.prediction_length, 160 | context_length=self.context_length, 161 | distr_output=self.distr_output, 162 | batch_normalization=self.batch_normalization, 163 | mean_scaling=self.mean_scaling, 164 | ).to(device) 165 | 166 | # we now define how the prediction happens given that we are provided a 167 | # training network. 
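    # End-to-end, a typical (hypothetical) use of this estimator looks roughly like:
    #
    #     estimator = SimpleFeedForwardEstimator(
    #         freq="1H", prediction_length=24, trainer=Trainer(epochs=5)
    #     )
    #     predictor = estimator.train(training_data)
    #
    # `train` (inherited from `PyTorchEstimator`) roughly chains create_transformation(),
    # the "training" instance splitter and create_training_network(), runs the Trainer
    # on the resulting batches, and finally hands the fitted network to
    # create_predictor() below.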
168 | def create_predictor( 169 | self, 170 | transformation: Transformation, 171 | trained_network: nn.Module, 172 | device: torch.device, 173 | ) -> Predictor: 174 | prediction_splitter = self.create_instance_splitter("test") 175 | 176 | prediction_network = SimpleFeedForwardPredictionNetwork( 177 | num_hidden_dimensions=self.num_hidden_dimensions, 178 | prediction_length=self.prediction_length, 179 | context_length=self.context_length, 180 | distr_output=self.distr_output, 181 | batch_normalization=self.batch_normalization, 182 | mean_scaling=self.mean_scaling, 183 | num_parallel_samples=self.num_parallel_samples, 184 | ).to(device) 185 | 186 | copy_parameters(trained_network, prediction_network) 187 | input_names = get_module_forward_input_names(prediction_network) 188 | 189 | return PyTorchPredictor( 190 | input_transform=transformation + prediction_splitter, 191 | input_names=input_names, 192 | prediction_net=prediction_network, 193 | batch_size=self.trainer.batch_size, 194 | freq=self.freq, 195 | prediction_length=self.prediction_length, 196 | device=device, 197 | ) 198 | -------------------------------------------------------------------------------- /pts/model/n_beats/n_beats_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from gluonts.core.component import validated 7 | from gluonts.dataset.field_names import FieldName 8 | from gluonts.model.predictor import Predictor 9 | from gluonts.torch.model.predictor import PyTorchPredictor 10 | from gluonts.torch.util import copy_parameters 11 | from gluonts.transform import ( 12 | InstanceSplitter, 13 | ValidationSplitSampler, 14 | TestSplitSampler, 15 | AddObservedValuesIndicator, 16 | Transformation, 17 | Chain, 18 | RemoveFields, 19 | ExpectedNumInstanceSampler, 20 | ) 21 | 22 | from pts import Trainer 23 | from pts.model import PyTorchEstimator 24 | from pts.model.utils import get_module_forward_input_names 25 | 26 | from .n_beats_network import ( 27 | NBEATSPredictionNetwork, 28 | NBEATSTrainingNetwork, 29 | VALID_N_BEATS_STACK_TYPES, 30 | ) 31 | 32 | 33 | class NBEATSEstimator(PyTorchEstimator): 34 | @validated() 35 | def __init__( 36 | self, 37 | freq: str, 38 | prediction_length: int, 39 | context_length: Optional[int] = None, 40 | trainer: Trainer = Trainer(), 41 | num_stacks: int = 30, 42 | widths: Optional[List[int]] = None, 43 | num_blocks: Optional[List[int]] = None, 44 | num_block_layers: Optional[List[int]] = None, 45 | expansion_coefficient_lengths: Optional[List[int]] = None, 46 | sharing: Optional[List[bool]] = None, 47 | stack_types: Optional[List[str]] = None, 48 | loss_function: Optional[str] = "MAPE", 49 | **kwargs, 50 | ) -> None: 51 | super().__init__(trainer=trainer, **kwargs) 52 | 53 | self.freq = freq 54 | self.prediction_length = prediction_length 55 | self.context_length = ( 56 | context_length if context_length is not None else 2 * prediction_length 57 | ) 58 | # num_stacks has to be handled separately because other arguments have to match its length 59 | self.num_stacks = num_stacks 60 | self.loss_function = loss_function 61 | 62 | self.widths = self._validate_nbeats_argument( 63 | argument_value=widths, 64 | argument_name="widths", 65 | default_value=[512], 66 | validation_condition=lambda val: val > 0, 67 | invalidation_message="Values of 'widths' should be > 0", 68 | ) 69 | self.num_blocks = self._validate_nbeats_argument( 70 | argument_value=num_blocks, 71 | 
argument_name="num_blocks", 72 | default_value=[1], 73 | validation_condition=lambda val: val > 0, 74 | invalidation_message="Values of 'num_blocks' should be > 0", 75 | ) 76 | self.num_block_layers = self._validate_nbeats_argument( 77 | argument_value=num_block_layers, 78 | argument_name="num_block_layers", 79 | default_value=[4], 80 | validation_condition=lambda val: val > 0, 81 | invalidation_message="Values of 'block_layers' should be > 0", 82 | ) 83 | self.sharing = self._validate_nbeats_argument( 84 | argument_value=sharing, 85 | argument_name="sharing", 86 | default_value=[False], 87 | validation_condition=lambda val: True, 88 | invalidation_message="", 89 | ) 90 | self.expansion_coefficient_lengths = self._validate_nbeats_argument( 91 | argument_value=expansion_coefficient_lengths, 92 | argument_name="expansion_coefficient_lengths", 93 | default_value=[32], 94 | validation_condition=lambda val: val > 0, 95 | invalidation_message="Values of 'expansion_coefficient_lengths' should be > 0", 96 | ) 97 | self.stack_types = self._validate_nbeats_argument( 98 | argument_value=stack_types, 99 | argument_name="stack_types", 100 | default_value=["G"], 101 | validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES, 102 | invalidation_message=f"Values of 'stack_types' should be one of {VALID_N_BEATS_STACK_TYPES}", 103 | ) 104 | 105 | self.train_sampler = ExpectedNumInstanceSampler( 106 | num_instances=1.0, min_future=prediction_length 107 | ) 108 | self.validation_sampler = ValidationSplitSampler(min_future=prediction_length) 109 | 110 | def _validate_nbeats_argument( 111 | self, 112 | argument_value, 113 | argument_name, 114 | default_value, 115 | validation_condition, 116 | invalidation_message, 117 | ): 118 | # set default value if applicable 119 | new_value = argument_value if argument_value is not None else default_value 120 | 121 | # check whether dimension of argument matches num_stack dimension 122 | assert len(new_value) == 1 or len(new_value) == self.num_stacks, ( 123 | f"Invalid lengths of argument {argument_name}: {len(new_value)}. Argument must have " 124 | f"length 1 or {self.num_stacks} " 125 | ) 126 | 127 | # check validity of actual values 128 | assert all( 129 | [validation_condition(val) for val in new_value] 130 | ), invalidation_message 131 | 132 | # make length of arguments consistent 133 | if len(new_value) == 1: 134 | return new_value * self.num_stacks 135 | else: 136 | return new_value 137 | 138 | # Here we do only a simple operation to convert the input data to a form 139 | # that can be digested by our model by only splitting the target in two, a 140 | # conditioning part and a to-predict part, for each training example. 
141 | def create_transformation(self) -> Transformation: 142 | return Chain( 143 | [ 144 | RemoveFields( 145 | field_names=[ 146 | FieldName.FEAT_STATIC_REAL, 147 | FieldName.FEAT_DYNAMIC_REAL, 148 | FieldName.FEAT_DYNAMIC_CAT, 149 | ] 150 | ), 151 | AddObservedValuesIndicator( 152 | target_field=FieldName.TARGET, 153 | output_field=FieldName.OBSERVED_VALUES, 154 | dtype=self.dtype, 155 | ), 156 | ] 157 | ) 158 | 159 | def create_instance_splitter(self, mode: str): 160 | assert mode in ["training", "validation", "test"] 161 | 162 | instance_sampler = { 163 | "training": self.train_sampler, 164 | "validation": self.validation_sampler, 165 | "test": TestSplitSampler(), 166 | }[mode] 167 | 168 | return InstanceSplitter( 169 | target_field=FieldName.TARGET, 170 | is_pad_field=FieldName.IS_PAD, 171 | start_field=FieldName.START, 172 | forecast_start_field=FieldName.FORECAST_START, 173 | instance_sampler=instance_sampler, 174 | past_length=self.context_length, 175 | future_length=self.prediction_length, 176 | time_series_fields=[FieldName.OBSERVED_VALUES], 177 | ) 178 | 179 | def create_training_network(self, device: torch.device) -> NBEATSTrainingNetwork: 180 | return NBEATSTrainingNetwork( 181 | prediction_length=self.prediction_length, 182 | context_length=self.context_length, 183 | num_stacks=self.num_stacks, 184 | widths=self.widths, 185 | num_blocks=self.num_blocks, 186 | num_block_layers=self.num_block_layers, 187 | expansion_coefficient_lengths=self.expansion_coefficient_lengths, 188 | sharing=self.sharing, 189 | stack_types=self.stack_types, 190 | loss_function=self.loss_function, 191 | freq=self.freq, 192 | ).to(device) 193 | 194 | def create_predictor( 195 | self, 196 | transformation: Transformation, 197 | trained_network: nn.Module, 198 | device: torch.device, 199 | ) -> Predictor: 200 | prediction_network = NBEATSPredictionNetwork( 201 | prediction_length=self.prediction_length, 202 | context_length=self.context_length, 203 | num_stacks=self.num_stacks, 204 | widths=self.widths, 205 | num_blocks=self.num_blocks, 206 | num_block_layers=self.num_block_layers, 207 | expansion_coefficient_lengths=self.expansion_coefficient_lengths, 208 | sharing=self.sharing, 209 | stack_types=self.stack_types, 210 | ).to(device) 211 | 212 | copy_parameters(trained_network, prediction_network) 213 | input_names = get_module_forward_input_names(prediction_network) 214 | prediction_splitter = self.create_instance_splitter("test") 215 | 216 | return PyTorchPredictor( 217 | input_transform=transformation + prediction_splitter, 218 | input_names=input_names, 219 | prediction_net=prediction_network, 220 | batch_size=self.trainer.batch_size, 221 | freq=self.freq, 222 | prediction_length=self.prediction_length, 223 | device=device, 224 | ) 225 | -------------------------------------------------------------------------------- /test/feature/test_holiday.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. 
See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | # Third-party imports 15 | import numpy as np 16 | import pandas as pd 17 | import pytest 18 | from pandas.tseries.holiday import Holiday 19 | 20 | 21 | # First-party imports 22 | from gluonts.time_feature.holiday import ( 23 | CHRISTMAS_DAY, 24 | CHRISTMAS_EVE, 25 | COLUMBUS_DAY, 26 | EASTER_MONDAY, 27 | EASTER_SUNDAY, 28 | GOOD_FRIDAY, 29 | INDEPENDENCE_DAY, 30 | LABOR_DAY, 31 | MARTIN_LUTHER_KING_DAY, 32 | MEMORIAL_DAY, 33 | MOTHERS_DAY, 34 | NEW_YEARS_DAY, 35 | NEW_YEARS_EVE, 36 | PRESIDENTS_DAY, 37 | SPECIAL_DATE_FEATURES, 38 | SUPERBOWL, 39 | THANKSGIVING, 40 | BLACK_FRIDAY, 41 | CYBER_MONDAY, 42 | SpecialDateFeatureSet, 43 | squared_exponential_kernel, 44 | exponential_kernel, 45 | ) 46 | from pts.feature.holiday import CustomDateFeatureSet, CustomHolidayFeatureSet 47 | 48 | test_dates = { 49 | NEW_YEARS_DAY: [ 50 | "2015-01-01", 51 | "2016-01-01", 52 | "2017-01-01", 53 | "2018-01-01", 54 | "2019-01-01", 55 | ], 56 | MARTIN_LUTHER_KING_DAY: [ 57 | "2012-01-16", 58 | "2014-01-20", 59 | "2015-01-19", 60 | "2018-01-15", 61 | "2019-01-21", 62 | ], 63 | SUPERBOWL: ["2011-02-06", "2017-02-05", "2018-02-04", "2019-02-03"], 64 | PRESIDENTS_DAY: ["2011-02-21", "2017-02-20", "2018-02-19", "2019-02-18"], 65 | MEMORIAL_DAY: [ 66 | "2015-05-25", 67 | "2016-05-30", 68 | "2017-05-29", 69 | "2018-05-28", 70 | "2019-05-27", 71 | ], 72 | GOOD_FRIDAY: [ 73 | "2014-04-18", 74 | "2015-04-03", 75 | "2017-04-14", 76 | "2018-03-30", 77 | "2019-04-19", 78 | ], 79 | EASTER_SUNDAY: [ 80 | "2014-04-20", 81 | "2015-04-05", 82 | "2017-04-16", 83 | "2018-04-01", 84 | "2019-04-21", 85 | ], 86 | EASTER_MONDAY: [ 87 | "2014-04-21", 88 | "2015-04-06", 89 | "2017-04-17", 90 | "2018-04-02", 91 | "2019-04-22", 92 | ], 93 | MOTHERS_DAY: ["2016-05-08", "2017-05-14", "2018-05-13", "2019-05-12"], 94 | INDEPENDENCE_DAY: ["2016-07-04", "2017-07-04", "2018-07-04", "2019-07-04"], 95 | LABOR_DAY: ["2014-09-01", "2016-09-05", "2018-09-03", "2019-09-02"], 96 | COLUMBUS_DAY: ["2016-10-10", "2017-10-09", "2018-10-08", "2019-10-14"], 97 | THANKSGIVING: [ 98 | "2015-11-26", 99 | "2016-11-24", 100 | "2017-11-23", 101 | "2018-11-22", 102 | "2019-11-28", 103 | ], 104 | CHRISTMAS_EVE: ["2016-12-24", "2017-12-24", "2018-12-24", "2019-12-24"], 105 | CHRISTMAS_DAY: ["2016-12-25", "2017-12-25", "2018-12-25", "2019-12-25"], 106 | NEW_YEARS_EVE: ["2016-12-31", "2017-12-31", "2018-12-31", "2019-12-31"], 107 | BLACK_FRIDAY: [ 108 | "2016-11-25", 109 | "2017-11-24", 110 | "2018-11-23", 111 | "2019-11-29", 112 | "2020-11-27", 113 | ], 114 | CYBER_MONDAY: ["2016-11-28", "2017-11-27", "2018-11-26", "2019-12-2", "2020-11-30"], 115 | } 116 | 117 | 118 | @pytest.mark.parametrize("holiday", test_dates.keys()) 119 | def test_holidays(holiday): 120 | for test_date in test_dates[holiday]: 121 | test_date = pd.to_datetime(test_date) 122 | distance_function = SPECIAL_DATE_FEATURES[holiday] 123 | assert ( 124 | distance_function(test_date) == 0 125 | ), "The supplied date should be {} but is not!".format(holiday) 126 | 127 | 128 | def test_special_date_feature_set_daily(): 129 | date_indices = pd.date_range(start="2016-12-24", end="2016-12-31", freq="D") 130 | 131 | reference_features = np.array( 132 | [[1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1],] 133 | ) 134 | sfs = SpecialDateFeatureSet([CHRISTMAS_EVE, CHRISTMAS_DAY, NEW_YEARS_EVE]) 135 | computed_features = sfs(date_indices) 136 | 137 | assert ( 138 | 
computed_features == reference_features 139 | ).all(), "Computed features do not match reference features." 140 | 141 | 142 | def test_special_date_feature_set_hourly(): 143 | date_indices = pd.date_range(start="2016-12-24", end="2016-12-25", freq="H") 144 | 145 | reference_features = np.array( 146 | [ 147 | [ 148 | 1, 149 | 1, 150 | 1, 151 | 1, 152 | 1, 153 | 1, 154 | 1, 155 | 1, 156 | 1, 157 | 1, 158 | 1, 159 | 1, 160 | 1, 161 | 1, 162 | 1, 163 | 1, 164 | 1, 165 | 1, 166 | 1, 167 | 1, 168 | 1, 169 | 1, 170 | 1, 171 | 1, 172 | 0, 173 | ], 174 | [ 175 | 0, 176 | 0, 177 | 0, 178 | 0, 179 | 0, 180 | 0, 181 | 0, 182 | 0, 183 | 0, 184 | 0, 185 | 0, 186 | 0, 187 | 0, 188 | 0, 189 | 0, 190 | 0, 191 | 0, 192 | 0, 193 | 0, 194 | 0, 195 | 0, 196 | 0, 197 | 0, 198 | 0, 199 | 1, 200 | ], 201 | [ 202 | 0, 203 | 0, 204 | 0, 205 | 0, 206 | 0, 207 | 0, 208 | 0, 209 | 0, 210 | 0, 211 | 0, 212 | 0, 213 | 0, 214 | 0, 215 | 0, 216 | 0, 217 | 0, 218 | 0, 219 | 0, 220 | 0, 221 | 0, 222 | 0, 223 | 0, 224 | 0, 225 | 0, 226 | 0, 227 | ], 228 | ] 229 | ) 230 | sfs = SpecialDateFeatureSet([CHRISTMAS_EVE, CHRISTMAS_DAY, NEW_YEARS_EVE]) 231 | computed_features = sfs(date_indices) 232 | 233 | assert ( 234 | computed_features == reference_features 235 | ).all(), "Computed features do not match reference features." 236 | 237 | 238 | def test_special_date_feature_set_daily_squared_exponential(): 239 | date_indices = pd.date_range(start="2016-12-24", end="2016-12-29", freq="D") 240 | reference_features = np.array( 241 | [ 242 | [ 243 | 1.00000e00, 244 | 3.67879e-01, 245 | 1.83156e-02, 246 | 1.23410e-04, 247 | 1.12535e-07, 248 | 0.00000e00, 249 | ], 250 | [ 251 | 3.67879e-01, 252 | 1.00000e00, 253 | 3.67879e-01, 254 | 1.83156e-02, 255 | 1.23410e-04, 256 | 1.12535e-07, 257 | ], 258 | ], 259 | dtype=float, 260 | ) 261 | 262 | squared_exp_kernel = squared_exponential_kernel(alpha=1.0) 263 | sfs = SpecialDateFeatureSet([CHRISTMAS_EVE, CHRISTMAS_DAY], squared_exp_kernel) 264 | computed_features = sfs(date_indices) 265 | np.testing.assert_almost_equal(computed_features, reference_features, decimal=6) 266 | 267 | 268 | def test_custom_date_feature_set(): 269 | 270 | ref_dates = [ 271 | pd.to_datetime("20191129", format="%Y%m%d"), 272 | pd.to_datetime("20200101", format="%Y%m%d"), 273 | ] 274 | 275 | kernel = exponential_kernel(alpha=1.0) 276 | 277 | cfs = CustomDateFeatureSet(ref_dates, kernel) 278 | sfs = SpecialDateFeatureSet([BLACK_FRIDAY, NEW_YEARS_DAY], kernel) 279 | 280 | date_indices = pd.date_range( 281 | start=pd.to_datetime("20191101", format="%Y%m%d"), 282 | end=pd.to_datetime("20200131", format="%Y%m%d"), 283 | freq="D", 284 | ) 285 | 286 | assert ( 287 | np.sum(cfs(date_indices) - sfs(date_indices).sum(0, keepdims=True)) == 0 288 | ), "Features don't match" 289 | 290 | 291 | def test_custom_holiday_feature_set(): 292 | 293 | custom_holidays = [ 294 | Holiday("New Years Day", month=1, day=1), 295 | Holiday("Christmas Day", month=12, day=25), 296 | ] 297 | 298 | kernel = exponential_kernel(alpha=1.0) 299 | 300 | cfs = CustomHolidayFeatureSet(custom_holidays, kernel) 301 | sfs = SpecialDateFeatureSet([NEW_YEARS_DAY, CHRISTMAS_DAY], kernel) 302 | 303 | date_indices = pd.date_range( 304 | start=pd.to_datetime("20191101", format="%Y%m%d"), 305 | end=pd.to_datetime("20200131", format="%Y%m%d"), 306 | freq="D", 307 | ) 308 | 309 | assert np.sum(cfs(date_indices) - sfs(date_indices)) == 0, "Features don't match" 310 | -------------------------------------------------------------------------------- 
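The tests above also show how the custom date features are meant to be used: `CustomDateFeatureSet` collapses a list of reference dates into a single kernel-smoothed feature row, while `CustomHolidayFeatureSet` produces one row per holiday. A minimal sketch with made-up dates (the M5 recipe in the next file stacks rows like these into `feat_dynamic_real`):

```python
import pandas as pd
from gluonts.time_feature.holiday import exponential_kernel
from pts.feature.holiday import CustomDateFeatureSet

# two hypothetical "event" days the model should see as soft indicators
event_dates = [pd.to_datetime("2019-11-29"), pd.to_datetime("2020-01-01")]
cfs = CustomDateFeatureSet(event_dates, exponential_kernel(alpha=1.0))

index = pd.date_range("2019-11-01", "2020-01-31", freq="D")
features = cfs(index)  # shape (1, len(index)), decaying with distance to the nearest event date

assert features.shape == (1, len(index))
assert abs(features[0, index.get_loc(pd.Timestamp("2020-01-01"))] - 1.0) < 1e-6
```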
/pts/dataset/repository/_m5.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from pathlib import Path 4 | from functools import lru_cache 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from gluonts.dataset.field_names import FieldName 10 | from gluonts.dataset.repository._util import metadata, save_to_file 11 | from gluonts.time_feature.holiday import squared_exponential_kernel 12 | from pts.feature import CustomDateFeatureSet 13 | 14 | 15 | def generate_pts_m5_dataset( 16 | dataset_path: Path, 17 | pandas_freq: str, 18 | prediction_length: int = 28, 19 | alpha: float = 0.5, 20 | ): 21 | cal_path = f"{dataset_path}/calendar.csv" 22 | sales_path = f"{dataset_path}/sales_train_validation.csv" 23 | sales_test_path = f"{dataset_path}/sales_train_evaluation.csv" 24 | sell_prices_path = f"{dataset_path}/sell_prices.csv" 25 | 26 | if not os.path.exists(cal_path) or not os.path.exists(sales_path): 27 | raise RuntimeError( 28 | f"M5 data is available on Kaggle (https://www.kaggle.com/c/m5-forecasting-accuracy/data). " 29 | f"You first need to agree to the terms of the competition before being able to download the data. " 30 | f"After you have done that, please copy the files into {dataset_path}." 31 | ) 32 | 33 | # Read M5 data from dataset_path 34 | calendar = pd.read_csv(cal_path, parse_dates=True) 35 | calendar.sort_index(inplace=True) 36 | calendar.date = pd.to_datetime(calendar.date) 37 | 38 | sales_train_validation = pd.read_csv( 39 | sales_path, 40 | index_col=["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"], 41 | ) 42 | sales_train_validation.sort_index(inplace=True) 43 | 44 | sales_train_evaluation = pd.read_csv( 45 | sales_test_path, 46 | index_col=["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"], 47 | ) 48 | sales_train_evaluation.sort_index(inplace=True) 49 | 50 | sell_prices = pd.read_csv(sell_prices_path, index_col=["item_id", "store_id"]) 51 | sell_prices.sort_index(inplace=True) 52 | 53 | @lru_cache(maxsize=None) 54 | def get_sell_price(item_id, store_id): 55 | return calendar.merge( 56 | sell_prices.loc[item_id, store_id], on=["wm_yr_wk"], how="left" 57 | ).sell_price 58 | 59 | # Build dynamic features 60 | kernel = squared_exponential_kernel(alpha=alpha) 61 | event_1 = CustomDateFeatureSet(calendar[calendar.event_name_1.notna()].date, kernel) 62 | event_2 = CustomDateFeatureSet(calendar[calendar.event_name_2.notna()].date, kernel) 63 | 64 | snap_CA = CustomDateFeatureSet(calendar[calendar.snap_CA == 1].date, kernel) 65 | snap_TX = CustomDateFeatureSet(calendar[calendar.snap_TX == 1].date, kernel) 66 | snap_WI = CustomDateFeatureSet(calendar[calendar.snap_WI == 1].date, kernel) 67 | 68 | time_index = pd.to_datetime(calendar.date) 69 | event_1_feature = event_1(time_index) 70 | event_2_feature = event_2(time_index) 71 | 72 | snap_CA_feature = snap_CA(time_index) 73 | snap_TX_feature = snap_TX(time_index) 74 | snap_WI_feature = snap_WI(time_index) 75 | 76 | # Build static features 77 | sales_train_validation["state"] = pd.CategoricalIndex( 78 | sales_train_validation.index.get_level_values(5) 79 | ).codes 80 | sales_train_validation["store"] = pd.CategoricalIndex( 81 | sales_train_validation.index.get_level_values(4) 82 | ).codes 83 | sales_train_validation["cat"] = pd.CategoricalIndex( 84 | sales_train_validation.index.get_level_values(3) 85 | ).codes 86 | sales_train_validation["dept"] = pd.CategoricalIndex( 87 | sales_train_validation.index.get_level_values(2) 88 | ).codes 
89 | sales_train_validation["item"] = pd.CategoricalIndex( 90 | sales_train_validation.index.get_level_values(1) 91 | ).codes 92 | 93 | sales_train_evaluation["state"] = pd.CategoricalIndex( 94 | sales_train_evaluation.index.get_level_values(5) 95 | ).codes 96 | sales_train_evaluation["store"] = pd.CategoricalIndex( 97 | sales_train_evaluation.index.get_level_values(4) 98 | ).codes 99 | sales_train_evaluation["cat"] = pd.CategoricalIndex( 100 | sales_train_evaluation.index.get_level_values(3) 101 | ).codes 102 | sales_train_evaluation["dept"] = pd.CategoricalIndex( 103 | sales_train_evaluation.index.get_level_values(2) 104 | ).codes 105 | sales_train_evaluation["item"] = pd.CategoricalIndex( 106 | sales_train_evaluation.index.get_level_values(1) 107 | ).codes 108 | 109 | feat_static_cat = [ 110 | { 111 | "name": "state_id", 112 | "cardinality": len(sales_train_validation["state"].unique()), 113 | }, 114 | { 115 | "name": "store_id", 116 | "cardinality": len(sales_train_validation["store"].unique()), 117 | }, 118 | {"name": "cat_id", "cardinality": len(sales_train_validation["cat"].unique())}, 119 | { 120 | "name": "dept_id", 121 | "cardinality": len(sales_train_validation["dept"].unique()), 122 | }, 123 | { 124 | "name": "item_id", 125 | "cardinality": len(sales_train_validation["item"].unique()), 126 | }, 127 | ] 128 | 129 | feat_dynamic_real = [ 130 | {"name": "sell_price", "cardinality": 1}, 131 | {"name": "event_1", "cardinality": 1}, 132 | {"name": "event_2", "cardinality": 1}, 133 | {"name": "snap", "cardinality": 1}, 134 | ] 135 | 136 | # Build training set 137 | train_file = dataset_path / "train" / "data.json" 138 | train_ds = [] 139 | for index, item in sales_train_validation.iterrows(): 140 | id, item_id, dept_id, cat_id, store_id, state_id = index 141 | start_index = np.nonzero(item.iloc[:1913].values)[0][0] 142 | start_date = time_index[start_index] 143 | time_series = {} 144 | 145 | state_enc, store_enc, cat_enc, dept_enc, item_enc = item.iloc[1913:] 146 | 147 | time_series["start"] = str(start_date) 148 | time_series["item_id"] = id[:-11] 149 | 150 | time_series["feat_static_cat"] = [ 151 | state_enc, 152 | store_enc, 153 | cat_enc, 154 | dept_enc, 155 | item_enc, 156 | ] 157 | 158 | sell_price = get_sell_price(item_id, store_id) 159 | snap_feature = { 160 | "CA": snap_CA_feature, 161 | "TX": snap_TX_feature, 162 | "WI": snap_WI_feature, 163 | }[state_id] 164 | 165 | time_series["target"] = ( 166 | item.iloc[start_index:1913].values.astype(np.float32).tolist() 167 | ) 168 | time_series["feat_dynamic_real"] = ( 169 | np.concatenate( 170 | ( 171 | np.expand_dims(sell_price.iloc[start_index:1913].values, 0), 172 | event_1_feature[:, start_index:1913], 173 | event_2_feature[:, start_index:1913], 174 | snap_feature[:, start_index:1913], 175 | ), 176 | 0, 177 | ) 178 | .astype(np.float32) 179 | .tolist() 180 | ) 181 | 182 | train_ds.append(time_series.copy()) 183 | 184 | # Build training set 185 | train_file = dataset_path / "train" / "data.json" 186 | save_to_file(train_file, train_ds) 187 | 188 | # Create metadata file 189 | meta_file = dataset_path / "metadata.json" 190 | with open(meta_file, "w") as f: 191 | f.write( 192 | json.dumps( 193 | { 194 | "freq": pandas_freq, 195 | "prediction_length": prediction_length, 196 | "feat_static_cat": feat_static_cat, 197 | "feat_dynamic_real": feat_dynamic_real, 198 | "cardinality": len(train_ds), 199 | } 200 | ) 201 | ) 202 | 203 | # Build testing set 204 | test_file = dataset_path / "test" / "data.json" 205 | test_ds = [] 206 | for 
index, item in sales_train_evaluation.iterrows(): 207 | id, item_id, dept_id, cat_id, store_id, state_id = index 208 | start_index = np.nonzero(item.iloc[:1941].values)[0][0] 209 | start_date = time_index[start_index] 210 | time_series = {} 211 | 212 | state_enc, store_enc, cat_enc, dept_enc, item_enc = item.iloc[1941:] 213 | 214 | time_series["start"] = str(start_date) 215 | time_series["item_id"] = id[:-11] 216 | 217 | time_series["feat_static_cat"] = [ 218 | state_enc, 219 | store_enc, 220 | cat_enc, 221 | dept_enc, 222 | item_enc, 223 | ] 224 | 225 | sell_price = get_sell_price(item_id, store_id) 226 | snap_feature = { 227 | "CA": snap_CA_feature, 228 | "TX": snap_TX_feature, 229 | "WI": snap_WI_feature, 230 | }[state_id] 231 | 232 | time_series["target"] = ( 233 | item.iloc[start_index:1941].values.astype(np.float32).tolist() 234 | ) 235 | time_series["feat_dynamic_real"] = ( 236 | np.concatenate( 237 | ( 238 | np.expand_dims(sell_price.iloc[start_index:1941].values, 0), 239 | event_1_feature[:, start_index:1941], 240 | event_2_feature[:, start_index:1941], 241 | snap_feature[:, start_index:1941], 242 | ), 243 | 0, 244 | ) 245 | .astype(np.float32) 246 | .tolist() 247 | ) 248 | 249 | test_ds.append(time_series.copy()) 250 | 251 | save_to_file(test_file, test_ds) 252 | -------------------------------------------------------------------------------- /pts/model/tempflow/tempflow_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | 5 | from gluonts.core.component import validated 6 | from gluonts.dataset.field_names import FieldName 7 | from gluonts.time_feature import TimeFeature 8 | from gluonts.torch.model.predictor import PyTorchPredictor 9 | from gluonts.torch.util import copy_parameters 10 | from gluonts.model.predictor import Predictor 11 | from gluonts.torch.model.predictor import PyTorchPredictor 12 | from gluonts.transform import ( 13 | Transformation, 14 | Chain, 15 | InstanceSplitter, 16 | ValidationSplitSampler, 17 | TestSplitSampler, 18 | ExpectedNumInstanceSampler, 19 | RenameFields, 20 | AsNumpyArray, 21 | ExpandDimArray, 22 | AddObservedValuesIndicator, 23 | AddTimeFeatures, 24 | VstackFeatures, 25 | SetFieldIfNotPresent, 26 | TargetDimIndicator, 27 | ) 28 | 29 | from pts import Trainer 30 | from pts.feature import ( 31 | fourier_time_features_from_frequency, 32 | lags_for_fourier_time_features_from_frequency, 33 | ) 34 | from pts.model.utils import get_module_forward_input_names 35 | from pts.model import PyTorchEstimator 36 | 37 | from .tempflow_network import TempFlowTrainingNetwork, TempFlowPredictionNetwork 38 | 39 | 40 | class TempFlowEstimator(PyTorchEstimator): 41 | @validated() 42 | def __init__( 43 | self, 44 | input_size: int, 45 | freq: str, 46 | prediction_length: int, 47 | target_dim: int, 48 | trainer: Trainer = Trainer(), 49 | context_length: Optional[int] = None, 50 | num_layers: int = 2, 51 | num_cells: int = 40, 52 | cell_type: str = "LSTM", 53 | num_parallel_samples: int = 100, 54 | dropout_rate: float = 0.1, 55 | cardinality: List[int] = [1], 56 | embedding_dimension: int = 5, 57 | flow_type="RealNVP", 58 | n_blocks=3, 59 | hidden_size=100, 60 | n_hidden=2, 61 | conditioning_length: int = 200, 62 | dequantize: bool = False, 63 | scaling: bool = True, 64 | pick_incomplete: bool = False, 65 | lags_seq: Optional[List[int]] = None, 66 | time_features: Optional[List[TimeFeature]] = None, 67 | **kwargs, 68 | ) -> None: 69 | 
super().__init__(trainer=trainer, **kwargs) 70 | 71 | self.freq = freq 72 | self.context_length = ( 73 | context_length if context_length is not None else prediction_length 74 | ) 75 | 76 | self.input_size = input_size 77 | self.prediction_length = prediction_length 78 | self.target_dim = target_dim 79 | self.num_layers = num_layers 80 | self.num_cells = num_cells 81 | self.cell_type = cell_type 82 | self.num_parallel_samples = num_parallel_samples 83 | self.dropout_rate = dropout_rate 84 | self.cardinality = cardinality 85 | self.embedding_dimension = embedding_dimension 86 | 87 | self.flow_type = flow_type 88 | self.n_blocks = n_blocks 89 | self.hidden_size = hidden_size 90 | self.n_hidden = n_hidden 91 | self.conditioning_length = conditioning_length 92 | self.dequantize = dequantize 93 | 94 | self.lags_seq = ( 95 | lags_seq 96 | if lags_seq is not None 97 | else lags_for_fourier_time_features_from_frequency(freq_str=freq) 98 | ) 99 | 100 | self.time_features = ( 101 | time_features 102 | if time_features is not None 103 | else fourier_time_features_from_frequency(self.freq) 104 | ) 105 | 106 | self.history_length = self.context_length + max(self.lags_seq) 107 | self.pick_incomplete = pick_incomplete 108 | self.scaling = scaling 109 | 110 | self.train_sampler = ExpectedNumInstanceSampler( 111 | num_instances=1.0, 112 | min_past=0 if pick_incomplete else self.history_length, 113 | min_future=prediction_length, 114 | ) 115 | 116 | self.validation_sampler = ValidationSplitSampler( 117 | min_past=0 if pick_incomplete else self.history_length, 118 | min_future=prediction_length, 119 | ) 120 | 121 | def create_transformation(self) -> Transformation: 122 | return Chain( 123 | [ 124 | AsNumpyArray( 125 | field=FieldName.TARGET, 126 | expected_ndim=2, 127 | ), 128 | # maps the target to (1, T) 129 | # if the target data is uni dimensional 130 | ExpandDimArray( 131 | field=FieldName.TARGET, 132 | axis=None, 133 | ), 134 | AddObservedValuesIndicator( 135 | target_field=FieldName.TARGET, 136 | output_field=FieldName.OBSERVED_VALUES, 137 | ), 138 | AddTimeFeatures( 139 | start_field=FieldName.START, 140 | target_field=FieldName.TARGET, 141 | output_field=FieldName.FEAT_TIME, 142 | time_features=self.time_features, 143 | pred_length=self.prediction_length, 144 | ), 145 | VstackFeatures( 146 | output_field=FieldName.FEAT_TIME, 147 | input_fields=[FieldName.FEAT_TIME], 148 | ), 149 | SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0]), 150 | TargetDimIndicator( 151 | field_name="target_dimension_indicator", 152 | target_field=FieldName.TARGET, 153 | ), 154 | AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1), 155 | ] 156 | ) 157 | 158 | def create_instance_splitter(self, mode: str): 159 | assert mode in ["training", "validation", "test"] 160 | 161 | instance_sampler = { 162 | "training": self.train_sampler, 163 | "validation": self.validation_sampler, 164 | "test": TestSplitSampler(), 165 | }[mode] 166 | 167 | return InstanceSplitter( 168 | target_field=FieldName.TARGET, 169 | is_pad_field=FieldName.IS_PAD, 170 | start_field=FieldName.START, 171 | forecast_start_field=FieldName.FORECAST_START, 172 | instance_sampler=instance_sampler, 173 | past_length=self.history_length, 174 | future_length=self.prediction_length, 175 | time_series_fields=[ 176 | FieldName.FEAT_TIME, 177 | FieldName.OBSERVED_VALUES, 178 | ], 179 | ) + ( 180 | RenameFields( 181 | { 182 | f"past_{FieldName.TARGET}": f"past_{FieldName.TARGET}_cdf", 183 | f"future_{FieldName.TARGET}": 
f"future_{FieldName.TARGET}_cdf", 184 | } 185 | ) 186 | ) 187 | 188 | def create_training_network(self, device: torch.device) -> TempFlowTrainingNetwork: 189 | return TempFlowTrainingNetwork( 190 | input_size=self.input_size, 191 | target_dim=self.target_dim, 192 | num_layers=self.num_layers, 193 | num_cells=self.num_cells, 194 | cell_type=self.cell_type, 195 | history_length=self.history_length, 196 | context_length=self.context_length, 197 | prediction_length=self.prediction_length, 198 | dropout_rate=self.dropout_rate, 199 | cardinality=self.cardinality, 200 | embedding_dimension=self.embedding_dimension, 201 | lags_seq=self.lags_seq, 202 | scaling=self.scaling, 203 | flow_type=self.flow_type, 204 | n_blocks=self.n_blocks, 205 | hidden_size=self.hidden_size, 206 | n_hidden=self.n_hidden, 207 | conditioning_length=self.conditioning_length, 208 | dequantize=self.dequantize, 209 | ).to(device) 210 | 211 | def create_predictor( 212 | self, 213 | transformation: Transformation, 214 | trained_network: TempFlowTrainingNetwork, 215 | device: torch.device, 216 | ) -> Predictor: 217 | prediction_network = TempFlowPredictionNetwork( 218 | input_size=self.input_size, 219 | target_dim=self.target_dim, 220 | num_layers=self.num_layers, 221 | num_cells=self.num_cells, 222 | cell_type=self.cell_type, 223 | history_length=self.history_length, 224 | context_length=self.context_length, 225 | prediction_length=self.prediction_length, 226 | dropout_rate=self.dropout_rate, 227 | cardinality=self.cardinality, 228 | embedding_dimension=self.embedding_dimension, 229 | lags_seq=self.lags_seq, 230 | scaling=self.scaling, 231 | flow_type=self.flow_type, 232 | n_blocks=self.n_blocks, 233 | hidden_size=self.hidden_size, 234 | n_hidden=self.n_hidden, 235 | conditioning_length=self.conditioning_length, 236 | dequantize=self.dequantize, 237 | num_parallel_samples=self.num_parallel_samples, 238 | ).to(device) 239 | 240 | copy_parameters(trained_network, prediction_network) 241 | input_names = get_module_forward_input_names(prediction_network) 242 | prediction_splitter = self.create_instance_splitter("test") 243 | 244 | return PyTorchPredictor( 245 | input_transform=transformation + prediction_splitter, 246 | input_names=input_names, 247 | prediction_net=prediction_network, 248 | batch_size=self.trainer.batch_size, 249 | freq=self.freq, 250 | prediction_length=self.prediction_length, 251 | device=device, 252 | ) 253 | -------------------------------------------------------------------------------- /test/modules/test_implicit_quantile_distr_output.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.distributions import Normal, Uniform, Bernoulli 7 | from torch.nn.utils import clip_grad_norm_ 8 | from torch.optim import SGD 9 | from torch.utils.data import TensorDataset, DataLoader 10 | 11 | from gluonts.dataset.repository.datasets import get_dataset 12 | from gluonts.evaluation import Evaluator 13 | from gluonts.evaluation.backtest import make_evaluation_predictions 14 | from gluonts.torch.modules.distribution_output import DistributionOutput 15 | from pts import Trainer 16 | from pts.model.deepar import DeepAREstimator 17 | from pts.modules import ImplicitQuantileOutput 18 | 19 | NUM_SAMPLES = 2000 20 | BATCH_SIZE = 32 21 | TOL = 0.3 22 | START_TOL_MULTIPLE = 1 23 | 24 | 25 | def inv_softplus(y: np.ndarray) -> np.ndarray: 26 | return np.log(np.exp(y) - 1) 27 | 28 | 29 | def 
learn_distribution( 30 | distr_output: DistributionOutput, 31 | samples: torch.Tensor, 32 | init_biases: List[np.ndarray] = None, 33 | num_epochs: int = 5, 34 | learning_rate: float = 1e-2, 35 | ): 36 | arg_proj = distr_output.get_args_proj(in_features=1) 37 | 38 | if init_biases is not None: 39 | for param, bias in zip(arg_proj.proj, init_biases): 40 | nn.init.constant_(param.bias, bias) 41 | 42 | dummy_data = torch.ones((len(samples), 1, 1)) 43 | 44 | dataset = TensorDataset(dummy_data, samples) 45 | train_data = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True) 46 | 47 | optimizer = SGD(arg_proj.parameters(), lr=learning_rate) 48 | 49 | for e in range(num_epochs): 50 | cumulative_loss = 0 51 | num_batches = 0 52 | 53 | for i, (data, sample_label) in enumerate(train_data): 54 | optimizer.zero_grad() 55 | distr_args = arg_proj(data) 56 | distr = distr_output.distribution(distr_args) 57 | loss = -distr.log_prob(sample_label).mean() 58 | loss.backward() 59 | #clip_grad_norm_(arg_proj.parameters(), 10.0) 60 | optimizer.step() 61 | 62 | num_batches += 1 63 | cumulative_loss += loss.item() 64 | print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches)) 65 | 66 | sampling_dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True) 67 | i, (data, sample_label) = next(enumerate(sampling_dataloader)) 68 | distr_args = arg_proj(data) 69 | distr = distr_output.distribution(distr_args) 70 | samples = distr.sample((NUM_SAMPLES,)) 71 | 72 | with torch.no_grad(): 73 | percentile_90 = distr.quantile_function( 74 | torch.ones((1, 1, 1)), torch.ones((1, 1)) * 0.9 75 | ) 76 | percentile_10 = distr.quantile_function( 77 | torch.ones((1, 1, 1)), torch.ones((1, 1)) * 0.1 78 | ) 79 | 80 | return samples.mean(), samples.std(), percentile_10.squeeze(), percentile_90.squeeze() 81 | 82 | 83 | def test_independent_implicit_quantile() -> None: 84 | num_samples = NUM_SAMPLES 85 | 86 | # # Normal distrib 87 | distr_mean = torch.Tensor([10.0]) 88 | distr_std = torch.Tensor([4.0]) 89 | distr_pp10 = distr_mean - 1.282 * distr_std 90 | distr_pp90 = distr_mean + 1.282 * distr_std 91 | distr = Normal(loc=distr_mean, scale=distr_std) 92 | 93 | samples = distr.sample((num_samples,)) 94 | learned_mean, learned_std, learned_pp10, learned_pp90 = learn_distribution( 95 | ImplicitQuantileOutput(output_domain="Real"), 96 | samples=samples, 97 | num_epochs=50, 98 | learning_rate=1e-2, 99 | ) 100 | 101 | torch.testing.assert_allclose( 102 | learned_mean, distr_mean.squeeze(), rtol=0.1, atol=0.1 * 10 103 | ) 104 | torch.testing.assert_allclose( 105 | learned_std, distr_std.squeeze(), rtol=0.1, atol=0.1 * 4 106 | ) 107 | torch.testing.assert_allclose( 108 | learned_pp90, distr_pp90.squeeze(), rtol=0.1, atol=0.1 * 4 109 | ) 110 | torch.testing.assert_allclose( 111 | learned_pp10, distr_pp10.squeeze(), rtol=0.1, atol=0.1 * 4 112 | ) 113 | 114 | # Uniform distrib 115 | a = torch.Tensor([0.0]) 116 | b = torch.Tensor([20.0]) 117 | distr_mean = 0.5 * (a + b) 118 | distr_std = (1.0 / 12.0 * (b - a) ** 2) ** 0.5 119 | distr_pp10 = 0.1 * (a + b) 120 | distr_pp90 = 0.9 * (a + b) 121 | distr = Uniform(low=a, high=b) 122 | 123 | samples = distr.sample((num_samples,)) 124 | learned_mean, learned_std, learned_pp10, learned_pp90 = learn_distribution( 125 | ImplicitQuantileOutput(output_domain="Positive"), 126 | samples=samples, 127 | num_epochs=50, 128 | learning_rate=1e-2, 129 | ) 130 | 131 | torch.testing.assert_allclose( 132 | learned_mean, distr_mean.squeeze(), atol=1.0, rtol=0.1 133 | ) 134 | 
torch.testing.assert_allclose(learned_std, distr_std.squeeze(), atol=0.5, rtol=0.1) 135 | torch.testing.assert_allclose( 136 | learned_pp90, distr_pp90.squeeze(), rtol=0.1, atol=0.1 * 18 137 | ) 138 | torch.testing.assert_allclose( 139 | learned_pp10, distr_pp10.squeeze(), rtol=0.2, atol=0.2 * 2 140 | ) 141 | 142 | # Bernoulli distrib 143 | distr_mean = torch.Tensor([0.2]) 144 | distr_std = (distr_mean * (1 - distr_mean)) ** 0.5 # std = sqrt(p * (1 - p)) 145 | distr_pp10 = torch.Tensor([0.0]) 146 | distr_pp90 = torch.Tensor([1.0]) 147 | distr = Bernoulli(probs=distr_mean) 148 | 149 | samples = distr.sample((num_samples,)) 150 | learned_mean, learned_std, learned_pp10, learned_pp90 = learn_distribution( 151 | ImplicitQuantileOutput(output_domain="Positive"), 152 | samples=samples, 153 | num_epochs=50, 154 | learning_rate=1e-2, 155 | ) 156 | 157 | torch.testing.assert_allclose( 158 | learned_mean, distr_mean.squeeze(), atol=1.0, rtol=0.1 159 | ) 160 | torch.testing.assert_allclose(learned_std, distr_std.squeeze(), atol=0.5, rtol=0.1) 161 | torch.testing.assert_allclose( 162 | learned_pp90, distr_pp90.squeeze(), rtol=0.1, atol=0.1 * 18 163 | ) 164 | torch.testing.assert_allclose( 165 | learned_pp10, distr_pp10.squeeze(), rtol=0.1, atol=0.1 * 2 166 | ) 167 | 168 | 169 | def test_training_with_implicit_quantile_output(): 170 | dataset = get_dataset("constant") 171 | metadata = dataset.metadata 172 | 173 | deepar_estimator = DeepAREstimator( 174 | distr_output=ImplicitQuantileOutput(output_domain="Real"), 175 | freq=metadata.freq, 176 | prediction_length=metadata.prediction_length, 177 | trainer=Trainer( 178 | device="cpu", 179 | epochs=5, 180 | learning_rate=1e-3, 181 | num_batches_per_epoch=3, 182 | batch_size=256, 183 | ), 184 | input_size=15, 185 | ) 186 | deepar_predictor = deepar_estimator.train(dataset.train, num_workers=1) 187 | forecast_it, ts_it = make_evaluation_predictions( 188 | dataset=dataset.test, # test dataset 189 | predictor=deepar_predictor, # predictor 190 | num_samples=100, # number of sample paths we want for evaluation 191 | ) 192 | forecasts = list(forecast_it) 193 | tss = list(ts_it) 194 | evaluator = Evaluator(num_workers=0) 195 | agg_metrics, item_metrics = evaluator( 196 | iter(tss), iter(forecasts), num_series=len(dataset.test) 197 | ) 198 | 199 | assert agg_metrics["MSE"] > 0 200 | 201 | 202 | def test_instantiation_of_args_proj(): 203 | class MockedImplicitQuantileOutput(ImplicitQuantileOutput): 204 | method_calls = 0 205 | 206 | @classmethod 207 | def set_args_proj(cls): 208 | super().set_args_proj() 209 | cls.method_calls += 1 210 | 211 | dataset = get_dataset("constant") 212 | metadata = dataset.metadata 213 | 214 | distr_output = MockedImplicitQuantileOutput(output_domain="Real") 215 | 216 | deepar_estimator = DeepAREstimator( 217 | distr_output=distr_output, 218 | freq=metadata.freq, 219 | prediction_length=metadata.prediction_length, 220 | trainer=Trainer( 221 | device="cpu", 222 | epochs=3, 223 | learning_rate=1e-3, 224 | num_batches_per_epoch=1, 225 | batch_size=256, 226 | ), 227 | input_size=15, 228 | ) 229 | assert distr_output.method_calls == 1 230 | deepar_predictor = deepar_estimator.train(dataset.train, num_workers=1) 231 | 232 | # Method should be called when the MockedImplicitQuantileOutput is instantiated, 233 | # and one more time because in_features is different from 1 234 | assert distr_output.method_calls == 2 235 | 236 | forecast_it, ts_it = make_evaluation_predictions( 237 | dataset=dataset.test, # test dataset 238 | predictor=deepar_predictor, # predictor 239 | 
num_samples=100, # number of sample paths we want for evaluation 240 | ) 241 | forecasts = list(forecast_it) 242 | tss = list(ts_it) 243 | evaluator = Evaluator(num_workers=0) 244 | agg_metrics, item_metrics = evaluator( 245 | iter(tss), iter(forecasts), num_series=len(dataset.test) 246 | ) 247 | assert distr_output.method_calls == 2 248 | 249 | # Test that the implicit output module is properly reset 250 | new_estimator = DeepAREstimator( 251 | distr_output=MockedImplicitQuantileOutput(output_domain="Real"), 252 | freq=metadata.freq, 253 | prediction_length=metadata.prediction_length, 254 | trainer=Trainer( 255 | device="cpu", 256 | epochs=3, 257 | learning_rate=1e-3, 258 | num_batches_per_epoch=1, 259 | batch_size=256, 260 | ), 261 | input_size=15, 262 | ) 263 | assert distr_output.method_calls == 3 264 | new_estimator.train(dataset.train, num_workers=1) 265 | assert ( 266 | distr_output.method_calls == 3 267 | ) # Since in_features is the same as before, there should be no additional call 268 | -------------------------------------------------------------------------------- /pts/model/time_grad/time_grad_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | 5 | from gluonts.dataset.field_names import FieldName 6 | from gluonts.time_feature import TimeFeature 7 | from gluonts.torch.model.predictor import PyTorchPredictor 8 | from gluonts.torch.util import copy_parameters 9 | from gluonts.model.predictor import Predictor 10 | from gluonts.transform import ( 11 | Transformation, 12 | Chain, 13 | InstanceSplitter, 14 | ExpectedNumInstanceSampler, 15 | ValidationSplitSampler, 16 | TestSplitSampler, 17 | RenameFields, 18 | AsNumpyArray, 19 | ExpandDimArray, 20 | AddObservedValuesIndicator, 21 | AddTimeFeatures, 22 | VstackFeatures, 23 | SetFieldIfNotPresent, 24 | TargetDimIndicator, 25 | ) 26 | 27 | from pts import Trainer 28 | from pts.feature import ( 29 | fourier_time_features_from_frequency, 30 | lags_for_fourier_time_features_from_frequency, 31 | ) 32 | from pts.model import PyTorchEstimator 33 | from pts.model.utils import get_module_forward_input_names 34 | 35 | from .time_grad_network import TimeGradTrainingNetwork, TimeGradPredictionNetwork 36 | 37 | 38 | class TimeGradEstimator(PyTorchEstimator): 39 | def __init__( 40 | self, 41 | input_size: int, 42 | freq: str, 43 | prediction_length: int, 44 | target_dim: int, 45 | trainer: Trainer = Trainer(), 46 | context_length: Optional[int] = None, 47 | num_layers: int = 2, 48 | num_cells: int = 40, 49 | cell_type: str = "LSTM", 50 | num_parallel_samples: int = 100, 51 | dropout_rate: float = 0.1, 52 | cardinality: List[int] = [1], 53 | embedding_dimension: int = 5, 54 | conditioning_length: int = 100, 55 | diff_steps: int = 100, 56 | loss_type: str = "l2", 57 | beta_end: float = 0.1, 58 | beta_schedule: str = "linear", 59 | residual_layers: int = 8, 60 | residual_channels: int = 8, 61 | dilation_cycle_length: int = 2, 62 | scaling: bool = True, 63 | pick_incomplete: bool = False, 64 | lags_seq: Optional[List[int]] = None, 65 | time_features: Optional[List[TimeFeature]] = None, 66 | **kwargs, 67 | ) -> None: 68 | super().__init__(trainer=trainer, **kwargs) 69 | 70 | self.freq = freq 71 | self.context_length = ( 72 | context_length if context_length is not None else prediction_length 73 | ) 74 | 75 | self.input_size = input_size 76 | self.prediction_length = prediction_length 77 | self.target_dim = target_dim 78 | self.num_layers = num_layers 79 | self.num_cells = num_cells 80 | 
self.cell_type = cell_type 81 | self.num_parallel_samples = num_parallel_samples 82 | self.dropout_rate = dropout_rate 83 | self.cardinality = cardinality 84 | self.embedding_dimension = embedding_dimension 85 | 86 | self.conditioning_length = conditioning_length 87 | self.diff_steps = diff_steps 88 | self.loss_type = loss_type 89 | self.beta_end = beta_end 90 | self.beta_schedule = beta_schedule 91 | self.residual_layers = residual_layers 92 | self.residual_channels = residual_channels 93 | self.dilation_cycle_length = dilation_cycle_length 94 | 95 | self.lags_seq = ( 96 | lags_seq 97 | if lags_seq is not None 98 | else lags_for_fourier_time_features_from_frequency(freq_str=freq) 99 | ) 100 | 101 | self.time_features = ( 102 | time_features 103 | if time_features is not None 104 | else fourier_time_features_from_frequency(self.freq) 105 | ) 106 | 107 | self.history_length = self.context_length + max(self.lags_seq) 108 | self.pick_incomplete = pick_incomplete 109 | self.scaling = scaling 110 | 111 | self.train_sampler = ExpectedNumInstanceSampler( 112 | num_instances=1.0, 113 | min_past=0 if pick_incomplete else self.history_length, 114 | min_future=prediction_length, 115 | ) 116 | 117 | self.validation_sampler = ValidationSplitSampler( 118 | min_past=0 if pick_incomplete else self.history_length, 119 | min_future=prediction_length, 120 | ) 121 | 122 | def create_transformation(self) -> Transformation: 123 | return Chain( 124 | [ 125 | AsNumpyArray( 126 | field=FieldName.TARGET, 127 | expected_ndim=2, 128 | ), 129 | # maps the target to (1, T) 130 | # if the target data is uni dimensional 131 | ExpandDimArray( 132 | field=FieldName.TARGET, 133 | axis=None, 134 | ), 135 | AddObservedValuesIndicator( 136 | target_field=FieldName.TARGET, 137 | output_field=FieldName.OBSERVED_VALUES, 138 | ), 139 | AddTimeFeatures( 140 | start_field=FieldName.START, 141 | target_field=FieldName.TARGET, 142 | output_field=FieldName.FEAT_TIME, 143 | time_features=self.time_features, 144 | pred_length=self.prediction_length, 145 | ), 146 | VstackFeatures( 147 | output_field=FieldName.FEAT_TIME, 148 | input_fields=[FieldName.FEAT_TIME], 149 | ), 150 | SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0]), 151 | TargetDimIndicator( 152 | field_name="target_dimension_indicator", 153 | target_field=FieldName.TARGET, 154 | ), 155 | AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1), 156 | ] 157 | ) 158 | 159 | def create_instance_splitter(self, mode: str): 160 | assert mode in ["training", "validation", "test"] 161 | 162 | instance_sampler = { 163 | "training": self.train_sampler, 164 | "validation": self.validation_sampler, 165 | "test": TestSplitSampler(), 166 | }[mode] 167 | 168 | return InstanceSplitter( 169 | target_field=FieldName.TARGET, 170 | is_pad_field=FieldName.IS_PAD, 171 | start_field=FieldName.START, 172 | forecast_start_field=FieldName.FORECAST_START, 173 | instance_sampler=instance_sampler, 174 | past_length=self.history_length, 175 | future_length=self.prediction_length, 176 | time_series_fields=[ 177 | FieldName.FEAT_TIME, 178 | FieldName.OBSERVED_VALUES, 179 | ], 180 | ) + ( 181 | RenameFields( 182 | { 183 | f"past_{FieldName.TARGET}": f"past_{FieldName.TARGET}_cdf", 184 | f"future_{FieldName.TARGET}": f"future_{FieldName.TARGET}_cdf", 185 | } 186 | ) 187 | ) 188 | 189 | def create_training_network(self, device: torch.device) -> TimeGradTrainingNetwork: 190 | return TimeGradTrainingNetwork( 191 | input_size=self.input_size, 192 | target_dim=self.target_dim, 193 | 
num_layers=self.num_layers, 194 | num_cells=self.num_cells, 195 | cell_type=self.cell_type, 196 | history_length=self.history_length, 197 | context_length=self.context_length, 198 | prediction_length=self.prediction_length, 199 | dropout_rate=self.dropout_rate, 200 | cardinality=self.cardinality, 201 | embedding_dimension=self.embedding_dimension, 202 | diff_steps=self.diff_steps, 203 | loss_type=self.loss_type, 204 | beta_end=self.beta_end, 205 | beta_schedule=self.beta_schedule, 206 | residual_layers=self.residual_layers, 207 | residual_channels=self.residual_channels, 208 | dilation_cycle_length=self.dilation_cycle_length, 209 | lags_seq=self.lags_seq, 210 | scaling=self.scaling, 211 | conditioning_length=self.conditioning_length, 212 | ).to(device) 213 | 214 | def create_predictor( 215 | self, 216 | transformation: Transformation, 217 | trained_network: TimeGradTrainingNetwork, 218 | device: torch.device, 219 | ) -> Predictor: 220 | prediction_network = TimeGradPredictionNetwork( 221 | input_size=self.input_size, 222 | target_dim=self.target_dim, 223 | num_layers=self.num_layers, 224 | num_cells=self.num_cells, 225 | cell_type=self.cell_type, 226 | history_length=self.history_length, 227 | context_length=self.context_length, 228 | prediction_length=self.prediction_length, 229 | dropout_rate=self.dropout_rate, 230 | cardinality=self.cardinality, 231 | embedding_dimension=self.embedding_dimension, 232 | diff_steps=self.diff_steps, 233 | loss_type=self.loss_type, 234 | beta_end=self.beta_end, 235 | beta_schedule=self.beta_schedule, 236 | residual_layers=self.residual_layers, 237 | residual_channels=self.residual_channels, 238 | dilation_cycle_length=self.dilation_cycle_length, 239 | lags_seq=self.lags_seq, 240 | scaling=self.scaling, 241 | conditioning_length=self.conditioning_length, 242 | num_parallel_samples=self.num_parallel_samples, 243 | ).to(device) 244 | 245 | copy_parameters(trained_network, prediction_network) 246 | input_names = get_module_forward_input_names(prediction_network) 247 | prediction_splitter = self.create_instance_splitter("test") 248 | 249 | return PyTorchPredictor( 250 | input_transform=transformation + prediction_splitter, 251 | input_names=input_names, 252 | prediction_net=prediction_network, 253 | batch_size=self.trainer.batch_size, 254 | freq=self.freq, 255 | prediction_length=self.prediction_length, 256 | device=device, 257 | ) 258 | -------------------------------------------------------------------------------- /pts/model/tft/tft_modules.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Tuple 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from pts.modules import FeatureEmbedder as BaseFeatureEmbedder 9 | 10 | 11 | class FeatureProjector(nn.Module): 12 | def __init__( 13 | self, 14 | feature_dims: List[int], 15 | embedding_dims: List[int], 16 | ): 17 | super().__init__() 18 | 19 | self.__num_features = len(feature_dims) 20 | if self.__num_features > 1: 21 | self.feature_slices = ( 22 | feature_dims[0:1] + np.cumsum(feature_dims)[:-1].tolist() 23 | ) 24 | else: 25 | self.feature_slices = feature_dims 26 | self.feature_dims = feature_dims 27 | 28 | self._projector = nn.ModuleList( 29 | [ 30 | nn.Linear(in_features=in_feature, out_features=out_features) 31 | for in_feature, out_features in zip(self.feature_dims, embedding_dims) 32 | ] 33 | ) 34 | 35 | def forward(self, features: torch.Tensor) -> List[torch.Tensor]: 36 | if self.__num_features > 1: 37 | 
real_feature_slices = torch.tensor_split( 38 | features, self.feature_slices[1:], dim=-1 39 | ) 40 | else: 41 | real_feature_slices = [features] 42 | 43 | return [ 44 | proj(real_feature_slice) 45 | for proj, real_feature_slice in zip(self._projector, real_feature_slices) 46 | ] 47 | 48 | 49 | class FeatureEmbedder(BaseFeatureEmbedder): 50 | def forward(self, features: torch.Tensor) -> List[torch.Tensor]: 51 | concat_features = super(FeatureEmbedder, self).forward(features=features) 52 | 53 | if self.__num_features > 1: 54 | features = torch.chunk(concat_features, self.__num_features, dim=-1) 55 | else: 56 | features = [concat_features] 57 | 58 | return features 59 | 60 | 61 | class GatedLinearUnit(nn.Module): 62 | def __init__(self, dim: int = -1, nonlinear: bool = True): 63 | super().__init__() 64 | self.dim = dim 65 | self.nonlinear = nonlinear 66 | 67 | def forward(self, x: torch.Tensor) -> torch.Tensor: 68 | val, gate = torch.chunk(x, 2, dim=self.dim) 69 | if self.nonlinear: 70 | val = torch.tanh(val) 71 | return torch.sigmoid(gate) * val 72 | 73 | 74 | class GatedResidualNetwork(nn.Module): 75 | def __init__( 76 | self, 77 | d_hidden: int, 78 | d_input: Optional[int] = None, 79 | d_output: Optional[int] = None, 80 | d_static: Optional[int] = None, 81 | dropout: float = 0.0, 82 | ): 83 | super().__init__() 84 | 85 | d_input = d_input or d_hidden 86 | d_static = d_static or 0 87 | if d_output is None: 88 | d_output = d_input 89 | self.add_skip = False 90 | else: 91 | if d_output != d_input: 92 | self.add_skip = True 93 | self.skip_proj = nn.Linear(in_features=d_input, out_features=d_output) 94 | else: 95 | self.add_skip = False 96 | 97 | self.mlp = nn.Sequential( 98 | nn.Linear(in_features=d_input + d_static, out_features=d_hidden), 99 | nn.ELU(), 100 | nn.Linear(in_features=d_hidden, out_features=d_hidden), 101 | nn.Dropout(p=dropout), 102 | nn.Linear(in_features=d_hidden, out_features=d_output * 2), 103 | GatedLinearUnit(nonlinear=False), 104 | ) 105 | 106 | self.lnorm = nn.LayerNorm(d_output) 107 | 108 | def forward( 109 | self, x: torch.Tensor, c: Optional[torch.Tensor] = None 110 | ) -> torch.Tensor: 111 | if self.add_skip: 112 | skip = self.skip_proj(x) 113 | else: 114 | skip = x 115 | 116 | if c is not None: 117 | x = torch.cat((x, c), dim=-1) 118 | x = self.mlp(x) 119 | x = self.lnorm(x + skip) 120 | return x 121 | 122 | 123 | class VariableSelectionNetwork(nn.Module): 124 | def __init__( 125 | self, 126 | d_hidden: int, 127 | n_vars: int, 128 | dropout: float = 0.0, 129 | add_static: bool = False, 130 | ): 131 | super().__init__() 132 | self.weight_network = GatedResidualNetwork( 133 | d_hidden=d_hidden, 134 | d_input=d_hidden * n_vars, 135 | d_output=n_vars, 136 | d_static=d_hidden if add_static else None, 137 | dropout=dropout, 138 | ) 139 | 140 | self.variable_network = nn.ModuleList( 141 | [ 142 | GatedResidualNetwork(d_hidden=d_hidden, dropout=dropout) 143 | for _ in range(n_vars) 144 | ] 145 | ) 146 | 147 | def forward( 148 | self, variables: List[torch.Tensor], static: Optional[torch.Tensor] = None 149 | ) -> Tuple[torch.Tensor, torch.Tensor]: 150 | flatten = torch.cat(variables, dim=-1) 151 | if static is not None: 152 | static = static.expand_as(variables[0]) 153 | weight = self.weight_network(flatten, static) 154 | weight = torch.softmax(weight.unsqueeze(-2), dim=-1) 155 | 156 | var_encodings = [net(var) for var, net in zip(variables, self.variable_network)] 157 | var_encodings = torch.stack(var_encodings, dim=-1) 158 | 159 | var_encodings = torch.sum(var_encodings * 
weight, dim=-1) 160 | 161 | return var_encodings, weight 162 | 163 | 164 | class TemporalFusionEncoder(nn.Module): 165 | def __init__( 166 | self, 167 | d_input: int, 168 | d_hidden: int, 169 | ): 170 | super().__init__() 171 | 172 | self.encoder_lstm = nn.LSTM( 173 | input_size=d_input, hidden_size=d_hidden, batch_first=True 174 | ) 175 | self.decoder_lstm = nn.LSTM( 176 | input_size=d_input, hidden_size=d_hidden, batch_first=True 177 | ) 178 | 179 | self.gate = nn.Sequential( 180 | nn.Linear(in_features=d_hidden, out_features=d_hidden * 2), 181 | GatedLinearUnit(nonlinear=False), 182 | ) 183 | if d_input != d_hidden: 184 | self.skip_proj = nn.Linear(in_features=d_input, out_features=d_hidden) 185 | self.add_skip = True 186 | else: 187 | self.add_skip = False 188 | 189 | self.lnorm = nn.LayerNorm(d_hidden) 190 | 191 | def forward( 192 | self, 193 | ctx_input: torch.Tensor, 194 | tgt_input: torch.Tensor, 195 | states: List[torch.Tensor], 196 | ): 197 | ctx_encodings, states = self.encoder_lstm(ctx_input, states) 198 | 199 | tgt_encodings, _ = self.decoder_lstm(tgt_input, states) 200 | 201 | encodings = torch.cat((ctx_encodings, tgt_encodings), dim=1) 202 | skip = torch.cat((ctx_input, tgt_input), dim=1) 203 | if self.add_skip: 204 | skip = self.skip_proj(skip) 205 | encodings = self.gate(encodings) 206 | encodings = self.lnorm(skip + encodings) 207 | return encodings 208 | 209 | 210 | class TemporalFusionDecoder(nn.Module): 211 | def __init__( 212 | self, 213 | context_length: int, 214 | prediction_length: int, 215 | d_hidden: int, 216 | d_var: int, 217 | n_head: int, 218 | dropout: float = 0.0, 219 | ): 220 | super().__init__() 221 | self.context_length = context_length 222 | self.prediction_length = prediction_length 223 | 224 | self.enrich = GatedResidualNetwork( 225 | d_hidden=d_hidden, 226 | d_static=d_var, 227 | dropout=dropout, 228 | ) 229 | 230 | self.attention = nn.MultiheadAttention( 231 | embed_dim=d_hidden, num_heads=n_head, dropout=dropout 232 | ) 233 | 234 | self.att_net = nn.Sequential( 235 | nn.Linear(in_features=d_hidden, out_features=d_hidden * 2), 236 | GatedLinearUnit(nonlinear=False), 237 | ) 238 | self.att_lnorm = nn.LayerNorm(d_hidden) 239 | 240 | self.ff_net = nn.Sequential( 241 | GatedResidualNetwork(d_hidden=d_hidden, dropout=dropout), 242 | nn.Linear(in_features=d_hidden, out_features=d_hidden * 2), 243 | GatedLinearUnit(nonlinear=False), 244 | ) 245 | self.ff_lnorm = nn.LayerNorm(d_hidden) 246 | 247 | self.register_buffer( 248 | "attn_mask", 249 | self._generate_subsequent_mask( 250 | prediction_length, prediction_length + context_length 251 | ), 252 | ) 253 | 254 | @staticmethod 255 | def _generate_subsequent_mask( 256 | target_length: int, source_length: int 257 | ) -> torch.Tensor: 258 | mask = (torch.triu(torch.ones(source_length, target_length)) == 1).transpose( 259 | 0, 1 260 | ) 261 | mask = ( 262 | mask.float() 263 | .masked_fill(mask == 0, float("-inf")) 264 | .masked_fill(mask == 1, float(0.0)) 265 | ) 266 | return mask 267 | 268 | def forward( 269 | self, x: torch.Tensor, static: torch.Tensor, mask: torch.Tensor 270 | ) -> torch.Tensor: 271 | static = static.repeat((1, self.context_length + self.prediction_length, 1)) 272 | 273 | skip = x[:, self.context_length :, ...] 274 | x = self.enrich(x, static) 275 | 276 | mask_pad = torch.ones_like(mask)[:, 0:1, ...] 
277 | mask_pad = mask_pad.repeat((1, self.prediction_length)) 278 | key_padding_mask = torch.cat((mask, mask_pad), dim=1).bool() 279 | 280 | query_key_value = x.permute(1, 0, 2) 281 | 282 | attn_output, _ = self.attention( 283 | query=query_key_value[-self.prediction_length :, ...], 284 | key=query_key_value, 285 | value=query_key_value, 286 | # key_padding_mask=key_padding_mask, # does not work on GPU :-( 287 | attn_mask=self.attn_mask, 288 | ) 289 | att = self.att_net(attn_output.permute(1, 0, 2)) 290 | 291 | x = x[:, self.context_length :, ...] 292 | x = self.att_lnorm(x + att) 293 | x = self.ff_net(x) 294 | x = self.ff_lnorm(x + skip) 295 | 296 | return x 297 | -------------------------------------------------------------------------------- /test/modules/test_feature.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | 15 | from itertools import chain, combinations 16 | 17 | import pytest 18 | import torch 19 | import torch.nn as nn 20 | 21 | from pts.modules import FeatureEmbedder, FeatureAssembler 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "config", 26 | ( 27 | lambda N, T: [ 28 | # single static feature 29 | dict(shape=(N, 1), kwargs=dict(cardinalities=[50], embedding_dims=[10]),), 30 | # single dynamic feature 31 | dict(shape=(N, T, 1), kwargs=dict(cardinalities=[2], embedding_dims=[10]),), 32 | # multiple static features 33 | dict( 34 | shape=(N, 4), 35 | kwargs=dict( 36 | cardinalities=[50, 50, 50, 50], embedding_dims=[10, 20, 30, 40], 37 | ), 38 | ), 39 | # multiple dynamic features 40 | dict( 41 | shape=(N, T, 3), 42 | kwargs=dict(cardinalities=[30, 30, 30], embedding_dims=[10, 20, 30]), 43 | ), 44 | ] 45 | )(10, 20), 46 | ) 47 | def test_feature_embedder(config): 48 | out_shape = config["shape"][:-1] + (sum(config["kwargs"]["embedding_dims"]),) 49 | embed_feature = FeatureEmbedder(**config["kwargs"]) 50 | for embed in embed_feature._FeatureEmbedder__embedders: 51 | nn.init.constant_(embed.weight, 1.0) 52 | 53 | def test_parameters_length(): 54 | exp_params_len = len([p for p in embed_feature.parameters()]) 55 | act_params_len = len(config["kwargs"]["embedding_dims"]) 56 | assert exp_params_len == act_params_len 57 | 58 | def test_forward_pass(): 59 | act_output = embed_feature(torch.ones(config["shape"]).to(torch.long)) 60 | exp_output = torch.ones(out_shape) 61 | 62 | assert act_output.shape == exp_output.shape 63 | assert torch.abs(torch.sum(act_output - exp_output)) < 1e-20 64 | 65 | test_parameters_length() 66 | test_forward_pass() 67 | 68 | 69 | @pytest.mark.parametrize( 70 | "config", 71 | ( 72 | lambda N, T: [ 73 | dict( 74 | N=N, 75 | T=T, 76 | static_cat=dict(C=2), 77 | static_real=dict(C=5), 78 | dynamic_cat=dict(C=3), 79 | dynamic_real=dict(C=4), 80 | embed_static=dict(cardinalities=[2, 4], embedding_dims=[3, 6],), 81 | embed_dynamic=dict( 82 | cardinalities=[30, 30, 30], embedding_dims=[10, 20, 30], 83 | ), 84 
| ) 85 | ] 86 | )(10, 25), 87 | ) 88 | def test_feature_assembler(config): 89 | # iterate over the power-set of all possible feature types, excluding the empty set 90 | feature_types = { 91 | "static_cat", 92 | "static_real", 93 | "dynamic_cat", 94 | "dynamic_real", 95 | } 96 | feature_combs = chain.from_iterable( 97 | combinations(feature_types, r) for r in range(1, len(feature_types) + 1) 98 | ) 99 | 100 | # iterate over the power-set of all possible embedder types, including the empty set 101 | embedder_types = {"embed_static", "embed_dynamic"} 102 | embedder_combs = chain.from_iterable( 103 | combinations(embedder_types, r) for r in range(0, len(embedder_types) + 1) 104 | ) 105 | 106 | for enabled_embedders in embedder_combs: 107 | embed_static = ( 108 | FeatureEmbedder(**config["embed_static"]) 109 | if "embed_static" in enabled_embedders 110 | else None 111 | ) 112 | embed_dynamic = ( 113 | FeatureEmbedder(**config["embed_dynamic"]) 114 | if "embed_dynamic" in enabled_embedders 115 | else None 116 | ) 117 | 118 | for enabled_features in feature_combs: 119 | assemble_feature = FeatureAssembler( 120 | T=config["T"], embed_static=embed_static, embed_dynamic=embed_dynamic, 121 | ) 122 | # assemble_feature.collect_params().initialize(mx.initializer.One()) 123 | 124 | def test_parameters_length(): 125 | exp_params_len = sum( 126 | [ 127 | len(config[k]["embedding_dims"]) 128 | for k in ["embed_static", "embed_dynamic"] 129 | if k in enabled_embedders 130 | ] 131 | ) 132 | act_params_len = len([p for p in assemble_feature.parameters()]) 133 | assert exp_params_len == act_params_len 134 | 135 | def test_forward_pass(): 136 | N, T = config["N"], config["T"] 137 | 138 | inp_features = [] 139 | out_features = [] 140 | 141 | if "static_cat" not in enabled_features: 142 | inp_features.append(torch.zeros((N, 1))) 143 | out_features.append(torch.zeros((N, T, 1))) 144 | elif embed_static: # and 'static_cat' in enabled_features 145 | C = config["static_cat"]["C"] 146 | inp_features.append( 147 | torch.cat( 148 | [ 149 | torch.randint( 150 | 0, 151 | config["embed_static"]["cardinalities"][c], 152 | (N, 1), 153 | ) 154 | for c in range(C) 155 | ], 156 | dim=1, 157 | ) 158 | ) 159 | out_features.append( 160 | torch.ones( 161 | (N, T, sum(config["embed_static"]["embedding_dims"]),) 162 | ) 163 | ) 164 | else: # not embed_static and 'static_cat' in enabled_features 165 | C = config["static_cat"]["C"] 166 | inp_features.append( 167 | torch.cat( 168 | [ 169 | torch.randint( 170 | 0, 171 | config["embed_static"]["cardinalities"][c], 172 | (N, 1), 173 | ) 174 | for c in range(C) 175 | ], 176 | dim=1, 177 | ) 178 | ) 179 | out_features.append( 180 | inp_features[-1].unsqueeze(1).expand(-1, T, -1).float() 181 | ) 182 | 183 | if "static_real" not in enabled_features: 184 | inp_features.append(torch.zeros((N, 1))) 185 | out_features.append(torch.zeros((N, T, 1))) 186 | else: 187 | C = config["static_real"]["C"] 188 | static_real = torch.empty((N, C)).uniform_(0, 100) 189 | inp_features.append(static_real) 190 | out_features.append(static_real.unsqueeze(-2).expand(-1, T, -1)) 191 | 192 | if "dynamic_cat" not in enabled_features: 193 | inp_features.append(torch.zeros((N, T, 1))) 194 | out_features.append(torch.zeros((N, T, 1))) 195 | elif embed_dynamic: # and 'dynamic_cat' in enabled_features 196 | C = config["dynamic_cat"]["C"] 197 | inp_features.append( 198 | torch.cat( 199 | [ 200 | torch.randint( 201 | 0, 202 | config["embed_dynamic"]["cardinalities"][c], 203 | (N, T, 1), 204 | ) 205 | for c in range(C) 206 | ], 
207 | dim=2, 208 | ) 209 | ) 210 | out_features.append( 211 | torch.ones( 212 | (N, T, sum(config["embed_dynamic"]["embedding_dims"]),) 213 | ) 214 | ) 215 | else: # not embed_dynamic and 'dynamic_cat' in enabled_features 216 | C = config["dynamic_cat"]["C"] 217 | inp_features.append( 218 | torch.cat( 219 | [ 220 | torch.randint( 221 | 0, 222 | config["embed_dynamic"]["cardinalities"][c], 223 | (N, T, 1), 224 | ) 225 | for c in range(C) 226 | ], 227 | dim=2, 228 | ) 229 | ) 230 | out_features.append(inp_features[-1].float()) 231 | 232 | if "dynamic_real" not in enabled_features: 233 | inp_features.append(torch.zeros((N, T, 1))) 234 | out_features.append(torch.zeros((N, T, 1))) 235 | else: 236 | C = config["dynamic_real"]["C"] 237 | dynamic_real = torch.empty((N, T, C)).uniform_(0, 100) 238 | inp_features.append(dynamic_real) 239 | out_features.append(dynamic_real) 240 | 241 | act_output = assemble_feature(*inp_features) 242 | exp_output = torch.cat(out_features, dim=2) 243 | 244 | assert exp_output.shape == act_output.shape 245 | assert torch.abs(torch.sum(exp_output - act_output)) < 1e-20 246 | 247 | test_parameters_length() 248 | test_forward_pass() 249 | --------------------------------------------------------------------------------
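Note: the TimeGradEstimator defined in pts/model/time_grad/time_grad_estimator.py above exposes the same estimator API exercised by the tests in this section (construct the estimator, call .train(), then pass the resulting predictor to make_evaluation_predictions). The following is only a minimal construction sketch, not code from this repository: the dataset variable, frequency, and all hyperparameter values are illustrative assumptions.

import torch
from pts import Trainer
from pts.model.time_grad import TimeGradEstimator

# `dataset_train` is a hypothetical multivariate GluonTS training dataset whose
# entries carry a 2D "target" array of shape (target_dim, T); it is not defined here.
estimator = TimeGradEstimator(
    input_size=1484,          # assumed value; depends on target_dim, lags_seq and time features
    freq="H",                 # assumed hourly frequency
    prediction_length=24,
    context_length=24,
    target_dim=370,           # assumed number of series in the multivariate target
    diff_steps=100,           # number of diffusion steps (default above)
    loss_type="l2",
    beta_schedule="linear",
    trainer=Trainer(
        device="cuda" if torch.cuda.is_available() else "cpu",
        epochs=20,
        learning_rate=1e-3,
        batch_size=64,
        num_batches_per_epoch=100,
    ),
)
predictor = estimator.train(dataset_train, num_workers=1)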