├── .gitignore
├── README.md
├── conf
│   ├── __init__.py
│   ├── conf.py
│   ├── electricity.yaml
│   ├── traffic.yaml
│   └── volatility.yaml
├── data_formatters
│   ├── __init__.py
│   ├── base.py
│   ├── electricity.py
│   ├── favorita.py
│   ├── traffic.py
│   ├── utils.py
│   └── volatility.py
├── dataset
│   ├── __init__.py
│   └── ts_dataset.py
├── env.yml
├── inference.py
├── main.py
├── models
│   ├── temporal_fusion_t
│   │   ├── __init__.py
│   │   ├── add_and_norm.py
│   │   ├── base.py
│   │   ├── gated_linear_unit.py
│   │   ├── gated_residual_network.py
│   │   ├── interpretable_multi_head_attention.py
│   │   ├── linear_layer.py
│   │   ├── lstm_combine_and_mask.py
│   │   ├── scaled_dot_product_attention.py
│   │   ├── static_combine_and_mask.py
│   │   ├── tft_model.py
│   │   └── time_distributed.py
│   ├── transformer
│   │   ├── __init__.py
│   │   ├── decoder.py
│   │   ├── encoder.py
│   │   ├── loss.py
│   │   ├── multiHeadAttention.py
│   │   ├── positionwiseFeedForward.py
│   │   ├── transformer.py
│   │   └── utils.py
│   └── transformer_grn
│       ├── __init__.py
│       ├── decoder.py
│       ├── encoder.py
│       ├── loss.py
│       ├── multiHeadAttention.py
│       ├── positionwiseFeedForward.py
│       ├── transformer.py
│       └── utils.py
├── progress_bar.py
├── requirements.txt
├── scheduler.py
├── slurm.py
├── slurm
│   └── Traffic_5TR.sh
├── trainer.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | ### PyCharm stuff
10 | .idea/
11 | log/*
12 | *__pycache__/
13 |
14 | ### Pytorch model weights
15 | *.pth
16 |
17 | ### Dataset
18 | data/*
19 |
20 | ### Temp files
21 | tmp/*
22 |
23 | ### images
24 | *.jpg
25 | *.jpeg
26 | *.png
27 | *.tif
28 | *.tiff
29 |
30 | # Distribution / packaging
31 | .Python
32 | build/
33 | develop-eggs/
34 | dist/
35 | downloads/
36 | eggs/
37 | .eggs/
38 | lib/
39 | lib64/
40 | parts/
41 | sdist/
42 | var/
43 | wheels/
44 | pip-wheel-metadata/
45 | share/python-wheels/
46 | *.egg-info/
47 | .installed.cfg
48 | *.egg
49 | MANIFEST
50 |
51 | # PyInstaller
52 | # Usually these files are written by a python script from a template
53 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
54 | *.manifest
55 | *.spec
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .nox/
65 | .coverage
66 | .coverage.*
67 | .cache
68 | nosetests.xml
69 | coverage.xml
70 | *.cover
71 | *.py,cover
72 | .hypothesis/
73 | .pytest_cache/
74 |
75 | # Translations
76 | *.mo
77 | *.pot
78 |
79 | # Django stuff:
80 | *.log
81 | local_settings.py
82 | db.sqlite3
83 | db.sqlite3-journal
84 |
85 | # Flask stuff:
86 | instance/
87 | .webassets-cache
88 |
89 | # Scrapy stuff:
90 | .scrapy
91 |
92 | # Sphinx documentation
93 | docs/_build/
94 |
95 | # PyBuilder
96 | target/
97 |
98 | # Jupyter Notebook
99 | .ipynb_checkpoints
100 |
101 | # IPython
102 | profile_default/
103 | ipython_config.py
104 |
105 | # pyenv
106 | .python-version
107 |
108 | # pipenv
109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
112 | # install all needed dependencies.
113 | #Pipfile.lock
114 |
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
116 | __pypackages__/
117 |
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 |
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 |
138 | # Rope project settings
139 | .ropeproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🐐 Time Series Forecasting
2 | Time series forecasting models
3 |
4 | ## Set-up
5 |
6 | - Install the required packages (pip or conda)
7 | - `pip install -r requirements.txt`
8 | - `conda env create -f env.yml`
9 |
10 | - Download data
11 | - https://drive.google.com/file/d/1Na7e2yJy1Oix8-HcKQS97u1VZodpZ-OZ/view?usp=sharing
12 |
13 | - Train and test on the electricity dataset
14 | - `python ./main.py --exp_name electricity --conf_file_path ./conf/electricity.yaml`
15 | 
16 | - Plot predictions on the test set
17 | - `python ./main.py --exp_name electricity --conf_file_path ./conf/electricity.yaml --inference=True`
18 |
19 |
20 | ## Models
21 |
22 | - Temporal Fusion Transformer
23 | https://arxiv.org/pdf/1912.09363.pdf
24 |
25 | Usage:
26 | - `model: tf_transformer`
27 |
28 | - Transformer
29 | https://arxiv.org/pdf/1706.03762.pdf
30 | https://pytorch.org/tutorials/beginner/transformer_tutorial.html
31 |
32 | Usage:
33 | - `model: transformer`
34 |
35 | - GRN-Transformer
36 | Uses a GRN block after multi-head attention to encode static variables
37 |
38 | Usage:
39 | - `model: grn_transformer`
40 |
--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------
1 | from conf.conf import Conf
--------------------------------------------------------------------------------
/conf/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | import os
5 |
6 | PYTHONPATH = '..:.'
7 | if os.environ.get('PYTHONPATH', default=None) is None:
8 | os.environ['PYTHONPATH'] = PYTHONPATH
9 | else:
10 | os.environ['PYTHONPATH'] += (':' + PYTHONPATH)
11 |
12 | import yaml
13 | import socket
14 | import random
15 | import torch
16 | import numpy as np
17 | from path import Path
18 | from typing import Optional
19 | import termcolor
20 | from datetime import datetime
21 |
22 |
23 | def set_seed(seed=None):
24 | # type: (Optional[int]) -> int
25 | """
26 | set the random seed using the required value (`seed`)
27 | or a random value if `seed` is `None`
28 | :return: the newly set seed
29 | """
30 | if seed is None:
31 | seed = random.randint(1, 10000)
32 | random.seed(seed)
33 | torch.manual_seed(seed)
34 | np.random.seed(seed)
35 | return seed
36 |
37 |
38 | class Conf(object):
39 | HOSTNAME = socket.gethostname()
40 | LOG_PATH = Path('./logs/')
41 |
42 | def __init__(self, conf_file_path=None, seed=None, exp_name=None, log=True):
43 | # type: (str, int, str, bool) -> None
44 | """
45 | :param conf_file_path: optional path of the configuration file
46 | :param seed: desired seed for the RNG; if `None`, it will be chosen randomly
47 | :param exp_name: name of the experiment
48 | :param log: `True` if you want to log each step; `False` otherwise
49 | """
50 | self.exp_name = exp_name
51 | self.log_each_step = log
52 |
53 | # print project name and host name
54 | self.project_name = Path(__file__).parent.parent.basename()
55 | m_str = f'┃ {self.project_name}@{Conf.HOSTNAME} ┃'
56 | u_str = '┏' + '━' * (len(m_str) - 2) + '┓'
57 | b_str = '┗' + '━' * (len(m_str) - 2) + '┛'
58 | print(u_str + '\n' + m_str + '\n' + b_str)
59 |
60 | # define output paths
61 | self.project_log_path = Path('./log')
62 |
63 | # set random seed
64 | self.seed = set_seed(seed) # type: int
65 |
66 | self.keys_to_hide = list(self.__dict__.keys()) + ['keys_to_hide']
67 |
68 | # if the configuration file is not specified
69 | # try to load a configuration file based on the experiment name
70 | tmp = Path(__file__).parent / (str(self.exp_name) + '.yaml')
71 | if conf_file_path is None and tmp.exists():
72 | conf_file_path = tmp
73 |
74 | # read the YAML configuration file
75 | if conf_file_path is None:
76 | y = {}
77 | else:
78 | with open(conf_file_path, 'r') as conf_file:
79 | y = yaml.load(conf_file, Loader=yaml.Loader)
80 |
81 | # read configuration parameters from YAML file
82 | # or set their default value
83 | self.lr = y.get('lr', 0.0001) # type: float
84 | self.epochs = y.get('num_epochs', 100) # type: int
85 | self.n_workers = y.get('n_workers', 1) # type: int
86 | self.batch_size = y.get('batch_size', 64) # type: int
87 | self.quantiles = y.get('quantiles', [0.1, 0.5, 0.9]) # type: list
88 | self.ds_name = y.get('ds_name', "electricity") # type: str
89 | self.all_params = y # type: dict
90 |
91 | self.exp_log_path = self.project_log_path / self.all_params.get("model", "unknown_model") \
92 | / str(exp_name) / datetime.now().strftime("%m-%d-%Y - %H-%M-%S")
93 |
94 | default_device = 'cuda' if torch.cuda.is_available() else 'cpu'
95 | self.device = y.get('device', default_device)  # type: str
96 |
97 | def write_to_file(self, out_file_path):
98 | # type: (str) -> None
99 | """
100 | Writes configuration parameters to `out_file_path`
101 | :param out_file_path: path of the output file
102 | """
103 | import re
104 |
105 | ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')
106 | text = ansi_escape.sub('', str(self))
107 | with open(out_file_path, 'w') as out_file:
108 | print(text, file=out_file)
109 |
110 | def __str__(self):
111 | # type: () -> str
112 | out_str = ''
113 | for key in self.__dict__:
114 | if key in self.keys_to_hide:
115 | continue
116 | value = self.__dict__[key]
117 | if type(value) is Path or type(value) is str:
118 | value = value.replace(Conf.LOG_PATH, '$LOG_PATH')
119 | value = termcolor.colored(value, 'yellow')
120 | else:
121 | value = termcolor.colored(f'{value}', 'magenta')
122 | out_str += termcolor.colored(f'{key.upper()}', 'blue')
123 | out_str += termcolor.colored(': ', 'red')
124 | out_str += value
125 | out_str += '\n'
126 | return out_str[:-1]
127 |
128 | def no_color_str(self):
129 | # type: () -> str
130 | out_str = ''
131 | for key in self.__dict__:
132 | value = self.__dict__[key]
133 | if type(value) is Path or type(value) is str:
134 | value = value.replace(Conf.LOG_PATH, '$LOG_PATH')
135 | out_str += f'{key.upper()}: {value}\n'
136 | return out_str[:-1]
137 |
138 |
139 | def show_default_params():
140 | """
141 | Print default configuration parameters
142 | """
143 | cnf = Conf(exp_name='default')
144 | print(f'\nDefault configuration parameters: \n{cnf}')
145 |
146 |
147 | if __name__ == '__main__':
148 | show_default_params()
149 |
--------------------------------------------------------------------------------
/conf/electricity.yaml:
--------------------------------------------------------------------------------
1 | #Hyper Params
2 | batch_size: 64
3 | device: cuda
4 | lr: 0.001
5 | num_epochs: 20
6 | n_workers: 0
7 | model: transformer
8 | loader: base
9 |
10 | # Dataset
11 | ds_name: electricity
12 | train_samples: 450000
13 | test_samples: 50000
14 | val_samples: 50000
15 | input_size: 5
16 | output_size: 1
17 | total_time_steps: 192
18 | num_encoder_steps: 168
19 | static_input_loc:
20 | - 4
21 | input_obs_loc:
22 | - 0
23 | known_categorical_inputs:
24 | - 0
25 | known_regular_inputs:
26 | - 1
27 | - 2
28 | - 3
29 | category_counts:
30 | - 369
31 |
32 | # Model Temporal Fusion Transformer
33 | quantiles:
34 | - 0.1
35 | - 0.5
36 | - 0.9
37 | batch_first: true
38 | early_stopping_patience: 5
39 | hidden_layer_size: 160
40 | stack_size: 1
41 | dropout_rate: 0.1
42 | max_gradient_norm: 0.01
43 | num_heads: 4
44 |
45 | # Model Transformer
46 | d_model: 64
47 | q: 16
48 | v: 16
49 | h: 4
50 | N: 2
51 | attention_size: 0
52 | dropout: 0.1
53 | pe: original
54 | chunk_mode: None
55 | d_input: 5
56 | d_output: 3
57 |
--------------------------------------------------------------------------------
/conf/traffic.yaml:
--------------------------------------------------------------------------------
1 | # Hyper Params
2 | batch_size: 128
3 | device: cuda
4 | lr: 0.001
5 | num_epochs: 100
6 | n_workers: 0
7 | model: tf_transformer
8 |
9 | # Dataset
10 | ds_name: traffic
11 | train_samples: 10000
12 | test_samples: 1000
13 | val_samples: 1000
14 | input_size: 5
15 | output_size: 1
16 | total_time_steps: 192
17 | num_encoder_steps: 168
18 | static_input_loc:
19 | - 4
20 | input_obs_loc:
21 | - 0
22 | known_categorical_inputs:
23 | - 0
24 | known_regular_inputs:
25 | - 1
26 | - 2
27 | - 3
28 | category_counts:
29 | - 963
30 |
31 | # Model Temporal Fusion Transformer
32 | quantiles:
33 | - 0.1
34 | - 0.5
35 | - 0.9
36 | batch_first: true
37 | early_stopping_patience: 5
38 | hidden_layer_size: 320
39 | stack_size: 1
40 | dropout_rate: 0.3
41 | max_gradient_norm: 100.0
42 | num_heads: 4
43 | multiprocessing_workers: 5
44 |
45 | # Model Transformer
46 | d_model: 64
47 | q: 16
48 | v: 16
49 | h: 4
50 | N: 2
51 | attention_size: 0
52 | dropout: 0.1
53 | pe: original
54 | chunk_mode: None
55 | d_input: 5
56 | d_output: 3
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/conf/volatility.yaml:
--------------------------------------------------------------------------------
1 | batch_first: true
2 | batch_size: 64
3 | train_samples: 10000
4 | test_samples: 1000
5 | val_samples: 1000
6 | category_counts:
7 | - 7
8 | - 31
9 | - 53
10 | - 12
11 | - 4
12 | device: cuda
13 | dropout_rate: 0.3
14 | ds_name: volatility
15 | early_stopping_patience: 5
16 | hidden_layer_size: 160
17 | input_obs_loc:
18 | - 0
19 | input_size: 8
20 | known_categorical_inputs:
21 | - 0
22 | - 1
23 | - 2
24 | - 3
25 | - 4
26 | known_regular_inputs:
27 | - 2
28 | lr: 0.0001
29 | max_gradient_norm: 0.01
30 | n_workers: 0
31 | num_encoder_steps: 252
32 | num_epochs: 100
33 | num_heads: 1
34 | output_size: 1
35 | quantiles:
36 | - 0.1
37 | - 0.5
38 | - 0.9
39 | stack_size: 1
40 | static_input_loc:
41 | - 7
42 | total_time_steps: 257
43 |
--------------------------------------------------------------------------------
/data_formatters/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from data_formatters.utils import make_data_formatter, csv_path_to_folder
17 | from data_formatters import volatility, electricity, favorita, traffic
18 |
--------------------------------------------------------------------------------
/data_formatters/base.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | """Default data formatting functions for experiments.
18 |
19 | For new datasets, inherit from GenericDataFormatter and implement
20 | all abstract functions.
21 | 
22 | These dataset-specific methods:
23 | 1) Define the column and input types for tabular dataframes used by the model
24 | 2) Perform the necessary input feature engineering & normalisation steps
25 | 3) Revert the normalisation for predictions
26 | 4) Are responsible for train, validation and test splits
27 |
28 |
29 | """
30 |
31 | import abc
32 | import enum
33 |
34 |
35 | # Type definitions
36 | class DataTypes(enum.IntEnum):
37 | """Defines numerical types of each column."""
38 | REAL_VALUED = 0
39 | CATEGORICAL = 1
40 | DATE = 2
41 |
42 |
43 | class InputTypes(enum.IntEnum):
44 | """Defines input types of each column."""
45 | TARGET = 0
46 | OBSERVED_INPUT = 1
47 | KNOWN_INPUT = 2
48 | STATIC_INPUT = 3
49 | ID = 4 # Single column used as an entity identifier
50 | TIME = 5 # Single column exclusively used as a time index
51 |
52 |
53 | class GenericDataFormatter(abc.ABC):
54 | """Abstract base class for all data formatters.
55 |
56 | User can implement the abstract methods below to perform dataset-specific
57 | manipulations.
58 |
59 | """
60 |
61 | @abc.abstractmethod
62 | def set_scalers(self, df):
63 | """Calibrates scalers using the data supplied."""
64 | raise NotImplementedError()
65 |
66 | @abc.abstractmethod
67 | def transform_inputs(self, df):
68 | """Performs feature transformation."""
69 | raise NotImplementedError()
70 |
71 | @abc.abstractmethod
72 | def format_predictions(self, df):
73 | """Reverts any normalisation to give predictions in original scale."""
74 | raise NotImplementedError()
75 |
76 | @abc.abstractmethod
77 | def split_data(self, df):
78 | """Performs the default train, validation and test splits."""
79 | raise NotImplementedError()
80 |
81 | @property
82 | @abc.abstractmethod
83 | def _column_definition(self):
84 | """Defines order, input type and data type of each column."""
85 | raise NotImplementedError()
86 |
87 | @abc.abstractmethod
88 | def get_fixed_params(self):
89 | """Defines the fixed parameters used by the model for training.
90 |
91 | Requires the following keys:
92 | 'total_time_steps': Defines the total number of time steps used by TFT
93 | 'num_encoder_steps': Determines length of LSTM encoder (i.e. history)
94 | 'num_epochs': Maximum number of epochs for training
95 | 'early_stopping_patience': Early stopping param for keras
96 | 'multiprocessing_workers': # of cpus for data processing
97 |
98 |
99 | Returns:
100 | A dictionary of fixed parameters, e.g.:
101 |
102 | fixed_params = {
103 | 'total_time_steps': 252 + 5,
104 | 'num_encoder_steps': 252,
105 | 'num_epochs': 100,
106 | 'early_stopping_patience': 5,
107 | 'multiprocessing_workers': 5,
108 | }
109 | """
110 | raise NotImplementedError
111 |
112 | # Shared functions across data-formatters
113 | @property
114 | def num_classes_per_cat_input(self):
115 | """Returns number of categories per relevant input.
116 |
117 | This is subsequently required for the keras embedding layers.
118 | """
119 | return self._num_classes_per_cat_input
120 |
121 | def get_num_samples_for_calibration(self):
122 | """Gets the default number of training and validation samples.
123 |
124 | Used to sub-sample the data for network calibration; a value of -1 uses
125 | all available samples.
126 |
127 | Returns:
128 | Tuple of (training samples, validation samples)
129 | """
130 | return -1, -1
131 |
132 | def get_column_definition(self):
133 | """Returns formatted column definition in order expected by the TFT."""
134 |
135 | column_definition = self._column_definition
136 |
137 | # Sanity checks first.
138 | # Ensure only one ID and time column exist
139 | def _check_single_column(input_type):
140 |
141 | length = len([tup for tup in column_definition if tup[2] == input_type])
142 |
143 | if length != 1:
144 | raise ValueError('Illegal number of inputs ({}) of type {}'.format(
145 | length, input_type))
146 |
147 | _check_single_column(InputTypes.ID)
148 | _check_single_column(InputTypes.TIME)
149 |
150 | identifier = [tup for tup in column_definition if tup[2] == InputTypes.ID]
151 | time = [tup for tup in column_definition if tup[2] == InputTypes.TIME]
152 | real_inputs = [
153 | tup for tup in column_definition if tup[1] == DataTypes.REAL_VALUED and
154 | tup[2] not in {InputTypes.ID, InputTypes.TIME}
155 | ]
156 | categorical_inputs = [
157 | tup for tup in column_definition if tup[1] == DataTypes.CATEGORICAL and
158 | tup[2] not in {InputTypes.ID, InputTypes.TIME}
159 | ]
160 |
161 | return identifier + time + real_inputs + categorical_inputs
162 |
163 | def _get_input_columns(self):
164 | """Returns names of all input columns."""
165 | return [
166 | tup[0]
167 | for tup in self.get_column_definition()
168 | if tup[2] not in {InputTypes.ID, InputTypes.TIME}
169 | ]
170 |
171 | def _get_tft_input_indices(self):
172 | """Returns the relevant indexes and input sizes required by TFT."""
173 |
174 | # Functions
175 | def _extract_tuples_from_data_type(data_type, defn):
176 | return [
177 | tup for tup in defn if tup[1] == data_type and
178 | tup[2] not in {InputTypes.ID, InputTypes.TIME}
179 | ]
180 |
181 | def _get_locations(input_types, defn):
182 | return [i for i, tup in enumerate(defn) if tup[2] in input_types]
183 |
184 | # Start extraction
185 | column_definition = [
186 | tup for tup in self.get_column_definition()
187 | if tup[2] not in {InputTypes.ID, InputTypes.TIME}
188 | ]
189 |
190 | categorical_inputs = _extract_tuples_from_data_type(DataTypes.CATEGORICAL,
191 | column_definition)
192 | real_inputs = _extract_tuples_from_data_type(DataTypes.REAL_VALUED,
193 | column_definition)
194 |
195 | locations = {
196 | 'input_size':
197 | len(self._get_input_columns()),
198 | 'output_size':
199 | len(_get_locations({InputTypes.TARGET}, column_definition)),
200 | 'category_counts':
201 | self.num_classes_per_cat_input,
202 | 'input_obs_loc':
203 | _get_locations({InputTypes.TARGET}, column_definition),
204 | 'static_input_loc':
205 | _get_locations({InputTypes.STATIC_INPUT}, column_definition),
206 | 'known_regular_inputs':
207 | _get_locations({InputTypes.STATIC_INPUT, InputTypes.KNOWN_INPUT},
208 | real_inputs),
209 | 'known_categorical_inputs':
210 | _get_locations({InputTypes.STATIC_INPUT, InputTypes.KNOWN_INPUT},
211 | categorical_inputs),
212 | }
213 |
214 | return locations
215 |
216 | def get_experiment_params(self):
217 | """Returns fixed model parameters for experiments."""
218 |
219 | required_keys = [
220 | 'total_time_steps', 'num_encoder_steps', 'num_epochs',
221 | 'early_stopping_patience', 'multiprocessing_workers'
222 | ]
223 |
224 | fixed_params = self.get_fixed_params()
225 |
226 | for k in required_keys:
227 | if k not in fixed_params:
228 | raise ValueError('Field {}'.format(k) +
229 | ' missing from fixed parameter definitions!')
230 |
231 | fixed_params['column_definition'] = self.get_column_definition()
232 |
233 | fixed_params.update(self._get_tft_input_indices())
234 |
235 | return fixed_params
236 |
237 |
--------------------------------------------------------------------------------
/data_formatters/electricity.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | """Custom formatting functions for Electricity dataset.
18 |
19 | Defines dataset specific column definitions and data transformations. Uses
20 | entity specific z-score normalization.
21 | """
22 |
23 | import data_formatters.base
24 | import data_formatters.utils as utils
25 | import pandas as pd
26 | import sklearn.preprocessing
27 |
28 | GenericDataFormatter = data_formatters.base.GenericDataFormatter
29 | DataTypes = data_formatters.base.DataTypes
30 | InputTypes = data_formatters.base.InputTypes
31 |
32 |
33 | class ElectricityFormatter(GenericDataFormatter):
34 | """Defines and formats data for the electricity dataset.
35 |
36 | Note that per-entity z-score normalization is used here, and is implemented
37 | across functions.
38 |
39 | Attributes:
40 | column_definition: Defines input and data type of column used in the
41 | experiment.
42 | identifiers: Entity identifiers used in experiments.
43 | """
44 |
45 | _column_definition = [
46 | ('id', DataTypes.REAL_VALUED, InputTypes.ID),
47 | ('hours_from_start', DataTypes.REAL_VALUED, InputTypes.TIME),
48 | ('power_usage', DataTypes.REAL_VALUED, InputTypes.TARGET),
49 | ('hour', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
50 | ('day_of_week', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
51 | ('hours_from_start', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
52 | ('categorical_id', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
53 | ]
54 |
55 | def __init__(self):
56 | """Initialises formatter."""
57 |
58 | self.identifiers = None
59 | self._real_scalers = None
60 | self._cat_scalers = None
61 | self._target_scaler = None
62 | self._num_classes_per_cat_input = None
63 | self._time_steps = self.get_fixed_params()['total_time_steps']
64 |
65 | def split_data(self, df, valid_boundary=1315, test_boundary=1339):
66 | """Splits data frame into training-validation-test data frames.
67 |
68 | This also calibrates scaling object, and transforms data for each split.
69 |
70 | Args:
71 | df: Source data frame to split.
72 | valid_boundary: Starting day (days_from_start) for validation data
73 | test_boundary: Starting day (days_from_start) for test data
74 |
75 | Returns:
76 | Tuple of transformed (train, valid, test) data.
77 | """
78 |
79 | print('Formatting train-valid-test splits.')
80 |
81 | index = df['days_from_start']
82 | train = df.loc[index < valid_boundary]
83 | valid = df.loc[(index >= valid_boundary - 7) & (index < test_boundary)]
84 | test = df.loc[index >= test_boundary - 7]
85 |
86 | self.set_scalers(train)
87 |
88 | return (self.transform_inputs(data) for data in [train, valid, test])
89 |
90 | def set_scalers(self, df):
91 | """Calibrates scalers using the data supplied.
92 |
93 | Args:
94 | df: Data to use to calibrate scalers.
95 | """
96 | print('Setting scalers with training data...')
97 |
98 | column_definitions = self.get_column_definition()
99 | id_column = utils.get_single_col_by_input_type(InputTypes.ID,
100 | column_definitions)
101 | target_column = utils.get_single_col_by_input_type(InputTypes.TARGET,
102 | column_definitions)
103 |
104 | # Format real scalers
105 | real_inputs = utils.extract_cols_from_data_type(
106 | DataTypes.REAL_VALUED, column_definitions,
107 | {InputTypes.ID, InputTypes.TIME})
108 |
109 | # Initialise scaler caches
110 | self._real_scalers = {}
111 | self._target_scaler = {}
112 | identifiers = []
113 | for identifier, sliced in df.groupby(id_column):
114 |
115 | if len(sliced) >= self._time_steps:
116 |
117 | data = sliced[real_inputs].values
118 | targets = sliced[[target_column]].values
119 | self._real_scalers[identifier] \
120 | = sklearn.preprocessing.StandardScaler().fit(data)
121 |
122 | self._target_scaler[identifier] \
123 | = sklearn.preprocessing.StandardScaler().fit(targets)
124 | identifiers.append(identifier)
125 |
126 | # Format categorical scalers
127 | categorical_inputs = utils.extract_cols_from_data_type(
128 | DataTypes.CATEGORICAL, column_definitions,
129 | {InputTypes.ID, InputTypes.TIME})
130 |
131 | categorical_scalers = {}
132 | num_classes = []
133 | for col in categorical_inputs:
134 | # Set all to str so that we don't have mixed integer/string columns
135 | srs = df[col].apply(str)
136 | categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(
137 | srs.values)
138 | num_classes.append(srs.nunique())
139 |
140 | # Set categorical scaler outputs
141 | self._cat_scalers = categorical_scalers
142 | self._num_classes_per_cat_input = num_classes
143 |
144 | # Extract identifiers in case required
145 | self.identifiers = identifiers
146 |
147 | def transform_inputs(self, df):
148 | """Performs feature transformations.
149 |
150 | This includes feature engineering, preprocessing and normalisation.
151 |
152 | Args:
153 | df: Data frame to transform.
154 |
155 | Returns:
156 | Transformed data frame.
157 |
158 | """
159 |
160 | if self._real_scalers is None and self._cat_scalers is None:
161 | raise ValueError('Scalers have not been set!')
162 |
163 | # Extract relevant columns
164 | column_definitions = self.get_column_definition()
165 | id_col = utils.get_single_col_by_input_type(InputTypes.ID,
166 | column_definitions)
167 | real_inputs = utils.extract_cols_from_data_type(
168 | DataTypes.REAL_VALUED, column_definitions,
169 | {InputTypes.ID, InputTypes.TIME})
170 | categorical_inputs = utils.extract_cols_from_data_type(
171 | DataTypes.CATEGORICAL, column_definitions,
172 | {InputTypes.ID, InputTypes.TIME})
173 |
174 | # Transform real inputs per entity
175 | df_list = []
176 | for identifier, sliced in df.groupby(id_col):
177 |
178 | # Filter out any trajectories that are too short
179 | if len(sliced) >= self._time_steps:
180 | sliced_copy = sliced.copy()
181 | sliced_copy[real_inputs] = self._real_scalers[identifier].transform(
182 | sliced_copy[real_inputs].values)
183 | df_list.append(sliced_copy)
184 |
185 | output = pd.concat(df_list, axis=0)
186 |
187 | # Format categorical inputs
188 | for col in categorical_inputs:
189 | string_df = df[col].apply(str)
190 | output[col] = self._cat_scalers[col].transform(string_df)
191 |
192 | return output
193 |
194 | def format_predictions(self, predictions):
195 | """Reverts any normalisation to give predictions in original scale.
196 |
197 | Args:
198 | predictions: Dataframe of model predictions.
199 |
200 | Returns:
201 | Data frame of unnormalised predictions.
202 | """
203 |
204 | if self._target_scaler is None:
205 | raise ValueError('Scalers have not been set!')
206 |
207 | column_names = predictions.columns
208 |
209 | df_list = []
210 | for identifier, sliced in predictions.groupby('identifier'):
211 | sliced_copy = sliced.copy()
212 | target_scaler = self._target_scaler[identifier]
213 |
214 | for col in column_names:
215 | if col not in {'forecast_time', 'identifier'}:
216 | sliced_copy[col] = target_scaler.inverse_transform(sliced_copy[col].values.reshape(-1,1))
217 | df_list.append(sliced_copy)
218 |
219 | output = pd.concat(df_list, axis=0)
220 |
221 | return output
222 |
223 | # Default params
224 | def get_fixed_params(self):
225 | """Returns fixed model parameters for experiments."""
226 |
227 | fixed_params = {
228 | 'total_time_steps': 8 * 24,
229 | 'num_encoder_steps': 7 * 24,
230 | 'num_epochs': 100,
231 | 'early_stopping_patience': 5,
232 | 'multiprocessing_workers': 5
233 | }
234 |
235 | return fixed_params
236 |
237 | def get_default_model_params(self):
238 | """Returns default optimised model parameters."""
239 |
240 | model_params = {
241 | 'dropout_rate': 0.1,
242 | 'hidden_layer_size': 160,
243 | 'learning_rate': 0.001,
244 | 'minibatch_size': 64,
245 | 'max_gradient_norm': 0.01,
246 | 'num_heads': 4,
247 | 'stack_size': 1
248 | }
249 |
250 | return model_params
251 |
252 | def get_num_samples_for_calibration(self):
253 | """Gets the default number of training and validation samples.
254 |
255 | Used to sub-sample the data for network calibration; a value of -1 uses
256 | all available samples.
257 |
258 | Returns:
259 | Tuple of (training samples, validation samples)
260 | """
261 | return 450000, 50000
262 |
--------------------------------------------------------------------------------
/data_formatters/favorita.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | """Custom formatting functions for Favorita dataset.
18 |
19 | Defines dataset specific column definitions and data transformations.
20 | """
21 |
22 | import data_formatters.base
23 | import data_formatters.utils as utils
24 | import pandas as pd
25 | import sklearn.preprocessing
26 |
27 | DataTypes = data_formatters.base.DataTypes
28 | InputTypes = data_formatters.base.InputTypes
29 |
30 |
31 | class FavoritaFormatter(data_formatters.base.GenericDataFormatter):
32 | """Defines and formats data for the Favorita dataset.
33 |
34 | Attributes:
35 | column_definition: Defines input and data type of column used in the
36 | experiment.
37 | identifiers: Entity identifiers used in experiments.
38 | """
39 |
40 | _column_definition = [
41 | ('traj_id', DataTypes.REAL_VALUED, InputTypes.ID),
42 | ('date', DataTypes.DATE, InputTypes.TIME),
43 | ('log_sales', DataTypes.REAL_VALUED, InputTypes.TARGET),
44 | ('onpromotion', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
45 | ('transactions', DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
46 | ('oil', DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
47 | ('day_of_week', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
48 | ('day_of_month', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
49 | ('month', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
50 | ('national_hol', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
51 | ('regional_hol', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
52 | ('local_hol', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
53 | ('open', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
54 | ('item_nbr', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
55 | ('store_nbr', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
56 | ('city', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
57 | ('state', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
58 | ('type', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
59 | ('cluster', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
60 | ('family', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
61 | ('class', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
62 | ('perishable', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT)
63 | ]
64 |
65 | def __init__(self):
66 | """Initialises formatter."""
67 |
68 | self.identifiers = None
69 | self._real_scalers = None
70 | self._cat_scalers = None
71 | self._target_scaler = None
72 | self._num_classes_per_cat_input = None
73 |
74 | def split_data(self, df, valid_boundary=None, test_boundary=None):
75 | """Splits data frame into training-validation-test data frames.
76 |
77 | This also calibrates scaling object, and transforms data for each split.
78 |
79 | Args:
80 | df: Source data frame to split.
81 | valid_boundary: Starting date for validation data
82 | test_boundary: Starting date for test data
83 |
84 | Returns:
85 | Tuple of transformed (train, valid, test) data.
86 | """
87 |
88 | print('Formatting train-valid-test splits.')
89 |
90 | if valid_boundary is None:
91 | valid_boundary = pd.Timestamp(2015, 12, 1)
92 |
93 | fixed_params = self.get_fixed_params()
94 | time_steps = fixed_params['total_time_steps']
95 | lookback = fixed_params['num_encoder_steps']
96 | forecast_horizon = time_steps - lookback
97 |
98 | df['date'] = pd.to_datetime(df['date'])
99 | df_lists = {'train': [], 'valid': [], 'test': []}
100 | for _, sliced in df.groupby('traj_id'):
101 | index = sliced['date']
102 | train = sliced.loc[index < valid_boundary]
103 | train_len = len(train)
104 | valid_len = train_len + forecast_horizon
105 | valid = sliced.iloc[train_len - lookback:valid_len, :]
106 | test = sliced.iloc[valid_len - lookback:valid_len + forecast_horizon, :]
107 |
108 | sliced_map = {'train': train, 'valid': valid, 'test': test}
109 |
110 | for k in sliced_map:
111 | item = sliced_map[k]
112 |
113 | if len(item) >= time_steps:
114 | df_lists[k].append(item)
115 |
116 | dfs = {k: pd.concat(df_lists[k], axis=0) for k in df_lists}
117 |
118 | train = dfs['train']
119 | self.set_scalers(train, set_real=True)
120 |
121 | # Use all data for label encoding to handle labels not present in training.
122 | self.set_scalers(df, set_real=False)
123 |
124 | # Filter out identifiers not present in training (i.e. cold-started items).
125 | def filter_ids(frame):
126 | identifiers = set(self.identifiers)
127 | index = frame['traj_id']
128 | return frame.loc[index.apply(lambda x: x in identifiers)]
129 |
130 | valid = filter_ids(dfs['valid'])
131 | test = filter_ids(dfs['test'])
132 |
133 | return (self.transform_inputs(data) for data in [train, valid, test])
134 |
135 | def set_scalers(self, df, set_real=True):
136 | """Calibrates scalers using the data supplied.
137 |
138 | Label encoding is applied to the entire dataset (i.e. including test),
139 | so that unseen labels can be handled at run-time.
140 |
141 | Args:
142 | df: Data to use to calibrate scalers.
143 | set_real: Whether to fit real-valued or categorical scalers
144 | """
145 | print('Setting scalers with training data...')
146 |
147 | column_definitions = self.get_column_definition()
148 | id_column = utils.get_single_col_by_input_type(InputTypes.ID,
149 | column_definitions)
150 | target_column = utils.get_single_col_by_input_type(InputTypes.TARGET,
151 | column_definitions)
152 |
153 | if set_real:
154 |
155 | # Extract identifiers in case required
156 | self.identifiers = list(df[id_column].unique())
157 |
158 | # Format real scalers
159 | self._real_scalers = {}
160 | #for col in ['oil', 'transactions', 'log_sales']:
161 | # self._real_scalers[col] = (df[col].mean(), df[col].std())
162 |
163 | self._target_scaler = (df[target_column].mean(), df[target_column].std())
164 |
165 | else:
166 | # Format categorical scalers
167 | categorical_inputs = utils.extract_cols_from_data_type(
168 | DataTypes.CATEGORICAL, column_definitions,
169 | {InputTypes.ID, InputTypes.TIME})
170 |
171 | categorical_scalers = {}
172 | num_classes = []
173 | if self.identifiers is None:
174 | raise ValueError('Scale real-valued inputs first!')
175 | id_set = set(self.identifiers)
176 | valid_idx = df['traj_id'].apply(lambda x: x in id_set)
177 | for col in categorical_inputs:
178 | # Set all to str so that we don't have mixed integer/string columns
179 | srs = df[col].apply(str).loc[valid_idx]
180 | categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(
181 | srs.values)
182 |
183 | num_classes.append(srs.nunique())
184 |
185 | # Set categorical scaler outputs
186 | self._cat_scalers = categorical_scalers
187 | self._num_classes_per_cat_input = num_classes
188 |
189 | def transform_inputs(self, df):
190 | """Performs feature transformations.
191 |
192 | This includes feature engineering, preprocessing and normalisation.
193 |
194 | Args:
195 | df: Data frame to transform.
196 |
197 | Returns:
198 | Transformed data frame.
199 |
200 | """
201 | output = df.copy()
202 |
203 | if self._real_scalers is None and self._cat_scalers is None:
204 | raise ValueError('Scalers have not been set!')
205 |
206 | column_definitions = self.get_column_definition()
207 |
208 | categorical_inputs = utils.extract_cols_from_data_type(
209 | DataTypes.CATEGORICAL, column_definitions,
210 | {InputTypes.ID, InputTypes.TIME})
211 |
212 | # Format real inputs
213 | #for col in ['log_sales', 'oil', 'transactions']:
214 | # mean, std = self._real_scalers[col]
215 | # output[col] = (df[col] - mean) / std
216 |
217 |
218 | output['log_sales'] = output['log_sales'].fillna(0.) # mean imputation
219 |
220 | # Format categorical inputs
221 | for col in categorical_inputs:
222 | string_df = df[col].apply(str)
223 | output[col] = self._cat_scalers[col].transform(string_df)
224 |
225 | return output
226 |
227 | def format_predictions(self, predictions):
228 | """Reverts any normalisation to give predictions in original scale.
229 |
230 | Args:
231 | predictions: Dataframe of model predictions.
232 |
233 | Returns:
234 | Data frame of unnormalised predictions.
235 | """
236 | output = predictions.copy()
237 |
238 | column_names = predictions.columns
239 | mean, std = self._target_scaler
240 | for col in column_names:
241 | if col not in {'forecast_time', 'identifier'}:
242 | output[col] = (predictions[col] * std) + mean
243 |
244 | return output
245 |
246 | # Default params
247 | def get_fixed_params(self):
248 | """Returns fixed model parameters for experiments."""
249 |
250 | fixed_params = {
251 | 'total_time_steps': 120,
252 | 'num_encoder_steps': 30,
253 | 'num_epochs': 100,
254 | 'early_stopping_patience': 5,
255 | 'multiprocessing_workers': 5
256 | }
257 |
258 | return fixed_params
259 |
260 | def get_default_model_params(self):
261 | """Returns default optimised model parameters."""
262 |
263 | model_params = {
264 | 'dropout_rate': 0.1,
265 | 'hidden_layer_size': 240,
266 | 'learning_rate': 0.001,
267 | 'minibatch_size': 128,
268 | 'max_gradient_norm': 100.,
269 | 'num_heads': 4,
270 | 'stack_size': 1
271 | }
272 |
273 | return model_params
274 |
275 | def get_num_samples_for_calibration(self):
276 | """Gets the default number of training and validation samples.
277 |
278 | Used to sub-sample the data for network calibration; a value of -1 uses
279 | all available samples.
280 |
281 | Returns:
282 | Tuple of (training samples, validation samples)
283 | """
284 | return 450000, 50000
285 |
286 | def get_column_definition(self):
287 | """Formats column definition in order expected by the TFT.
288 |
289 | Modified for Favorita to match column order of original experiment.
290 |
291 | Returns:
292 | Favorita-specific column definition
293 | """
294 |
295 | column_definition = self._column_definition
296 |
297 | # Sanity checks first.
298 | # Ensure only one ID and time column exist
299 | def _check_single_column(input_type):
300 |
301 | length = len([tup for tup in column_definition if tup[2] == input_type])
302 |
303 | if length != 1:
304 | raise ValueError('Illegal number of inputs ({}) of type {}'.format(
305 | length, input_type))
306 |
307 | _check_single_column(InputTypes.ID)
308 | _check_single_column(InputTypes.TIME)
309 |
310 | identifier = [tup for tup in column_definition if tup[2] == InputTypes.ID]
311 | time = [tup for tup in column_definition if tup[2] == InputTypes.TIME]
312 | real_inputs = [
313 | tup for tup in column_definition if tup[1] == DataTypes.REAL_VALUED and
314 | tup[2] not in {InputTypes.ID, InputTypes.TIME}
315 | ]
316 |
317 | col_definition_map = {tup[0]: tup for tup in column_definition}
318 | col_order = [
319 | 'item_nbr', 'store_nbr', 'city', 'state', 'type', 'cluster', 'family',
320 | 'class', 'perishable', 'onpromotion', 'day_of_week', 'national_hol',
321 | 'regional_hol', 'local_hol'
322 | ]
323 | categorical_inputs = [
324 | col_definition_map[k] for k in col_order if k in col_definition_map
325 | ]
326 |
327 | return identifier + time + real_inputs + categorical_inputs
328 |
--------------------------------------------------------------------------------
/data_formatters/traffic.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | """Custom formatting functions for Traffic dataset.
18 |
19 | Defines dataset specific column definitions and data transformations. This also
20 | performs z-score normalization across the entire dataset, hence re-uses most of
21 | the same functions as volatility.
22 | """
23 |
24 | import data_formatters.base
25 | import data_formatters.volatility
26 | import data_formatters.utils
27 |
28 | VolatilityFormatter = data_formatters.volatility.VolatilityFormatter
29 | DataTypes = data_formatters.base.DataTypes
30 | InputTypes = data_formatters.base.InputTypes
31 |
32 |
33 | class TrafficFormatter(VolatilityFormatter):
34 | """Defines and formats data for the traffic dataset.
35 |
36 | This also performs z-score normalization across the entire dataset, hence
37 | re-uses most of the same functions as volatility.
38 |
39 | Attributes:
40 | column_definition: Defines input and data type of column used in the
41 | experiment.
42 | identifiers: Entity identifiers used in experiments.
43 | """
44 |
45 | _column_definition = [
46 | ('id', DataTypes.REAL_VALUED, InputTypes.ID),
47 | ('hours_from_start', DataTypes.REAL_VALUED, InputTypes.TIME),
48 | ('values', DataTypes.REAL_VALUED, InputTypes.TARGET),
49 | ('time_on_day', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
50 | ('day_of_week', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
51 | ('hours_from_start', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
52 | ('categorical_id', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
53 | ]
54 |
55 | def split_data(self, df, valid_boundary=151, test_boundary=166):
56 | """Splits data frame into training-validation-test data frames.
57 |
58 | This also calibrates scaling object, and transforms data for each split.
59 |
60 | Args:
61 | df: Source data frame to split.
62 | valid_boundary: Starting day (sensor_day) for validation data
63 | test_boundary: Starting day (sensor_day) for test data
64 |
65 | Returns:
66 | Tuple of transformed (train, valid, test) data.
67 | """
68 |
69 | print('Formatting train-valid-test splits.')
70 |
71 | index = df['sensor_day']
72 | train = df.loc[index < valid_boundary]
73 | valid = df.loc[(index >= valid_boundary - 7) & (index < test_boundary)]
74 | test = df.loc[index >= test_boundary - 7]
75 |
76 | self.set_scalers(train)
77 |
78 | return (self.transform_inputs(data) for data in [train, valid, test])
79 |
80 | # Default params
81 | def get_fixed_params(self):
82 | """Returns fixed model parameters for experiments."""
83 |
84 | fixed_params = {
85 | 'total_time_steps': 8 * 24,
86 | 'num_encoder_steps': 7 * 24,
87 | 'num_epochs': 100,
88 | 'early_stopping_patience': 5,
89 | 'multiprocessing_workers': 5
90 | }
91 |
92 | return fixed_params
93 |
94 | def get_default_model_params(self):
95 | """Returns default optimised model parameters."""
96 |
97 | model_params = {
98 | 'dropout_rate': 0.3,
99 | 'hidden_layer_size': 320,
100 | 'learning_rate': 0.001,
101 | 'minibatch_size': 128,
102 | 'max_gradient_norm': 100.,
103 | 'num_heads': 4,
104 | 'stack_size': 1
105 | }
106 |
107 | return model_params
108 |
109 | def get_num_samples_for_calibration(self):
110 | """Gets the default number of training and validation samples.
111 |
112 | Used to sub-sample the data for network calibration; a value of -1 uses
113 | all available samples.
114 |
115 | Returns:
116 | Tuple of (training samples, validation samples)
117 | """
118 | return 450000, 50000
119 |
--------------------------------------------------------------------------------
/data_formatters/utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | """Generic helper functions used across codebase."""
18 |
19 | import os
20 | import pathlib
21 | import torch
22 | import numpy as np
23 | import data_formatters
24 |
25 |
26 | # Loss functions.
27 | def pytorch_quantile_loss(y, y_pred, quantile):
28 | """Computes quantile loss for PyTorch.
29 |
30 | Standard quantile loss as defined in the "Training Procedure" section of
31 | the main TFT paper
32 |
33 | Args:
34 | y: Targets
35 | y_pred: Predictions
36 | quantile: Quantile to use for loss calculations (between 0 & 1)
37 |
38 | Returns:
39 | Tensor for quantile loss.
40 | """
41 |
42 | # Checks quantile
43 | if quantile < 0 or quantile > 1:
44 | raise ValueError(
45 | 'Illegal quantile value={}! Values should be between 0 and 1.'.format(
46 | quantile))
47 |
48 | prediction_underflow = y - y_pred
49 | q_loss = quantile * torch.max(prediction_underflow, torch.zeros_like(prediction_underflow)) + (
50 | 1. - quantile) * torch.max(-prediction_underflow, torch.zeros_like(prediction_underflow))
51 |
52 | return torch.sum(q_loss, dim=-1)
53 |
54 |
55 |
56 | # Generic.
57 | def get_single_col_by_input_type(input_type, column_definition):
58 | """Returns name of single column.
59 |
60 | Args:
61 | input_type: Input type of column to extract
62 | column_definition: Column definition list for experiment
63 | """
64 |
65 | l = [tup[0] for tup in column_definition if tup[2] == input_type]
66 |
67 | if len(l) != 1:
68 | raise ValueError('Invalid number of columns for {}'.format(input_type))
69 |
70 | return l[0]
71 |
72 |
73 | def extract_cols_from_data_type(data_type, column_definition,
74 | excluded_input_types):
75 | """Extracts the names of columns that correspond to a defined data_type.
76 |
77 | Args:
78 | data_type: DataType of columns to extract.
79 | column_definition: Column definition to use.
80 | excluded_input_types: Set of input types to exclude
81 |
82 | Returns:
83 | List of names for columns with data type specified.
84 | """
85 | return [
86 | tup[0]
87 | for tup in column_definition
88 | if tup[1] == data_type and tup[2] not in excluded_input_types
89 | ]
90 |
91 |
92 | def numpy_normalised_quantile_loss(y, y_pred, quantile):
93 | """Computes normalised quantile loss for numpy arrays.
94 |
95 | Uses the q-Risk metric as defined in the "Training Procedure" section of the
96 | main TFT paper.
97 |
98 | Args:
99 | y: Targets
100 | y_pred: Predictions
101 | quantile: Quantile to use for loss calculations (between 0 & 1)
102 |
103 | Returns:
104 | Float for normalised quantile loss.
105 | """
106 | prediction_underflow = y - y_pred
107 | weighted_errors = quantile * np.maximum(prediction_underflow, 0.) \
108 | + (1. - quantile) * np.maximum(-prediction_underflow, 0.)
109 |
110 | quantile_loss = weighted_errors.mean()
111 | normaliser = np.abs(y).mean()
112 |
113 | return 2 * quantile_loss / normaliser
114 |
115 |
116 | # OS related functions.
117 | def create_folder_if_not_exist(directory):
118 | """Creates folder if it doesn't exist.
119 |
120 | Args:
121 | directory: Folder path to create.
122 | """
123 | # Also creates directories recursively
124 | pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
125 |
126 |
127 | def make_data_formatter(exp_name):
128 | """Gets a data formatter object for experiment.
129 |
130 | Returns:
131 | Default DataFormatter per experiment.
132 | """
133 |
134 | data_formatter_class = {
135 | 'volatility': data_formatters.volatility.VolatilityFormatter,
136 | 'electricity': data_formatters.electricity.ElectricityFormatter,
137 | 'traffic': data_formatters.traffic.TrafficFormatter,
138 | 'favorita': data_formatters.favorita.FavoritaFormatter,
139 | }
140 |
141 | return data_formatter_class[exp_name]()
142 |
143 |
144 | def csv_path_to_folder(path: str):
145 | return "/".join(path.split('/')[:-1]) + "/"
146 |
147 |
148 | def data_csv_path(exp_name):
149 | csv_map = {
150 | 'volatility': './data/volatility/formatted_omi_vol.csv',
151 | 'electricity': './data/electricity/hourly_electricity.csv',
152 | 'traffic': './data/traffic/hourly_data.csv',
153 | 'favorita': './data/favorita/favorita_consolidated.csv',
154 | }
155 |
156 | return csv_map[exp_name]
157 |
--------------------------------------------------------------------------------
/data_formatters/volatility.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Lint as: python3
17 | """Custom formatting functions for Volatility dataset.
18 |
19 | Defines dataset specific column definitions and data transformations.
20 | """
21 |
22 | import data_formatters.base
23 | import data_formatters.utils
24 | import sklearn.preprocessing
25 | from data_formatters import utils
26 |
27 | GenericDataFormatter = data_formatters.base.GenericDataFormatter
28 | DataTypes = data_formatters.base.DataTypes
29 | InputTypes = data_formatters.base.InputTypes
30 |
31 |
32 | class VolatilityFormatter(GenericDataFormatter):
33 | """Defines and formats data for the volatility dataset.
34 |
35 | Attributes:
36 | column_definition: Defines input and data type of column used in the
37 | experiment.
38 | identifiers: Entity identifiers used in experiments.
39 | """
40 |
41 | _column_definition = [
42 | ('Symbol', DataTypes.CATEGORICAL, InputTypes.ID),
43 | ('date', DataTypes.DATE, InputTypes.TIME),
44 | ('log_vol', DataTypes.REAL_VALUED, InputTypes.TARGET),
45 | ('open_to_close', DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
46 | ('days_from_start', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
47 | ('day_of_week', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
48 | ('day_of_month', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
49 | ('week_of_year', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
50 | ('month', DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
51 | ('Region', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
52 | ]
53 |
54 | def __init__(self):
55 | """Initialises formatter."""
56 |
57 | self.identifiers = None
58 | self._real_scalers = None
59 | self._cat_scalers = None
60 | self._target_scaler = None
61 | self._num_classes_per_cat_input = None
62 |
63 | def split_data(self, df, valid_boundary=2016, test_boundary=2018):
64 | """Splits data frame into training-validation-test data frames.
65 |
66 | This also calibrates scaling object, and transforms data for each split.
67 |
68 | Args:
69 | df: Source data frame to split.
70 | valid_boundary: Starting year for validation data
71 | test_boundary: Starting year for test data
72 |
73 | Returns:
74 | Tuple of transformed (train, valid, test) data.
75 | """
76 |
77 | print('Formatting train-valid-test splits.')
78 |
79 | index = df['year']
80 | train = df.loc[index < valid_boundary]
81 | valid = df.loc[(index >= valid_boundary) & (index < test_boundary)]
82 | test = df.loc[index >= test_boundary]
83 |
84 | self.set_scalers(train)
85 |
86 | return (self.transform_inputs(data) for data in [train, valid, test])
87 |
88 | def set_scalers(self, df):
89 | """Calibrates scalers using the data supplied.
90 |
91 | Args:
92 | df: Data to use to calibrate scalers.
93 | """
94 | print('Setting scalers with training data...')
95 |
96 | column_definitions = self.get_column_definition()
97 | id_column = utils.get_single_col_by_input_type(InputTypes.ID,
98 | column_definitions)
99 | target_column = utils.get_single_col_by_input_type(InputTypes.TARGET,
100 | column_definitions)
101 |
102 | # Extract identifiers in case required
103 | self.identifiers = list(df[id_column].unique())
104 |
105 | # Format real scalers
106 | real_inputs = utils.extract_cols_from_data_type(
107 | DataTypes.REAL_VALUED, column_definitions,
108 | {InputTypes.ID, InputTypes.TIME})
109 |
110 | data = df[real_inputs].values
111 | self._real_scalers = sklearn.preprocessing.StandardScaler().fit(data)
112 | self._target_scaler = sklearn.preprocessing.StandardScaler().fit(
113 | df[[target_column]].values) # used for predictions
114 |
115 | # Format categorical scalers
116 | categorical_inputs = utils.extract_cols_from_data_type(
117 | DataTypes.CATEGORICAL, column_definitions,
118 | {InputTypes.ID, InputTypes.TIME})
119 |
120 | categorical_scalers = {}
121 | num_classes = []
122 | for col in categorical_inputs:
123 | # Set all to str so that we don't have mixed integer/string columns
124 | srs = df[col].apply(str)
125 | categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(
126 | srs.values)
127 | num_classes.append(srs.nunique())
128 |
129 | # Set categorical scaler outputs
130 | self._cat_scalers = categorical_scalers
131 | self._num_classes_per_cat_input = num_classes
132 |
133 | def transform_inputs(self, df):
134 | """Performs feature transformations.
135 |
136 | This includes feature engineering, preprocessing and normalisation.
137 |
138 | Args:
139 | df: Data frame to transform.
140 |
141 | Returns:
142 | Transformed data frame.
143 |
144 | """
145 | output = df.copy()
146 |
147 | if self._real_scalers is None and self._cat_scalers is None:
148 | raise ValueError('Scalers have not been set!')
149 |
150 | column_definitions = self.get_column_definition()
151 |
152 | real_inputs = utils.extract_cols_from_data_type(
153 | DataTypes.REAL_VALUED, column_definitions,
154 | {InputTypes.ID, InputTypes.TIME})
155 | categorical_inputs = utils.extract_cols_from_data_type(
156 | DataTypes.CATEGORICAL, column_definitions,
157 | {InputTypes.ID, InputTypes.TIME})
158 |
159 | # Format real inputs
160 | output[real_inputs] = self._real_scalers.transform(df[real_inputs].values)
161 |
162 | # Format categorical inputs
163 | for col in categorical_inputs:
164 | string_df = df[col].apply(str)
165 | output[col] = self._cat_scalers[col].transform(string_df)
166 |
167 | return output
168 |
169 | def format_predictions(self, predictions):
170 | """Reverts any normalisation to give predictions in original scale.
171 |
172 | Args:
173 | predictions: Dataframe of model predictions.
174 |
175 | Returns:
176 | Data frame of unnormalised predictions.
177 | """
178 | output = predictions.copy()
179 |
180 | column_names = predictions.columns
181 |
182 | for col in column_names:
183 | if col not in {'forecast_time', 'identifier'}:
184 | output[col] = self._target_scaler.inverse_transform(predictions[col].values.reshape(-1,1))
185 |
186 | return output
187 |
188 | # Default params
189 | def get_fixed_params(self):
190 | """Returns fixed model parameters for experiments."""
191 |
192 | fixed_params = {
193 | 'total_time_steps': 252 + 5,
194 | 'num_encoder_steps': 252,
195 | 'num_epochs': 100,
196 | 'early_stopping_patience': 5,
197 | 'multiprocessing_workers': 5,
198 | }
199 |
200 | return fixed_params
201 |
202 | def get_default_model_params(self):
203 | """Returns default optimised model parameters."""
204 |
205 | model_params = {
206 | 'dropout_rate': 0.3,
207 | 'hidden_layer_size': 160,
208 | 'learning_rate': 0.01,
209 | 'minibatch_size': 64,
210 | 'max_gradient_norm': 0.01,
211 | 'num_heads': 1,
212 | 'stack_size': 1
213 | }
214 |
215 | return model_params
216 |
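217 | # Usage sketch: assumes `df` is the raw volatility frame with a 'year' column
218 | # and `preds` is a hypothetical dataframe of model quantile outputs.
219 | #
220 | #   formatter = VolatilityFormatter()
221 | #   train, valid, test = formatter.split_data(df)  # calibrates scalers on train
222 | #   unnormalised = formatter.format_predictions(preds)  # back to original scale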
--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevinc/Transformer_Timeseries/c3705aa9ece058dca98ec7af74f9a9a1a325f7a1/dataset/__init__.py
--------------------------------------------------------------------------------
/dataset/ts_dataset.py:
--------------------------------------------------------------------------------
1 | from torch import from_numpy
2 | import pandas as pd
3 | import data_formatters.utils as utils
4 | from data_formatters.base import InputTypes
5 | from torch.utils.data import Dataset
6 | import numpy as np
7 | import click
8 | from os import path
9 |
10 | class TSDataset(Dataset):
11 | ## Mostly adapted from the original TFT GitHub repo (data_formatters)
12 | def __init__(self, cnf, data_formatter):
13 |
14 | self.params = cnf.all_params
15 |
16 | self.csv = utils.data_csv_path(cnf.ds_name)
17 | self.data = pd.read_csv(self.csv, index_col=0, na_filter=False)
18 |
19 | self.train_set, self.valid_set, self.test_set = data_formatter.split_data(self.data)
20 | self.params['column_definition'] = data_formatter.get_column_definition()
21 |
22 | self.inputs = None
23 | self.outputs = None
24 | self.time = None
25 | self.identifiers = None
26 |
27 | def train(self):
28 | max_samples = self.params['train_samples']
29 | if path.exists(utils.csv_path_to_folder(self.csv) + "processed_traindata.npz"):
30 | f = np.load(utils.csv_path_to_folder(self.csv) + "processed_traindata.npz", allow_pickle=True)
31 | self.inputs, self.outputs, self.time, self.identifiers = f[f.files[0]], f[f.files[1]], f[f.files[2]], f[
32 | f.files[3]]
33 | else:
34 | self.preprocess(self.train_set, max_samples)
35 | np.savez(utils.csv_path_to_folder(self.csv) + "processed_traindata.npz", self.inputs, self.outputs,
36 | self.time,
37 | self.identifiers)
38 |
39 | def test(self):
40 | max_samples = self.params['test_samples']
41 | if path.exists(utils.csv_path_to_folder(self.csv) + "processed_testdata.npz"):
42 | f = np.load(utils.csv_path_to_folder(self.csv) + "processed_testdata.npz", allow_pickle=True)
43 | self.inputs, self.outputs, self.time, self.identifiers = f[f.files[0]], f[f.files[1]], f[f.files[2]], f[
44 | f.files[3]]
45 | else:
46 | self.preprocess(self.test_set, max_samples)
47 | np.savez(utils.csv_path_to_folder(self.csv) + "processed_testdata.npz", self.inputs, self.outputs,
48 | self.time,
49 | self.identifiers)
50 |
51 | def val(self):
52 | max_samples = self.params['val_samples']
53 | if path.exists(utils.csv_path_to_folder(self.csv) + "processed_validdata.npz"):
54 | f = np.load(utils.csv_path_to_folder(self.csv) + "processed_validdata.npz", allow_pickle=True)
55 | self.inputs, self.outputs, self.time, self.identifiers = f[f.files[0]], f[f.files[1]], f[f.files[2]], f[
56 | f.files[3]]
57 | else:
58 | self.preprocess(self.valid_set, max_samples)
59 | np.savez(utils.csv_path_to_folder(self.csv) + "processed_validdata.npz", self.inputs, self.outputs,
60 | self.time,
61 | self.identifiers)
62 |
63 | def preprocess(self, data, max_samples):
64 | time_steps = int(self.params['total_time_steps'])
65 | input_size = int(self.params['input_size'])
66 | output_size = int(self.params['output_size'])
67 | column_definition = self.params['column_definition']
68 |
69 | id_col = self._get_single_col_by_type(InputTypes.ID)
70 | time_col = self._get_single_col_by_type(InputTypes.TIME)
71 |
72 | data.sort_values(by=[id_col, time_col], inplace=True)
73 | print('Getting valid sampling locations.')
74 | valid_sampling_locations = []
75 | split_data_map = {}
76 | for identifier, df in data.groupby(id_col):
77 | # print('Getting locations for {}'.format(identifier))
78 | num_entries = len(df)
79 | if num_entries >= time_steps:
80 | valid_sampling_locations += [
81 | (identifier, time_steps + i)
82 | for i in range(num_entries - time_steps + 1)
83 | ]
84 | split_data_map[identifier] = df
85 |
86 | self.inputs = np.zeros((max_samples, time_steps, input_size))
87 | self.outputs = np.zeros((max_samples, time_steps, output_size))
88 | self.time = np.empty((max_samples, time_steps, 1), dtype=object)
89 | self.identifiers = np.empty((max_samples, time_steps, 1), dtype=object)
90 | print('# available segments={}'.format(len(valid_sampling_locations)))
91 |
92 | if max_samples > 0 and len(valid_sampling_locations) > max_samples:
93 | print('Extracting {} samples...'.format(max_samples))
94 | ranges = [
95 | valid_sampling_locations[i] for i in np.random.choice(
96 | len(valid_sampling_locations), max_samples, replace=False)
97 | ]
98 | else:
99 | print('Using all {} available segments (max_samples={}).'.format(
100 | len(valid_sampling_locations), max_samples))
101 | ranges = valid_sampling_locations
102 |
103 | id_col = self._get_single_col_by_type(InputTypes.ID)
104 | time_col = self._get_single_col_by_type(InputTypes.TIME)
105 | target_col = self._get_single_col_by_type(InputTypes.TARGET)
106 | input_cols = [
107 | tup[0]
108 | for tup in column_definition
109 | if tup[2] not in {InputTypes.ID, InputTypes.TIME}
110 | ]
111 |
112 | for i, tup in enumerate(ranges):
113 | if ((i + 1) % 1000) == 0:
114 | print(i + 1, 'of', max_samples, 'samples done...')
115 | identifier, start_idx = tup
116 | sliced = split_data_map[identifier].iloc[start_idx - time_steps:start_idx]
117 |
118 | self.inputs[i, :, :] = sliced[input_cols]
119 | self.outputs[i, :, :] = sliced[[target_col]]
120 | self.time[i, :, 0] = sliced[time_col]
121 | self.identifiers[i, :, 0] = sliced[id_col]
122 |
123 | def __getitem__(self, index):
124 |
125 | num_encoder_steps = int(self.params['num_encoder_steps'])
126 | s = {
127 | 'inputs': self.inputs[index].astype(float),
128 | 'outputs': self.outputs[index, num_encoder_steps:, :],
129 | 'active_entries': np.ones_like(self.outputs[index, num_encoder_steps:, :]),
130 | 'time': self.time[index].tolist(),
131 | 'identifier': self.identifiers[index].tolist()
132 | }
133 |
134 | return s
135 |
136 | def __len__(self):
137 | return self.inputs.shape[0]
138 |
139 | def _get_single_col_by_type(self, input_type):
140 | """Returns name of single column for input type."""
141 | return utils.get_single_col_by_input_type(input_type, self.params['column_definition'])
142 |
143 |
144 | @click.command()
145 | @click.option('--conf_file_path', type=str, default="./conf/electricity.yaml")
146 | def main(conf_file_path):
147 | import data_formatters.utils as utils
148 | from conf import Conf
149 |
150 | cnf = Conf(conf_file_path=conf_file_path, seed=15, exp_name="test", log=False)
151 | data_formatter = utils.make_data_formatter(cnf.ds_name)
152 | dataset_train = TSDataset(cnf, data_formatter)
153 | dataset_train.train()
154 |
155 | for i in range(10):
156 | # 192 x ['power_usage', 'hour', 'day_of_week', 'hours_from_start', 'categorical_id']
157 | x = dataset_train[i]['inputs']
158 | # 24 x ['power_usage']
159 | y = dataset_train[i]['outputs']
160 | print(f'Example #{i}: x.shape={x.shape}, y.shape={y.shape}')
161 |
162 |
163 | if __name__ == "__main__":
164 | main()
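165 |
166 | # Windowing sketch: with total_time_steps=192, an entity with 200 rows yields
167 | # 200 - 192 + 1 = 9 valid windows; preprocess() stores (identifier, 192 + i)
168 | # and later slices rows [i, i + 192) for each sample. Values here are
169 | # illustrative, not dataset-specific.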
--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
1 | name: tft
2 | channels:
3 | - anaconda
4 | - pytorch
5 | - conda-forge
6 | - defaults
7 | dependencies:
8 | - setuptools=58.0.4=py37h06a4308_0
9 | - _libgcc_mutex=0.1=conda_forge
10 | - _openmp_mutex=4.5=2_kmp_llvm
11 | - absl-py=1.0.0=pyhd8ed1ab_0
12 | - aiohttp=3.8.1=py37h540881e_1
13 | - aiosignal=1.2.0=pyhd8ed1ab_0
14 | - alsa-lib=1.2.3=h516909a_0
15 | - async-timeout=4.0.2=pyhd8ed1ab_0
16 | - asynctest=0.13.0=py_0
17 | - attrs=21.4.0=pyhd8ed1ab_0
18 | - azure-core=1.23.1=pyhd8ed1ab_0
19 | - azure-storage-blob=12.11.0=pyhd8ed1ab_0
20 | - backcall=0.2.0=pyh9f0ad1d_0
21 | - backports=1.0=py_2
22 | - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
23 | - blinker=1.4=py_1
24 | - brotli=1.0.9=h166bdaf_7
25 | - brotli-bin=1.0.9=h166bdaf_7
26 | - brotlipy=0.7.0=py37h540881e_1004
27 | - c-ares=1.18.1=h7f98852_0
28 | - ca-certificates=2021.10.8=ha878542_0
29 | - cachetools=5.0.0=pyhd8ed1ab_0
30 | - certifi=2021.10.8=py37h89c1867_2
31 | - cffi=1.15.0=py37h036bc23_0
32 | - charset-normalizer=2.0.12=pyhd8ed1ab_0
33 | - click=8.1.3=py37h89c1867_0
34 | - codecov=2.1.11=pyhd3deb0d_0
35 | - colorama=0.4.4=pyh9f0ad1d_0
36 | - coverage=6.3.2=py37h540881e_2
37 | - cryptography=36.0.2=py37h38fbfac_1
38 | - cycler=0.11.0=pyhd8ed1ab_0
39 | - dbus=1.13.6=h5008d03_3
40 | - decorator=5.1.1=pyhd8ed1ab_0
41 | - expat=2.4.8=h27087fc_0
42 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
43 | - font-ttf-inconsolata=3.000=h77eed37_0
44 | - font-ttf-source-code-pro=2.038=h77eed37_0
45 | - font-ttf-ubuntu=0.83=hab24e00_0
46 | - fontconfig=2.14.0=h8e229c2_0
47 | - fonts-conda-ecosystem=1=0
48 | - fonts-conda-forge=1=0
49 | - fonttools=4.33.3=py37h540881e_0
50 | - freetype=2.10.4=h0708190_1
51 | - frozenlist=1.3.0=py37h540881e_1
52 | - future=0.18.2=py37h89c1867_5
53 | - gettext=0.19.8.1=h73d1719_1008
54 | - giflib=5.2.1=h36c2ea0_2
55 | - google-auth=2.6.6=pyh6c4a22f_0
56 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
57 | - grpcio=1.45.0=py37he500948_0
58 | - gst-plugins-base=1.20.1=hcf0ee16_1
59 | - gstreamer=1.20.1=hd4edc92_1
60 | - icu=69.1=h9c3ff4c_0
61 | - idna=3.3=pyhd8ed1ab_0
62 | - importlib-metadata=4.11.3=py37h89c1867_1
63 | - ipdb=0.13.9=pyhd8ed1ab_0
64 | - ipython=7.33.0=py37h89c1867_0
65 | - isodate=0.6.1=pyhd8ed1ab_0
66 | - jbig=2.1=h7f98852_2003
67 | - jedi=0.18.1=py37h89c1867_1
68 | - joblib=1.1.0=pyhd8ed1ab_0
69 | - jpeg=9e=h166bdaf_1
70 | - keyutils=1.6.1=h166bdaf_0
71 | - kiwisolver=1.4.2=py37h7cecad7_1
72 | - krb5=1.19.3=h3790be6_0
73 | - lcms2=2.12=hddcbb42_0
74 | - ld_impl_linux-64=2.36.1=hea4e1c9_2
75 | - lerc=3.0=h9c3ff4c_0
76 | - libblas=3.9.0=14_linux64_openblas
77 | - libbrotlicommon=1.0.9=h166bdaf_7
78 | - libbrotlidec=1.0.9=h166bdaf_7
79 | - libbrotlienc=1.0.9=h166bdaf_7
80 | - libcblas=3.9.0=14_linux64_openblas
81 | - libclang=13.0.1=default_hc23dcda_0
82 | - libdeflate=1.10=h7f98852_0
83 | - libedit=3.1.20191231=he28a2e2_2
84 | - libevent=2.1.10=h9b69904_4
85 | - libffi=3.4.2=h7f98852_5
86 | - libgcc-ng=11.2.0=h1d223b6_16
87 | - libgfortran-ng=11.2.0=h69a702a_16
88 | - libgfortran5=11.2.0=h5c6108e_16
89 | - libglib=2.70.2=h174f98d_4
90 | - libiconv=1.16=h516909a_0
91 | - liblapack=3.9.0=14_linux64_openblas
92 | - libllvm13=13.0.1=hf817b99_2
93 | - libnsl=2.0.0=h7f98852_0
94 | - libogg=1.3.4=h7f98852_1
95 | - libopenblas=0.3.20=pthreads_h78a6416_0
96 | - libopus=1.3.1=h7f98852_1
97 | - libpng=1.6.37=h21135ba_2
98 | - libpq=14.2=hd57d9b9_0
99 | - libprotobuf=3.20.0=h6239696_0
100 | - libstdcxx-ng=11.2.0=he4da1e4_16
101 | - libtiff=4.3.0=h542a066_3
102 | - libuuid=2.32.1=h7f98852_1000
103 | - libvorbis=1.3.7=h9c3ff4c_0
104 | - libwebp=1.2.2=h3452ae3_0
105 | - libwebp-base=1.2.2=h7f98852_1
106 | - libxcb=1.13=h7f98852_1004
107 | - libxkbcommon=1.0.3=he3ba5ed_0
108 | - libxml2=2.9.12=h885dcf4_1
109 | - libzlib=1.2.11=h166bdaf_1014
110 | - llvm-openmp=14.0.3=he0ac6c6_0
111 | - lz4-c=1.9.3=h9c3ff4c_1
112 | - markdown=3.3.6=pyhd8ed1ab_0
113 | - matplotlib=3.5.1=py37h89c1867_0
114 | - matplotlib-base=3.5.1=py37h1058ff1_0
115 | - matplotlib-inline=0.1.3=pyhd8ed1ab_0
116 | - mkl=2021.4.0=h8d4b97c_729
117 | - mkl-service=2.4.0=py37h402132d_0
118 | - msrest=0.6.21=pyh44b312d_0
119 | - multidict=6.0.2=py37h540881e_1
120 | - munkres=1.1.4=pyh9f0ad1d_0
121 | - mysql-common=8.0.29=haf5c9bc_0
122 | - mysql-libs=8.0.29=h28c427c_0
123 | - ncurses=6.3=h27087fc_1
124 | - ninja=1.10.2=h4bd325d_1
125 | - nspr=4.32=h9c3ff4c_1
126 | - nss=3.77=h2350873_0
127 | - numpy=1.21.6=py37h976b520_0
128 | - oauthlib=3.2.0=pyhd8ed1ab_0
129 | - openjpeg=2.4.0=hb52868f_1
130 | - openssl=1.1.1n=h166bdaf_0
131 | - packaging=21.3=pyhd8ed1ab_0
132 | - pandas=1.3.5=py37he8f5f7f_0
133 | - parso=0.8.3=pyhd8ed1ab_0
134 | - path=16.4.0=py37h89c1867_1
135 | - pcre=8.45=h9c3ff4c_0
136 | - pexpect=4.8.0=pyh9f0ad1d_2
137 | - pickleshare=0.7.5=py_1003
138 | - pillow=9.1.0=py37h44f0d7a_2
139 | - pip=22.0.4=pyhd8ed1ab_0
140 | - prompt-toolkit=3.0.29=pyha770c72_0
141 | - protobuf=3.20.0=py37hd23a5d3_4
142 | - pthread-stubs=0.4=h36c2ea0_1001
143 | - ptyprocess=0.7.0=pyhd3deb0d_0
144 | - pyasn1=0.4.8=py_0
145 | - pyasn1-modules=0.2.7=py_0
146 | - pycparser=2.21=pyhd8ed1ab_0
147 | - pygments=2.12.0=pyhd8ed1ab_0
148 | - pyjwt=2.3.0=pyhd8ed1ab_1
149 | - pyopenssl=22.0.0=pyhd8ed1ab_0
150 | - pyparsing=3.0.8=pyhd8ed1ab_0
151 | - pyqt=5.12.3=py37h89c1867_8
152 | - pyqt-impl=5.12.3=py37hac37412_8
153 | - pyqt5-sip=4.19.18=py37hcd2ae1e_8
154 | - pyqtchart=5.12=py37he336c9b_8
155 | - pyqtwebengine=5.12.1=py37he336c9b_8
156 | - pysocks=1.7.1=py37h89c1867_5
157 | - python=3.7.12=hb7a2778_100_cpython
158 | - python-dateutil=2.8.2=pyhd8ed1ab_0
159 | - python_abi=3.7=2_cp37m
160 | - pytorch-model-summary=0.1.1=py_0
161 | - pytz=2022.1=pyhd8ed1ab_0
162 | - pyu2f=0.1.5=pyhd8ed1ab_0
163 | - pyyaml=6.0=py37h540881e_4
164 | - qt=5.12.9=h1304e3e_6
165 | - readline=8.1=h46c0cb4_0
166 | - requests=2.27.1=pyhd8ed1ab_0
167 | - requests-oauthlib=1.3.1=pyhd8ed1ab_0
168 | - rsa=4.8=pyhd8ed1ab_0
169 | - scikit-learn=1.0.2=py37hf9e9bfc_0
170 | - scipy=1.7.3=py37hf2a6cf1_0
171 | - six=1.16.0=pyh6c4a22f_0
172 | - sqlite=3.38.3=h4ff8645_0
173 | - tbb=2021.5.0=h924138e_1
174 | - tensorboard=2.8.0=pyhd8ed1ab_1
175 | - tensorboard-data-server=0.6.0=py37h38fbfac_2
176 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
177 | - termcolor=1.1.0=py_2
178 | - threadpoolctl=3.1.0=pyh8a188c0_0
179 | - tk=8.6.12=h27826a3_0
180 | - tomli=2.0.1=pyhd8ed1ab_0
181 | - tornado=6.1=py37h540881e_3
182 | - tqdm=4.64.0=pyhd8ed1ab_0
183 | - traitlets=5.1.1=pyhd8ed1ab_0
184 | - typing=3.10.0.0=pyhd8ed1ab_0
185 | - typing-extensions=4.2.0=hd8ed1ab_1
186 | - typing_extensions=4.2.0=pyha770c72_1
187 | - unicodedata2=14.0.0=py37h540881e_1
188 | - urllib3=1.26.9=pyhd8ed1ab_0
189 | - wcwidth=0.2.5=pyh9f0ad1d_2
190 | - werkzeug=2.1.2=pyhd8ed1ab_0
191 | - wheel=0.37.1=pyhd8ed1ab_0
192 | - xorg-libxau=1.0.9=h7f98852_0
193 | - xorg-libxdmcp=1.1.3=h7f98852_0
194 | - xz=5.2.5=h516909a_1
195 | - yaml=0.2.5=h7f98852_2
196 | - yarl=1.7.2=py37h540881e_2
197 | - zipp=3.8.0=pyhd8ed1ab_0
198 | - zlib=1.2.11=h166bdaf_1014
199 | - zstd=1.5.2=ha95c52a_0
200 | - blas=1.0=mkl
201 | - bzip2=1.0.8=h7b6447c_0
202 | - cudatoolkit=10.2.89=hfd86e86_1
203 | - gmp=6.2.1=h2531618_2
204 | - gnutls=3.6.15=he1e5248_0
205 | - lame=3.100=h7b6447c_0
206 | - libidn2=2.3.2=h7f8727e_0
207 | - libtasn1=4.16.0=h27cfd23_0
208 | - libunistring=0.9.10=h27cfd23_0
209 | - libuv=1.40.0=h7b6447c_0
210 | - nettle=3.7.3=hbbd107a_1
211 | - openh264=2.1.1=h4ff587b_0
212 | - ffmpeg=4.3=hf484d3e_0
213 | - pytorch=1.11.0=py3.7_cuda10.2_cudnn7.6.5_0
214 | - pytorch-mutex=1.0=cuda
215 | - torchaudio=0.11.0=py37_cu102
216 | - torchvision=0.12.0=py37_cu102
217 | prefix: /home/grads/m/mrsergazinov/.conda/envs/tft
218 |
219 |
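220 | # To recreate this environment (assumes a working conda install):
221 | #   conda env create -f env.yml && conda activate tft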
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | from time import time
5 | import numpy as np
6 | import torch
7 | from torch import optim
8 | from torch.utils.data import DataLoader
9 | from conf import Conf
10 | from dataset.ts_dataset import TSDataset
11 | from models.temporal_fusion_t import tft_model
12 | from progress_bar import ProgressBar
13 | from utils import QuantileLoss, symmetric_mean_absolute_percentage_error, unnormalize_tensor, plot_temporal_serie
14 | import data_formatters.utils as utils
15 | from models.transformer import Transformer
16 | from models.transformer_grn.transformer import Transformer as GRNTransformer
17 |
18 |
19 |
20 | class TS(object):
21 | """
22 | Class for loading and testing the pre-trained model
23 | """
24 |
25 | def __init__(self, cnf):
26 | # type: (Conf) -> None
27 |
28 | self.cnf = cnf
29 | self.data_formatter = utils.make_data_formatter(cnf.ds_name)
30 |
31 | loader = TSDataset
32 | dataset_test = loader(self.cnf, self.data_formatter)
33 | dataset_test.test()
34 |
35 | # init model
36 | model_choice = self.cnf.all_params["model"]
37 | if model_choice == "transformer":
38 | # Baseline transformer
39 | self.model = Transformer(self.cnf.all_params)
40 | elif model_choice == "tf_transformer":
41 | # Temporal fusion transformer
42 | self.model = tft_model.TFT(self.cnf.all_params)
43 | elif model_choice == "grn_transformer":
44 | # Transformer + GRN to encode static vars
45 | self.model = GRNTransformer(self.cnf.all_params)
46 | else:
47 | raise NameError
48 |
49 | self.model = self.model.to(cnf.device)
50 |
51 | # init optimizer
52 | self.optimizer = optim.Adam(params=self.model.parameters(), lr=cnf.lr)
53 | self.loss = QuantileLoss(cnf.quantiles)
54 |
55 | # init test loader
56 | self.test_loader = DataLoader(
57 | dataset=dataset_test, batch_size=cnf.batch_size,
58 | num_workers=cnf.n_workers, shuffle=False, pin_memory=True,
59 | )
60 |
61 | # init logging stuffs
62 | self.log_path = cnf.exp_log_path
63 | self.log_freq = len(self.test_loader)
64 | self.train_losses = []
65 | self.test_loss = []
66 | self.test_losses = {'p10': [], 'p50': [], 'p90': []}
67 | self.test_smape = []
68 |
69 | # starting values
70 | self.epoch = 0
71 | self.best_test_loss = None
72 |
73 | # init progress bar
74 | self.progress_bar = ProgressBar(max_step=self.log_freq, max_epoch=self.cnf.epochs)
75 |
76 | # possibly load checkpoint
77 | self.load_ck()
78 |
79 | print("Finished preparing datasets.")
80 |
81 | def load_ck(self):
82 | """
83 | load training checkpoint
84 | """
85 | ck_path = self.log_path / self.cnf.exp_name + '_best.pth'
86 | if ck_path.exists():
87 | ck = torch.load(ck_path)
88 | print(f'[loading checkpoint \'{ck_path}\']')
89 | self.model.load_state_dict(ck)
90 |
91 | def test(self):
92 | """
93 | Quick test that plots predictions without saving checkpoints or logging to tensorboard
94 | """
95 | with torch.no_grad():
96 | self.model.eval()
97 | p10_forecast, p50_forecast, p90_forecast, target = None, None, None, None
98 |
99 | t = time()
100 | for step, sample in enumerate(self.test_loader):
101 |
102 | # Hide future target values from the input vector: overwrite the target channel with 1 for timesteps >= num_encoder_steps
103 | steps = self.cnf.all_params['num_encoder_steps']
104 | pred_len = sample['outputs'].shape[1]
105 | x = sample['inputs'].float().to(self.cnf.device)
106 | x[:, steps:, 0] = 1
107 |
108 | # Feed input to the model
109 | if self.cnf.all_params["model"] == "transformer" or self.cnf.all_params["model"] == "grn_transformer":
110 |
111 | # Auto-regressive prediction
112 | for i in range(pred_len):
113 | output = self.model.forward(x)
114 | x[:, steps + i, 0] = output[:, i, 1]
115 | output = self.model.forward(x)
116 |
117 | elif self.cnf.all_params["model"] == "tf_transformer":
118 | output, _, _ = self.model.forward(x)
119 | else:
120 | raise NameError
121 |
122 | output = output.squeeze()
123 | y, y_pred = sample['outputs'].squeeze().float().to(self.cnf.device), output
124 |
125 | # Compute loss
126 | loss, _ = self.loss(y_pred, y)
127 | smape = symmetric_mean_absolute_percentage_error(output[:, :, 1].detach().cpu().numpy(),
128 | sample['outputs'][:, :, 0].detach().cpu().numpy())
129 |
130 | # De-Normalize to compute metrics
131 | target = unnormalize_tensor(self.data_formatter, y, sample['identifier'][0][0])
132 | p10_forecast = unnormalize_tensor(self.data_formatter, y_pred[..., 0], sample['identifier'][0][0])
133 | p50_forecast = unnormalize_tensor(self.data_formatter, y_pred[..., 1], sample['identifier'][0][0])
134 | p90_forecast = unnormalize_tensor(self.data_formatter, y_pred[..., 2], sample['identifier'][0][0])
135 |
136 | # Compute metrics
137 | self.test_losses['p10'].append(self.loss.numpy_normalised_quantile_loss(p10_forecast, target, 0.1))
138 | self.test_losses['p50'].append(self.loss.numpy_normalised_quantile_loss(p50_forecast, target, 0.5))
139 | self.test_losses['p90'].append(self.loss.numpy_normalised_quantile_loss(p90_forecast, target, 0.9))
140 |
141 | self.test_loss.append(loss.item())
142 | self.test_smape.append(smape)
143 |
144 | # Plot series prediction
145 | p1, p2, p3, target = np.expand_dims(p10_forecast, axis=-1), np.expand_dims(p50_forecast, axis=-1), \
146 | np.expand_dims(p90_forecast, axis=-1), np.expand_dims(target, axis=-1)
147 | p = np.concatenate((p1, p2, p3), axis=-1)
148 | plot_temporal_serie(p, target)
149 |
150 | # Log stuff
151 | for k in self.test_losses.keys():
152 | mean_test_loss = np.mean(self.test_losses[k])
153 | print(f'\t● AVG {k} Loss on TEST-set: {mean_test_loss:.6f} │ T: {time() - t:.2f} s')
154 |
155 | # log log log
156 | mean_test_loss = np.mean(self.test_loss)
157 | mean_smape = np.mean(self.test_smape)
158 | print(f'\t● AVG Loss on TEST-set: {mean_test_loss:.6f} │ T: {time() - t:.2f} s')
159 | print(f'\t● AVG SMAPE on TEST-set: {mean_smape:.6f} │ T: {time() - t:.2f} s')
160 |
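161 | # Decoding sketch (shapes assumed): for the plain transformers, test() predicts
162 | # auto-regressively. The target channel of the decoder steps is seeded with 1,
163 | # and each pass writes the p50 forecast (quantile index 1) into the next slot:
164 | #
165 | #   x[:, steps:, 0] = 1
166 | #   for i in range(pred_len):
167 | #       output = self.model.forward(x)     # (batch, pred_len, n_quantiles)
168 | #       x[:, steps + i, 0] = output[:, i, 1]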
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | import click
5 | import torch.backends.cudnn as cudnn
6 |
7 | from conf import Conf
8 | from trainer import Trainer
9 | from inference import TS
10 |
11 | cudnn.benchmark = True
12 |
13 |
14 | @click.command()
15 | @click.option('--exp_name', type=str, default=None)
16 | @click.option('--conf_file_path', type=str, default=None)
17 | @click.option('--seed', type=int, default=None)
18 | @click.option('--inference', type=bool, default=False)
19 | def main(exp_name, conf_file_path, seed, inference):
20 | # type: (str, str, int, bool) -> None
21 |
22 | # if `exp_name` is None,
23 | # ask the user to enter it
24 | if exp_name is None:
25 | exp_name = click.prompt('▶ experiment name', default='default')
26 |
27 | # if `exp_name` contains '!',
28 | # `log_each_step` becomes `False`
29 | log_each_step = True
30 | if '!' in exp_name:
31 | exp_name = exp_name.replace('!', '')
32 | log_each_step = False
33 |
34 | # if `exp_name` contains a '@' character,
35 | # the number following '@' is considered as
36 | # the desired random seed for the experiment
37 | split = exp_name.split('@')
38 | if len(split) == 2:
39 | seed = int(split[1])
40 | exp_name = split[0]
41 |
42 | cnf = Conf(conf_file_path=conf_file_path, seed=seed, exp_name=exp_name, log=log_each_step)
43 | print(f'\n{cnf}')
44 |
45 | print(f'\n▶ Starting Experiment \'{exp_name}\' [seed: {cnf.seed}]')
46 |
47 | if inference:
48 | ts_model = TS(cnf=cnf)
49 | ts_model.test()
50 | else:
51 | trainer = Trainer(cnf=cnf)
52 | trainer.run()
53 |
54 |
55 | if __name__ == '__main__':
56 | main()
57 |
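58 | # Invocation examples (the '@' and '!' markers are parsed above):
59 | #   python main.py --exp_name traffic@42    # run 'traffic' with seed 42
60 | #   python main.py --exp_name 'debug!'      # run 'debug' without per-step logging
61 | #   python main.py --exp_name test --conf_file_path ./conf/electricity.yaml --inference True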
--------------------------------------------------------------------------------
/models/temporal_fusion_t/__init__.py:
--------------------------------------------------------------------------------
1 | from models.temporal_fusion_t.base import BaseModel
2 | from models.temporal_fusion_t.tft_model import TFT
--------------------------------------------------------------------------------
/models/temporal_fusion_t/add_and_norm.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 |
4 | class AddAndNorm(nn.Module):
5 | def __init__(self, hidden_layer_size):
6 | super(AddAndNorm, self).__init__()
7 |
8 | self.normalize = nn.LayerNorm(hidden_layer_size)
9 |
10 | def forward(self, x1, x2):
11 | x = torch.add(x1, x2)
12 | return self.normalize(x)
13 |
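14 | # Usage sketch: residual add followed by LayerNorm over the last dimension.
15 | #   layer = AddAndNorm(hidden_layer_size=160)
16 | #   out = layer(x, skip)  # x, skip: (batch, time, 160) -> out: (batch, time, 160)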
--------------------------------------------------------------------------------
/models/temporal_fusion_t/base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | from abc import ABCMeta
5 | from abc import abstractmethod
6 | from typing import Union
7 |
8 | import torch
9 | from path import Path
10 | from torch import nn
11 |
12 |
13 | class BaseModel(nn.Module, metaclass=ABCMeta):
14 |
15 | def __init__(self):
16 | super().__init__()
17 |
18 |
19 | def kaiming_init(self, activation):
20 | # type: (str) -> ()
21 | """
22 | Apply "Kaiming-Normal" initialization to all Conv2D(s) of the model.
23 | :param activation: activation function after conv; values in {'ReLU', 'LeakyReLU'}
24 | :return:
25 | """
26 | assert activation in ['ReLU', 'LeakyReLU', 'leaky_relu'], \
27 | '`activation` must be \'ReLU\' or \'LeakyReLU\''
28 |
29 | if activation == 'LeakyReLU':
30 | activation = 'leaky_relu'
31 | activation = activation.lower()
32 |
33 | for m in self.modules():
34 | if isinstance(m, nn.Conv2d):
35 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity=activation)
36 | if m.bias is not None:
37 | nn.init.constant_(m.bias, 0)
38 |
39 |
40 | @abstractmethod
41 | def forward(self, x):
42 | # type: (torch.Tensor) -> torch.Tensor
43 | """
44 | Defines the computation performed at every call.
45 | Should be overridden by all subclasses.
46 | """
47 | ...
48 |
49 |
50 | @property
51 | def n_param(self):
52 | # type: (BaseModel) -> int
53 | """
54 | :return: number of parameters
55 | """
56 | return sum(p.numel() for p in self.parameters() if p.requires_grad)
57 |
58 |
59 | @property
60 | def current_device(self):
61 | # type: () -> str
62 | """
63 | :return: string that represents the device on which the model is currently located
64 | >> e.g.: 'cpu', 'cuda', 'cuda:0', 'cuda:1', ...
65 | """
66 | return str(next(self.parameters()).device)
67 |
68 |
69 | @property
70 | def is_cuda(self):
71 | # type: () -> bool
72 | """
73 | :return: `True` if the model is on Cuda; `False` otherwise
74 | """
75 | return 'cuda' in self.current_device
76 |
77 |
78 | def save_w(self, path):
79 | # type: (Union[str, Path]) -> None
80 | """
81 | save model weights in the specified path
82 | """
83 | torch.save(self.state_dict(), path)
84 |
85 |
86 | def load_w(self, path):
87 | # type: (Union[str, Path]) -> None
88 | """
89 | load model weights from the specified path
90 | """
91 | self.load_state_dict(torch.load(path))
92 |
93 |
94 | def requires_grad(self, flag):
95 | # type: (bool) -> None
96 | """
97 | :param flag: True if the model requires gradient, False otherwise
98 | """
99 | for p in self.parameters():
100 | p.requires_grad = flag
101 |
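102 | # Usage sketch for the helpers above (the path is illustrative):
103 | #   model.save_w('log/exp_best.pth')
104 | #   model.load_w('log/exp_best.pth')
105 | #   print(model.n_param, model.current_device, model.is_cuda)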
--------------------------------------------------------------------------------
/models/temporal_fusion_t/gated_linear_unit.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from models.temporal_fusion_t.linear_layer import LinearLayer
4 |
5 | class GLU(nn.Module):
6 | # Gated Linear Unit
7 | def __init__(self,
8 | input_size,
9 | hidden_layer_size,
10 | dropout_rate=None,
11 | use_time_distributed=True,
12 | batch_first=False
13 | ):
14 | super(GLU, self).__init__()
15 | self.hidden_layer_size = hidden_layer_size
16 | self.dropout_rate = dropout_rate
17 | self.use_time_distributed = use_time_distributed
18 |
19 | if dropout_rate is not None:
20 | self.dropout = nn.Dropout(self.dropout_rate)
21 |
22 | self.activation_layer = LinearLayer(input_size, hidden_layer_size, use_time_distributed, batch_first)
23 | self.gated_layer = LinearLayer(input_size, hidden_layer_size, use_time_distributed, batch_first)
24 |
25 | self.sigmoid = nn.Sigmoid()
26 |
27 | def forward(self, x):
28 | if self.dropout_rate is not None:
29 | x = self.dropout(x)
30 |
31 | activation = self.activation_layer(x)
32 | gated = self.sigmoid(self.gated_layer(x))
33 |
34 | return torch.mul(activation, gated), gated
35 |
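36 | # In formula form: GLU(x) = (W_a x + b_a) * sigmoid(W_g x + b_g); the returned
37 | # gate sigmoid(.) shows how strongly each component of this branch is kept.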
--------------------------------------------------------------------------------
/models/temporal_fusion_t/gated_residual_network.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import math
3 | import torch
4 | from models.temporal_fusion_t.linear_layer import LinearLayer
5 | from models.temporal_fusion_t.add_and_norm import AddAndNorm
6 | from models.temporal_fusion_t.gated_linear_unit import GLU
7 |
8 | class GatedResidualNetwork(nn.Module):
9 | def __init__(self,
10 | input_size,
11 | hidden_layer_size,
12 | output_size=None,
13 | dropout_rate=None,
14 | use_time_distributed=True,
15 | return_gate=False,
16 | batch_first=False
17 | ):
18 |
19 | super(GatedResidualNetwork, self).__init__()
20 | if output_size is None:
21 | output = hidden_layer_size
22 | else:
23 | output = output_size
24 |
25 | self.output = output
26 | self.input_size = input_size
27 | self.output_size = output_size
28 | self.hidden_layer_size = hidden_layer_size
29 | self.return_gate = return_gate
30 |
31 | self.linear_layer = LinearLayer(input_size, output, use_time_distributed, batch_first)
32 |
33 | self.hidden_linear_layer1 = LinearLayer(input_size, hidden_layer_size, use_time_distributed, batch_first)
34 | self.hidden_context_layer = LinearLayer(hidden_layer_size, hidden_layer_size, use_time_distributed, batch_first)
35 | self.hidden_linear_layer2 = LinearLayer(hidden_layer_size, hidden_layer_size, use_time_distributed, batch_first)
36 |
37 | self.elu1 = nn.ELU()
38 | self.glu = GLU(hidden_layer_size, output, dropout_rate, use_time_distributed, batch_first)
39 | self.add_and_norm = AddAndNorm(hidden_layer_size=output)
40 |
41 | def forward(self, x, context=None):
42 | # Setup skip connection
43 | if self.output_size is None:
44 | skip = x
45 | else:
46 | skip = self.linear_layer(x)
47 |
48 | # Apply feedforward network
49 | hidden = self.hidden_linear_layer1(x)
50 | if context is not None:
51 | hidden = hidden + self.hidden_context_layer(context)
52 | hidden = self.elu1(hidden)
53 | hidden = self.hidden_linear_layer2(hidden)
54 |
55 | gating_layer, gate = self.glu(hidden)
56 | if self.return_gate:
57 | return self.add_and_norm(skip, gating_layer), gate
58 | else:
59 | return self.add_and_norm(skip, gating_layer)
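60 |
61 | # In formula form: GRN(x, c) = LayerNorm(skip + GLU(W_2 ELU(W_1 x + W_c c))),
62 | # where skip is x itself, or a linear projection of x when output_size is set.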
--------------------------------------------------------------------------------
/models/temporal_fusion_t/interpretable_multi_head_attention.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from models.temporal_fusion_t.scaled_dot_product_attention import ScaledDotProductAttention
4 |
5 | class InterpretableMultiHeadAttention(nn.Module):
6 | """Defines interpretable multi-head attention layer.
7 |
8 | Attributes:
9 | n_head: Number of heads
10 | d_k: Key/query dimensionality per head
11 | d_v: Value dimensionality
12 | dropout: Dropout rate to apply
13 | qs_layers: List of queries across heads
14 | ks_layers: List of keys across heads
15 | vs_layers: List of values across heads
16 | attention: Scaled dot product attention layer
17 | w_o: Output weight matrix to project internal state to the original TFT
18 | state size
19 | """
20 |
21 | def __init__(self, n_head, d_model, dropout_rate):
22 | """Initialises layer.
23 |
24 | Args:
25 | n_head: Number of heads
26 | d_model: TFT state dimensionality
27 | dropout_rate: Dropout discard rate
28 | """
29 | super(InterpretableMultiHeadAttention, self).__init__()
30 |
31 | self.n_head = n_head
32 | self.d_k = self.d_v = d_k = d_v = d_model // n_head
33 | self.dropout = nn.Dropout(dropout_rate)
34 |
35 | self.qs_layers = nn.ModuleList()
36 | self.ks_layers = nn.ModuleList()
37 | self.vs_layers = nn.ModuleList()
38 |
39 | # Use same value layer to facilitate interp
40 | vs_layer = nn.Linear(d_model, d_v, bias=False)
41 | qs_layer = nn.Linear(d_model, d_k, bias=False)
42 | ks_layer = nn.Linear(d_model, d_k, bias=False)
43 |
44 | for _ in range(n_head):
45 | self.qs_layers.append(qs_layer)
46 | self.ks_layers.append(ks_layer)
47 | self.vs_layers.append(vs_layer) # use same vs_layer
48 |
49 | self.attention = ScaledDotProductAttention()
50 | self.w_o = nn.Linear(self.d_k, d_model, bias=False)
51 |
52 | def forward(self, q, k, v, mask=None):
53 | """Applies interpretable multihead attention.
54 |
55 | Using T to denote the number of time steps fed into the transformer.
56 |
57 | Args:
58 | q: Query tensor of shape=(?, T, d_model)
59 | k: Key of shape=(?, T, d_model)
60 | v: Values of shape=(?, T, d_model)
61 | mask: Masking if required with shape=(?, T, T)
62 |
63 | Returns:
64 | Tuple of (layer outputs, attention weights)
65 | """
66 | n_head = self.n_head
67 | heads = []
68 | attns = []
69 | for i in range(n_head):
70 | qs = self.qs_layers[i](q)
71 | ks = self.ks_layers[i](k)
72 | vs = self.vs_layers[i](v)
73 | head, attn = self.attention(qs, ks, vs, mask)
74 |
75 | head_dropout = self.dropout(head)
76 | heads.append(head_dropout)
77 | attns.append(attn)
78 | head = torch.stack(heads) if n_head > 1 else heads[0]
79 | attn = torch.stack(attns)
80 |
81 | outputs = torch.mean(head, dim=0) if n_head > 1 else head
82 | outputs = self.w_o(outputs)
83 | outputs = self.dropout(outputs) # output dropout
84 |
85 | return outputs, attn
86 |
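87 | # Usage sketch (shapes assumed): with q = k = v of shape (batch, T, d_model),
88 | #   attn_layer = InterpretableMultiHeadAttention(n_head=4, d_model=160, dropout_rate=0.1)
89 | #   out, attn = attn_layer(q, q, q, mask)  # out: (batch, T, 160); attn: (4, batch, T, T)
90 | # All heads share one value projection and head outputs are averaged rather than
91 | # concatenated, which keeps per-head attention weights directly comparable.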
--------------------------------------------------------------------------------
/models/temporal_fusion_t/linear_layer.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from models.temporal_fusion_t.time_distributed import TimeDistributed
4 |
5 | class LinearLayer(nn.Module):
6 | def __init__(self,
7 | input_size,
8 | size,
9 | use_time_distributed=True,
10 | batch_first=False):
11 | super(LinearLayer, self).__init__()
12 |
13 | self.use_time_distributed=use_time_distributed
14 | self.input_size=input_size
15 | self.size=size
16 | if use_time_distributed:
17 | self.layer = TimeDistributed(nn.Linear(input_size, size), batch_first=batch_first)
18 | else:
19 | self.layer = nn.Linear(input_size, size)
20 |
21 | def forward(self, x):
22 | return self.layer(x)
23 |
--------------------------------------------------------------------------------
/models/temporal_fusion_t/lstm_combine_and_mask.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from models.temporal_fusion_t.gated_residual_network import GatedResidualNetwork
4 |
5 |
6 | class LSTMCombineAndMask(nn.Module):
7 | def __init__(self, input_size, num_inputs, hidden_layer_size, dropout_rate, use_time_distributed=False, batch_first=True):
8 | super(LSTMCombineAndMask, self).__init__()
9 |
10 | self.hidden_layer_size = hidden_layer_size
11 | self.input_size = input_size
12 | self.num_inputs = num_inputs
13 | self.dropout_rate = dropout_rate
14 |
15 | self.flattened_grn = GatedResidualNetwork(self.num_inputs*self.hidden_layer_size, self.hidden_layer_size, self.num_inputs, self.dropout_rate, use_time_distributed=use_time_distributed, return_gate=True, batch_first=batch_first)
16 |
17 | self.single_variable_grns = nn.ModuleList()
18 | for i in range(self.num_inputs):
19 | self.single_variable_grns.append(GatedResidualNetwork(self.hidden_layer_size, self.hidden_layer_size, None, self.dropout_rate, use_time_distributed=use_time_distributed, return_gate=False, batch_first=batch_first))
20 |
21 | self.softmax = nn.Softmax(dim=2)
22 |
23 | def forward(self, embedding, additional_context=None):
24 | # Flatten per-variable temporal embeddings for the variable-selection GRN
25 | _, time_steps, embedding_dim, num_inputs = list(embedding.shape)
26 |
27 | flattened_embedding = torch.reshape(embedding,
28 | [-1, time_steps, embedding_dim * num_inputs])
29 |
30 | if additional_context is not None:
31 | # Broadcast the static context across the time dimension before the GRN
32 | expanded_static_context = additional_context.unsqueeze(1)
33 | sparse_weights, static_gate = self.flattened_grn(flattened_embedding, expanded_static_context)
34 | else:
35 | sparse_weights, static_gate = self.flattened_grn(flattened_embedding)
36 |
37 | sparse_weights = self.softmax(sparse_weights).unsqueeze(2)
38 |
39 | trans_emb_list = []
40 | for i in range(self.num_inputs):
41 | ##select slice of embedding belonging to a single input
42 | trans_emb_list.append(
43 | self.single_variable_grns[i](embedding[Ellipsis,i])
44 | )
45 |
46 | transformed_embedding = torch.stack(trans_emb_list, dim=-1)
47 |
48 | combined = transformed_embedding*sparse_weights
49 |
50 | temporal_ctx = combined.sum(dim=-1)
51 |
52 | return temporal_ctx, sparse_weights, static_gate
53 |
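54 | # Selection sketch: sparse_weights has shape (batch, time, 1, num_inputs); the
55 | # per-variable GRN outputs (batch, time, hidden, num_inputs) are weighted and
56 | # summed over the last dim, giving temporal_ctx of shape (batch, time, hidden).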
--------------------------------------------------------------------------------
/models/temporal_fusion_t/scaled_dot_product_attention.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 |
4 | class ScaledDotProductAttention(nn.Module):
5 | """Defines scaled dot product attention layer.
6 |
7 | Attributes:
8 | dropout: Dropout rate to use
9 | activation: Normalisation function for scaled dot product attention (e.g.
10 | softmax by default)
11 | """
12 |
13 | def __init__(self, attn_dropout=0.0):
14 | super(ScaledDotProductAttention, self).__init__()
15 |
16 | self.dropout = nn.Dropout(attn_dropout)
17 | self.activation = nn.Softmax(dim=-1)
18 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
19 |
20 | def forward(self, q, k, v, mask):
21 | """Applies scaled dot product attention.
22 |
23 | Args:
24 | q: Queries
25 | k: Keys
26 | v: Values
27 | mask: Optional masking -- masked positions are set to a large negative value before the softmax
28 |
29 | Returns:
30 | Tuple of (layer outputs, attention weights)
31 | """
32 | attn = torch.bmm(q, k.permute(0, 2, 1)) / (k.shape[-1] ** 0.5)  # shape=(batch, q, k), scaled by sqrt(d_k)
33 | if mask is not None:
34 | attn = attn.masked_fill(mask.bool().to(self.device), -1e9)
35 |
36 | attn = self.activation(attn)
37 | attn = self.dropout(attn)
38 | output = torch.bmm(attn,v)
39 | return output, attn
40 |
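41 | # Sketch: attention weights are a softmax over the Q.K^T score matrix (masked
42 | # entries pushed to -1e9 first) and are then applied to V via a batched matmul.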
--------------------------------------------------------------------------------
/models/temporal_fusion_t/static_combine_and_mask.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 | from models.temporal_fusion_t.gated_residual_network import GatedResidualNetwork
4 |
5 | class StaticCombineAndMask(nn.Module):
6 | def __init__(self, input_size, num_static, hidden_layer_size, dropout_rate, additional_context=None, use_time_distributed=False, batch_first=True):
7 | super(StaticCombineAndMask, self).__init__()
8 |
9 | self.hidden_layer_size = hidden_layer_size
10 | self.input_size =input_size
11 | self.num_static = num_static
12 | self.dropout_rate = dropout_rate
13 | self.additional_context = additional_context
14 |
15 | # Both context branches built identical GRNs, so a single definition suffices;
16 | # any additional context is only consumed later, in forward().
17 | self.flattened_grn = GatedResidualNetwork(self.num_static*self.hidden_layer_size, self.hidden_layer_size, self.num_static, self.dropout_rate, use_time_distributed=False, return_gate=False, batch_first=batch_first)
18 |
19 |
20 |
21 | self.single_variable_grns = nn.ModuleList()
22 | for i in range(self.num_static):
23 | self.single_variable_grns.append(GatedResidualNetwork(self.hidden_layer_size, self.hidden_layer_size, None, self.dropout_rate, use_time_distributed=False, return_gate=False, batch_first=batch_first))
24 |
25 | self.softmax = nn.Softmax(dim=1)
26 |
27 | def forward(self, embedding, additional_context=None):
28 | # Combine static variable embeddings
29 | _, num_static, _ = list(embedding.shape)
30 | flattened_embedding = torch.flatten(embedding, start_dim=1)
31 | if additional_context is not None:
32 | sparse_weights = self.flattened_grn(flattened_embedding, additional_context)
33 | else:
34 | sparse_weights = self.flattened_grn(flattened_embedding)
35 |
36 | sparse_weights = self.softmax(sparse_weights).unsqueeze(2)
37 |
38 | trans_emb_list = []
39 | for i in range(self.num_static):
40 | ##select slice of embedding belonging to a single input
41 | trans_emb_list.append(
42 | self.single_variable_grns[i](torch.flatten(embedding[:, i:i + 1, :], start_dim=1))
43 | )
44 |
45 | transformed_embedding = torch.stack(trans_emb_list, dim=1)
46 |
47 | combined = transformed_embedding*sparse_weights
48 |
49 | static_vec = combined.sum(dim=1)
50 |
51 | return static_vec, sparse_weights
52 |
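53 | # Selection sketch: softmax over the flattened-GRN output yields one weight per
54 | # static variable; the weighted sum collapses (batch, num_static, hidden) into a
55 | # single (batch, hidden) static context vector.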
--------------------------------------------------------------------------------
/models/temporal_fusion_t/tft_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of Temporal Fusion Transformers: https://arxiv.org/abs/1912.09363
3 | """
4 |
5 | import math
6 | import torch
7 | import ipdb
8 | import json
9 | from torch import nn
10 | from models.temporal_fusion_t.base import BaseModel
11 | from models.temporal_fusion_t.add_and_norm import AddAndNorm
12 | from models.temporal_fusion_t.gated_residual_network import GatedResidualNetwork
13 | from models.temporal_fusion_t.gated_linear_unit import GLU
14 | from models.temporal_fusion_t.linear_layer import LinearLayer
15 | from models.temporal_fusion_t.lstm_combine_and_mask import LSTMCombineAndMask
16 | from models.temporal_fusion_t.static_combine_and_mask import StaticCombineAndMask
17 | from models.temporal_fusion_t.time_distributed import TimeDistributed
18 | from models.temporal_fusion_t.interpretable_multi_head_attention import InterpretableMultiHeadAttention
19 |
20 |
21 | class TFT(BaseModel):
22 | def __init__(self, raw_params):
23 | super(TFT, self).__init__()
24 |
25 | params = dict(raw_params) # copy locally
26 | print(params)
27 |
28 | # Data parameters
29 | self.time_steps = int(params['total_time_steps'])
30 | self.input_size = int(params['input_size'])
31 | self.output_size = int(params['output_size'])
32 | self.category_counts = json.loads(str(params['category_counts']))
33 | self.n_multiprocessing_workers = int(params['n_workers'])
34 |
35 | # Relevant indices for TFT
36 | self._input_obs_loc = json.loads(str(params['input_obs_loc']))
37 | self._static_input_loc = json.loads(str(params['static_input_loc']))
38 | self._known_regular_input_idx = json.loads(
39 | str(params['known_regular_inputs']))
40 | self._known_categorical_input_idx = json.loads(
41 | str(params['known_categorical_inputs']))
42 |
43 | # Network params
44 | self.quantiles = list(params['quantiles'])
45 | self.device = str(params['device'])
46 | self.hidden_layer_size = int(params['hidden_layer_size'])
47 | self.dropout_rate = float(params['dropout_rate'])
48 | self.max_gradient_norm = float(params['max_gradient_norm'])
49 | self.learning_rate = float(params['lr'])
50 | self.minibatch_size = int(params['batch_size'])
51 | self.num_epochs = int(params['num_epochs'])
52 | self.early_stopping_patience = int(params['early_stopping_patience'])
53 |
54 | self.num_encoder_steps = int(params['num_encoder_steps'])
55 | self.num_stacks = int(params['stack_size'])
56 | self.num_heads = int(params['num_heads'])
57 | self.batch_first = True
58 | self.num_static = len(self._static_input_loc)
59 | self.num_inputs = len(self._known_regular_input_idx) + self.output_size
60 | self.num_inputs_decoder = len(self._known_regular_input_idx)
61 |
62 | # Serialisation options
63 | # self._temp_folder = os.path.join(params['model_folder'], 'tmp')
64 | # self.reset_temp_folder()
65 |
66 | # Extra components to store Tensorflow nodes for attention computations
67 | self._input_placeholder = None
68 | self._attention_components = None
69 | self._prediction_parts = None
70 |
71 | # print('*** params ***')
72 | # for k in params:
73 | # print('# {} = {}'.format(k, params[k]))
74 |
75 | #######
76 | time_steps = self.time_steps
77 | num_categorical_variables = len(self.category_counts)
78 | num_regular_variables = self.input_size - num_categorical_variables
79 |
80 | embedding_sizes = [
81 | self.hidden_layer_size for i, size in enumerate(self.category_counts)
82 | ]
83 |
84 | print("num_categorical_variables")
85 | print(num_categorical_variables)
86 | self.embeddings = nn.ModuleList()
87 | for i in range(num_categorical_variables):
88 | embedding = nn.Embedding(self.category_counts[i], embedding_sizes[i])
89 | self.embeddings.append(embedding)
90 |
91 | self.static_input_layer = nn.Linear(self.hidden_layer_size, self.hidden_layer_size)
92 | self.time_varying_embedding_layer = LinearLayer(input_size=1, size=self.hidden_layer_size, use_time_distributed=True, batch_first=self.batch_first)
93 |
94 | self.static_combine_and_mask = StaticCombineAndMask(
95 | input_size=self.input_size,
96 | num_static=self.num_static,
97 | hidden_layer_size=self.hidden_layer_size,
98 | dropout_rate=self.dropout_rate,
99 | additional_context=None,
100 | use_time_distributed=False,
101 | batch_first=self.batch_first)
102 | self.static_context_variable_selection_grn = GatedResidualNetwork(
103 | input_size=self.hidden_layer_size,
104 | hidden_layer_size=self.hidden_layer_size,
105 | output_size=None,
106 | dropout_rate=self.dropout_rate,
107 | use_time_distributed=False,
108 | return_gate=False,
109 | batch_first=self.batch_first)
110 | self.static_context_enrichment_grn = GatedResidualNetwork(
111 | input_size=self.hidden_layer_size,
112 | hidden_layer_size=self.hidden_layer_size,
113 | output_size=None,
114 | dropout_rate=self.dropout_rate,
115 | use_time_distributed=False,
116 | return_gate=False,
117 | batch_first=self.batch_first)
118 | self.static_context_state_h_grn = GatedResidualNetwork(
119 | input_size=self.hidden_layer_size,
120 | hidden_layer_size=self.hidden_layer_size,
121 | output_size=None,
122 | dropout_rate=self.dropout_rate,
123 | use_time_distributed=False,
124 | return_gate=False,
125 | batch_first=self.batch_first)
126 | self.static_context_state_c_grn = GatedResidualNetwork(
127 | input_size=self.hidden_layer_size,
128 | hidden_layer_size=self.hidden_layer_size,
129 | output_size=None,
130 | dropout_rate=self.dropout_rate,
131 | use_time_distributed=False,
132 | return_gate=False,
133 | batch_first=self.batch_first)
134 | self.historical_lstm_combine_and_mask = LSTMCombineAndMask(
135 | input_size=self.num_encoder_steps,
136 | num_inputs=self.num_inputs,
137 | hidden_layer_size=self.hidden_layer_size,
138 | dropout_rate=self.dropout_rate,
139 | use_time_distributed=True,
140 | batch_first=self.batch_first)
141 | self.future_lstm_combine_and_mask = LSTMCombineAndMask(
142 | input_size=self.num_encoder_steps,
143 | num_inputs=self.num_inputs_decoder,
144 | hidden_layer_size=self.hidden_layer_size,
145 | dropout_rate=self.dropout_rate,
146 | use_time_distributed=True,
147 | batch_first=self.batch_first)
148 |
149 | self.lstm_encoder = nn.LSTM(input_size=self.hidden_layer_size, hidden_size=self.hidden_layer_size, batch_first=self.batch_first)
150 | self.lstm_decoder = nn.LSTM(input_size=self.hidden_layer_size, hidden_size=self.hidden_layer_size, batch_first=self.batch_first)
151 |
152 | self.lstm_glu = GLU(
153 | input_size=self.hidden_layer_size,
154 | hidden_layer_size=self.hidden_layer_size,
155 | dropout_rate=self.dropout_rate,
156 | use_time_distributed=True,
157 | batch_first=self.batch_first)
158 | self.lstm_glu_add_and_norm = AddAndNorm(hidden_layer_size=self.hidden_layer_size)
159 |
160 | self.static_enrichment_grn = GatedResidualNetwork(
161 | input_size=self.hidden_layer_size,
162 | hidden_layer_size=self.hidden_layer_size,
163 | output_size=None,
164 | dropout_rate=self.dropout_rate,
165 | use_time_distributed=True,
166 | return_gate=True,
167 | batch_first=self.batch_first)
168 |
169 | self.self_attn_layer = InterpretableMultiHeadAttention(self.num_heads, self.hidden_layer_size, dropout_rate=self.dropout_rate)
170 |
171 | self.self_attention_glu = GLU(
172 | input_size=self.hidden_layer_size,
173 | hidden_layer_size=self.hidden_layer_size,
174 | dropout_rate=self.dropout_rate,
175 | use_time_distributed=True,
176 | batch_first=self.batch_first)
177 | self.self_attention_glu_add_and_norm = AddAndNorm(hidden_layer_size=self.hidden_layer_size)
178 |
179 | self.decoder_grn = GatedResidualNetwork(
180 | input_size=self.hidden_layer_size,
181 | hidden_layer_size=self.hidden_layer_size,
182 | output_size=None,
183 | dropout_rate=self.dropout_rate,
184 | use_time_distributed=True,
185 | return_gate=False,
186 | batch_first=self.batch_first)
187 |
188 | self.final_glu = GLU(
189 | input_size=self.hidden_layer_size,
190 | hidden_layer_size=self.hidden_layer_size,
191 | dropout_rate=self.dropout_rate,
192 | use_time_distributed=True,
193 | batch_first=self.batch_first)
194 | self.final_glu_add_and_norm = AddAndNorm(hidden_layer_size=self.hidden_layer_size)
195 |
196 | self.output_layer = LinearLayer(
197 | input_size=self.hidden_layer_size,
198 | size=self.output_size * len(self.quantiles),
199 | use_time_distributed=True,
200 | batch_first=self.batch_first)
201 |
202 | def get_decoder_mask(self, self_attn_inputs):
203 | """Returns causal mask to apply for self-attention layer.
204 |
205 | Args:
206 | self_attn_inputs: Inputs to self attention layer to determine mask shape
207 | """
208 | len_s = self_attn_inputs.shape[1]  # number of time steps, e.g. 192
209 | bs = self_attn_inputs.shape[0]  # batch size, e.g. 64
210 | # Strictly upper-triangular mask (1 where j > i): masked_fill in the attention layer then blocks attention to future steps
211 | mask = torch.triu(torch.ones(len_s, len_s), diagonal=1).unsqueeze(0).repeat(bs, 1, 1)
212 | return mask
213 |
214 | def get_tft_embeddings(self, all_inputs):
215 | time_steps = self.time_steps
216 |
217 | num_categorical_variables = len(self.category_counts)
218 | num_regular_variables = self.input_size - num_categorical_variables
219 |
220 | embedding_sizes = [
221 | self.hidden_layer_size for i, size in enumerate(self.category_counts)
222 | ]
223 |
224 | regular_inputs, categorical_inputs \
225 | = all_inputs[:, :, :num_regular_variables], \
226 | all_inputs[:, :, num_regular_variables:]
227 |
228 | embedded_inputs = [
229 | self.embeddings[i](categorical_inputs[:,:, i].long())
230 | for i in range(num_categorical_variables)
231 | ]
232 |
233 | # Static inputs
234 | if self._static_input_loc:
235 | static_inputs = []
236 | for i in range(num_regular_variables):
237 | if i in self._static_input_loc:
238 | reg_i = self.static_input_layer(regular_inputs[:, 0, i:i + 1])
239 | static_inputs.append(reg_i)
240 |
241 | emb_inputs = []
242 | for i in range(num_categorical_variables):
243 | if i + num_regular_variables in self._static_input_loc:
244 | emb_inputs.append(embedded_inputs[i][:, 0, :])
245 |
246 | static_inputs += emb_inputs
247 | static_inputs = torch.stack(static_inputs, dim=1)
248 |
249 | else:
250 | static_inputs = None
251 |
252 | # Targets
253 | obs_inputs = torch.stack([
254 | self.time_varying_embedding_layer(regular_inputs[Ellipsis, i:i + 1].float())
255 | for i in self._input_obs_loc
256 | ], dim=-1)
257 |
258 |
259 | # Observed (a priori unknown) inputs
260 | wired_embeddings = []
261 | for i in range(num_categorical_variables):
262 | if i not in self._known_categorical_input_idx and i not in self._input_obs_loc:
263 | e = self.embeddings[i](categorical_inputs[:, :, i].long())
264 | wired_embeddings.append(e)
265 |
266 | unknown_inputs = []
267 | for i in range(regular_inputs.shape[-1]):
268 | if i not in self._known_regular_input_idx and i not in self._input_obs_loc:
269 | e = self.time_varying_embedding_layer(regular_inputs[Ellipsis, i:i + 1])
270 | unknown_inputs.append(e)
271 |
272 | if unknown_inputs + wired_embeddings:
273 | unknown_inputs = torch.stack(unknown_inputs + wired_embeddings, dim=-1)
274 | else:
275 | unknown_inputs = None
276 |
277 | # A priori known inputs
278 | known_regular_inputs = []
279 | for i in self._known_regular_input_idx:
280 | if i not in self._static_input_loc:
281 | known_regular_inputs.append(self.time_varying_embedding_layer(regular_inputs[Ellipsis, i:i + 1].float()))
282 |
283 | known_categorical_inputs = []
284 | for i in self._known_categorical_input_idx:
285 | if i + num_regular_variables not in self._static_input_loc:
286 | known_categorical_inputs.append(embedded_inputs[i])
287 |
288 | known_combined_layer = torch.stack(known_regular_inputs + known_categorical_inputs, dim=-1)
289 |
290 | return unknown_inputs, known_combined_layer, obs_inputs, static_inputs
291 |
292 | def forward(self, x):
293 | # Size definitions.
294 | time_steps = self.time_steps
295 | combined_input_size = self.input_size
296 | encoder_steps = self.num_encoder_steps
297 | all_inputs = x.to(self.device)
298 |
299 | unknown_inputs, known_combined_layer, obs_inputs, static_inputs \
300 | = self.get_tft_embeddings(all_inputs)
301 |
302 | # Isolate known and observed historical inputs.
303 | if unknown_inputs is not None:
304 | historical_inputs = torch.cat([
305 | unknown_inputs[:, :encoder_steps, :],
306 | known_combined_layer[:, :encoder_steps, :],
307 | obs_inputs[:, :encoder_steps, :]
308 | ], dim=-1)
309 | else:
310 | historical_inputs = torch.cat([
311 | known_combined_layer[:, :encoder_steps, :],
312 | obs_inputs[:, :encoder_steps, :]
313 | ], dim=-1)
314 |
315 | # Isolate only known future inputs.
316 | future_inputs = known_combined_layer[:, encoder_steps:, :]
317 |
318 | static_encoder, static_weights = self.static_combine_and_mask(static_inputs)
319 | static_context_variable_selection = self.static_context_variable_selection_grn(static_encoder)
320 | static_context_enrichment = self.static_context_enrichment_grn(static_encoder)
321 | static_context_state_h = self.static_context_state_h_grn(static_encoder)
322 | static_context_state_c = self.static_context_state_c_grn(static_encoder)
323 | historical_features, historical_flags, _ = self.historical_lstm_combine_and_mask(historical_inputs, static_context_variable_selection)
324 | future_features, future_flags, _ = self.future_lstm_combine_and_mask(future_inputs, static_context_variable_selection)
325 |
326 | history_lstm, (state_h, state_c) = self.lstm_encoder(historical_features, (static_context_state_h.unsqueeze(0), static_context_state_c.unsqueeze(0)))
327 | future_lstm, _ = self.lstm_decoder(future_features, (state_h, state_c))
328 |
329 | lstm_layer = torch.cat([history_lstm, future_lstm], dim=1)
330 | # Apply gated skip connection
331 | input_embeddings = torch.cat([historical_features, future_features], dim=1)
332 |
333 | lstm_layer, _ = self.lstm_glu(lstm_layer)
334 | temporal_feature_layer = self.lstm_glu_add_and_norm(lstm_layer, input_embeddings)
335 |
336 | # Static enrichment layers
337 | expanded_static_context = static_context_enrichment.unsqueeze(1)
338 | enriched, _ = self.static_enrichment_grn(temporal_feature_layer, expanded_static_context)
339 |
340 | # Decoder self attention
341 | mask = self.get_decoder_mask(enriched)
342 | x, self_att = self.self_attn_layer(enriched, enriched, enriched, mask)
343 |
344 | x, _ = self.self_attention_glu(x)
345 | x = self.self_attention_glu_add_and_norm(x, enriched)
346 |
347 | # Nonlinear processing on outputs
348 | decoder = self.decoder_grn(x)
349 | # Final skip connection
350 | decoder, _ = self.final_glu(decoder)
351 | transformer_layer = self.final_glu_add_and_norm(decoder, temporal_feature_layer)
352 | # Attention components for explainability
353 | attention_components = {
354 | # Temporal attention weights
355 | 'decoder_self_attn': self_att,
356 | # Static variable selection weights
357 | 'static_flags': static_weights[Ellipsis, 0],
358 | # Variable selection weights of past inputs
359 | 'historical_flags': historical_flags[Ellipsis, 0, :],
360 | # Variable selection weights of future inputs
361 | 'future_flags': future_flags[Ellipsis, 0, :]
362 | }
363 |
364 | outputs = self.output_layer(transformer_layer[:, self.num_encoder_steps:, :])
365 | return outputs, all_inputs, attention_components
366 |
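367 | # --- Illustrative sketch (added for exposition; not part of the original file) ---
368 | # The "Decoder self attention" step above relies on get_decoder_mask (defined
369 | # earlier in this file) to hide future positions. A standard causal mask of
370 | # the kind it presumably builds looks like this:
371 | if __name__ == '__main__':
372 |     import torch
373 |     seq_len = 4
374 |     causal_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
375 |     print(causal_mask)  # True marks positions a time step may not attend to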
--------------------------------------------------------------------------------
/models/temporal_fusion_t/time_distributed.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch
3 |
4 | class TimeDistributed(nn.Module):
5 | ## Takes any module and merges the time dimension into the batch dimension of the inputs before applying the module
6 | ## From: https://discuss.pytorch.org/t/any-pytorch-function-can-work-as-keras-timedistributed/1346/4
7 | def __init__(self, module, batch_first=False):
8 | super(TimeDistributed, self).__init__()
9 | self.module = module
10 | self.batch_first = batch_first
11 |
12 | def forward(self, x):
13 |
14 | if len(x.size()) <= 2:
15 | return self.module(x)
16 |
17 | # Squash samples and timesteps into a single axis
18 | x_reshape = x.contiguous().view(-1, x.size(-1)) # (samples * timesteps, input_size)
19 |
20 | if x_reshape.dtype != torch.float32:
21 | x_reshape = x_reshape.float()
22 |
23 | y = self.module(x_reshape)
24 |
25 | # We have to reshape Y
26 | if self.batch_first:
27 | y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size)
28 | else:
29 | y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size)
30 |
31 | return y
32 |
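33 | # --- Illustrative usage (added for exposition; not part of the original file) ---
34 | # A minimal sketch: wrapping nn.Linear so the same layer is applied
35 | # independently at every time step of a (samples, timesteps, features) tensor.
36 | if __name__ == '__main__':
37 |     layer = TimeDistributed(nn.Linear(8, 4), batch_first=True)
38 |     x = torch.rand(2, 5, 8)  # (samples, timesteps, input_size)
39 |     print(layer(x).shape)    # torch.Size([2, 5, 4])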
--------------------------------------------------------------------------------
/models/transformer/__init__.py:
--------------------------------------------------------------------------------
1 | from models.transformer.transformer import Transformer
2 |
--------------------------------------------------------------------------------
/models/transformer/decoder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from models.transformer.multiHeadAttention import MultiHeadAttention, MultiHeadAttentionChunk, MultiHeadAttentionWindow
7 | from models.transformer.positionwiseFeedForward import PositionwiseFeedForward
8 |
9 |
10 | class Decoder(nn.Module):
11 | """Decoder block from Attention is All You Need.
12 |
13 | Apply two Multi Head Attention blocks followed by a Point-wise Feed Forward block.
14 | Residual sum and normalization are applied at each step.
15 |
16 | Parameters
17 | ----------
18 | d_model:
19 | Dimension of the input vector.
20 | q:
21 | Dimension of all query matrix.
22 | v:
23 | Dimension of all value matrix.
24 | h:
25 | Number of heads.
26 | attention_size:
27 | Number of backward elements to apply attention.
28 | Deactivated if ``None``. Default is ``None``.
29 | dropout:
30 | Dropout probability after each MHA or PFF block.
31 | Default is ``0.3``.
32 | chunk_mode:
33 | Switch between different MultiHeadAttention blocks.
34 | One of ``'chunk'``, ``'window'`` or ``None``. Default is ``'chunk'``.
35 | """
36 |
37 | def __init__(self,
38 | d_model: int,
39 | q: int,
40 | v: int,
41 | h: int,
42 | attention_size: int = None,
43 | dropout: float = 0.3,
44 | chunk_mode: str = 'chunk'):
45 | """Initialize the Decoder block"""
46 | super().__init__()
47 |
48 | chunk_mode_modules = {
49 | 'chunk': MultiHeadAttentionChunk,
50 | 'window': MultiHeadAttentionWindow,
51 | }
52 |
53 | if chunk_mode in chunk_mode_modules.keys():
54 | MHA = chunk_mode_modules[chunk_mode]
55 | else:
56 | MHA = MultiHeadAttention
57 |
58 | self._selfAttention = MHA(d_model, q, v, h, attention_size=attention_size)
59 | self._encoderDecoderAttention = MHA(d_model, q, v, h, attention_size=attention_size)
60 | self._feedForward = PositionwiseFeedForward(d_model)
61 |
62 | self._layerNorm1 = nn.LayerNorm(d_model)
63 | self._layerNorm2 = nn.LayerNorm(d_model)
64 | self._layerNorm3 = nn.LayerNorm(d_model)
65 |
66 | self._dropout = nn.Dropout(p=dropout)
67 |
68 | def forward(self, x: torch.Tensor, memory: torch.Tensor) -> torch.Tensor:
69 | """Propagate the input through the Decoder block.
70 |
71 | Apply the self attention block, add residual and normalize.
72 | Apply the encoder-decoder attention block, add residual and normalize.
73 | Apply the feed forward network, add residual and normalize.
74 |
75 | Parameters
76 | ----------
77 | x:
78 | Input tensor with shape (batch_size, K, d_model).
79 | memory:
80 | Memory tensor with shape (batch_size, K, d_model)
81 | from encoder output.
82 |
83 | Returns
84 | -------
85 | x:
86 | Output tensor with shape (batch_size, K, d_model).
87 | """
88 | # Self attention
89 | residual = x
90 | x = self._selfAttention(query=x, key=x, value=x, mask="future")
91 | x = self._dropout(x)
92 | x = self._layerNorm1(x + residual)
93 |
94 | # Encoder-decoder attention
95 | residual = x
96 | x = self._encoderDecoderAttention(query=x, key=memory, value=memory)
97 | x = self._dropout(x)
98 | x = self._layerNorm2(x + residual)
99 |
100 | # Feed forward
101 | residual = x
102 | x = self._feedForward(x)
103 | x = self._dropout(x)
104 | x = self._layerNorm3(x + residual)
105 |
106 | return x
107 |
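108 | # --- Illustrative usage (added for exposition; not part of the original file) ---
109 | # A minimal sketch with chunk_mode=None, so the plain MultiHeadAttention block
110 | # is used and the sequence length K is unconstrained.
111 | if __name__ == '__main__':
112 |     decoder = Decoder(d_model=32, q=8, v=8, h=4, attention_size=None, chunk_mode=None)
113 |     x = torch.rand(2, 16, 32)        # (batch_size, K, d_model)
114 |     memory = torch.rand(2, 16, 32)   # stand-in for the encoder output
115 |     print(decoder(x, memory).shape)  # torch.Size([2, 16, 32])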
--------------------------------------------------------------------------------
/models/transformer/encoder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from models.transformer.multiHeadAttention import MultiHeadAttention, MultiHeadAttentionChunk, MultiHeadAttentionWindow
7 | from models.transformer.positionwiseFeedForward import PositionwiseFeedForward
8 |
9 |
10 | class Encoder(nn.Module):
11 | """Encoder block from Attention is All You Need.
12 |
13 | Apply a Multi Head Attention block followed by a Point-wise Feed Forward block.
14 | Residual sum and normalization are applied at each step.
15 |
16 | Parameters
17 | ----------
18 | d_model:
19 | Dimension of the input vector.
20 | q:
21 | Dimension of all query matrix.
22 | v:
23 | Dimension of all value matrix.
24 | h:
25 | Number of heads.
26 | attention_size:
27 | Number of backward elements to apply attention.
28 | Deactivated if ``None``. Default is ``None``.
29 | dropout:
30 | Dropout probability after each MHA or PFF block.
31 | Default is ``0.3``.
32 | chunk_mode:
33 | Switch between different MultiHeadAttention blocks.
34 | One of ``'chunk'``, ``'window'`` or ``None``. Default is ``'chunk'``.
35 | """
36 |
37 | def __init__(self,
38 | d_model: int,
39 | q: int,
40 | v: int,
41 | h: int,
42 | attention_size: int = None,
43 | dropout: float = 0.3,
44 | chunk_mode: str = 'chunk'):
45 | """Initialize the Encoder block"""
46 | super().__init__()
47 |
48 | chunk_mode_modules = {
49 | 'chunk': MultiHeadAttentionChunk,
50 | 'window': MultiHeadAttentionWindow,
51 | }
52 |
53 | if chunk_mode in chunk_mode_modules.keys():
54 | MHA = chunk_mode_modules[chunk_mode]
55 | else:
56 | MHA = MultiHeadAttention
57 |
58 | self._selfAttention = MHA(d_model, q, v, h, attention_size=attention_size)
59 | self._feedForward = PositionwiseFeedForward(d_model)
60 |
61 | self._layerNorm1 = nn.LayerNorm(d_model)
62 | self._layerNorm2 = nn.LayerNorm(d_model)
63 |
64 | self._dropout = nn.Dropout(p=dropout)
65 |
66 | def forward(self, x: torch.Tensor) -> torch.Tensor:
67 | """Propagate the input through the Encoder block.
68 |
69 | Apply the Multi Head Attention block, add residual and normalize.
70 | Apply the Point-wise Feed Forward block, add residual and normalize.
71 |
72 | Parameters
73 | ----------
74 | x:
75 | Input tensor with shape (batch_size, K, d_model).
76 |
77 | Returns
78 | -------
79 | Output tensor with shape (batch_size, K, d_model).
80 | """
81 | # Self attention
82 | residual = x
83 | x = self._selfAttention(query=x, key=x, value=x)
84 | x = self._dropout(x)
85 | x = self._layerNorm1(x + residual)
86 |
87 | # Feed forward
88 | residual = x
89 | x = self._feedForward(x)
90 | x = self._dropout(x)
91 | x = self._layerNorm2(x + residual)
92 |
93 | return x
94 |
95 | @property
96 | def attention_map(self) -> torch.Tensor:
97 | """Attention map after a forward propagation,
98 | variable `score` in the original paper.
99 | """
100 | return self._selfAttention.attention_map
101 |
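102 | # --- Illustrative usage (added for exposition; not part of the original file) ---
103 | # A minimal sketch; after one forward pass the attention_map property exposes
104 | # the softmax scores, with the heads concatenated on the batch dimension.
105 | if __name__ == '__main__':
106 |     encoder = Encoder(d_model=32, q=8, v=8, h=4, attention_size=None, chunk_mode=None)
107 |     x = torch.rand(2, 16, 32)           # (batch_size, K, d_model)
108 |     print(encoder(x).shape)             # torch.Size([2, 16, 32])
109 |     print(encoder.attention_map.shape)  # torch.Size([8, 16, 16]), i.e. (batch_size * h, K, K)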
--------------------------------------------------------------------------------
/models/transformer/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class OZELoss(nn.Module):
6 | """Custom loss for TRNSys metamodel.
7 |
8 | Compute, for temperature and consumption, the integral of the squared differences
9 | over time. Sum the logs, weighting the consumption term with a coefficient ``alpha``.
10 |
11 | .. math::
12 | \Delta_T = \sqrt{\int (y_{est}^T - y^T)^2}
13 |
14 | \Delta_Q = \sqrt{\int (y_{est}^Q - y^Q)^2}
15 |
16 | loss = log(1 + \Delta_T) + \\alpha \cdot log(1 + \Delta_Q)
17 |
18 | Parameters:
19 | -----------
20 | alpha:
21 | Coefficient for consumption. Default is ``0.3``.
22 | """
23 |
24 | def __init__(self, reduction: str = 'mean', alpha: float = 0.3):
25 | super().__init__()
26 |
27 | self.alpha = alpha
28 | self.reduction = reduction
29 |
30 | self.base_loss = nn.MSELoss(reduction=self.reduction)
31 |
32 | def forward(self,
33 | y_true: torch.Tensor,
34 | y_pred: torch.Tensor) -> torch.Tensor:
35 | """Compute the loss between a target value and a prediction.
36 |
37 | Parameters
38 | ----------
39 | y_true:
40 | Target value.
41 | y_pred:
42 | Estimated value.
43 |
44 | Returns
45 | -------
46 | Loss as a tensor with gradient attached.
47 | """
48 | delta_Q = self.base_loss(y_pred[..., :-1], y_true[..., :-1])
49 | delta_T = self.base_loss(y_pred[..., -1], y_true[..., -1])
50 |
51 | if self.reduction == 'none':
52 | delta_Q = delta_Q.mean(dim=(1, 2))
53 | delta_T = delta_T.mean(dim=(1))
54 |
55 | return torch.log(1 + delta_T) + self.alpha * torch.log(1 + delta_Q)
56 |
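57 | # --- Illustrative usage (added for exposition; not part of the original file) ---
58 | # A minimal sketch: the last feature is treated as the temperature and the
59 | # remaining features as consumptions, matching the slicing in forward above.
60 | if __name__ == '__main__':
61 |     y_true = torch.rand(4, 24, 8)  # (batch, time, features)
62 |     y_pred = torch.rand(4, 24, 8)
63 |     print(OZELoss(alpha=0.3)(y_true, y_pred))  # scalar loss tensor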
--------------------------------------------------------------------------------
/models/transformer/multiHeadAttention.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | from models.transformer.utils import generate_local_map_mask
9 |
10 |
11 | class MultiHeadAttention(nn.Module):
12 | """Multi Head Attention block from Attention is All You Need.
13 |
14 | Given 3 inputs of shape (batch_size, K, d_model), that will be used
15 | to compute query, keys and values, we output a self attention
16 | tensor of shape (batch_size, K, d_model).
17 |
18 | Parameters
19 | ----------
20 | d_model:
21 | Dimension of the input vector.
22 | q:
23 | Dimension of all query matrix.
24 | v:
25 | Dimension of all value matrix.
26 | h:
27 | Number of heads.
28 | attention_size:
29 | Number of backward elements to apply attention.
30 | Deactivated if ``None``. Default is ``None``.
31 | """
32 |
33 | def __init__(self,
34 | d_model: int,
35 | q: int,
36 | v: int,
37 | h: int,
38 | attention_size: int = None):
39 | """Initialize the Multi Head Block."""
40 | super().__init__()
41 |
42 | self._h = h
43 | self._attention_size = attention_size
44 |
45 | # Query, keys and value matrices
46 | self._W_q = nn.Linear(d_model, q*self._h)
47 | self._W_k = nn.Linear(d_model, q*self._h)
48 | self._W_v = nn.Linear(d_model, v*self._h)
49 |
50 | # Output linear function
51 | self._W_o = nn.Linear(self._h*v, d_model)
52 |
53 | # Score placeholder
54 | self._scores = None
55 |
56 | def forward(self,
57 | query: torch.Tensor,
58 | key: torch.Tensor,
59 | value: torch.Tensor,
60 | mask: Optional[str] = None) -> torch.Tensor:
61 | """Propagate forward the input through the MHB.
62 |
63 | We compute for each head the queries, keys and values matrices,
64 | followed by the Scaled Dot-Product. The result is concatenated
65 | and returned with shape (batch_size, K, d_model).
66 |
67 | Parameters
68 | ----------
69 | query:
70 | Input tensor with shape (batch_size, K, d_model) used to compute queries.
71 | key:
72 | Input tensor with shape (batch_size, K, d_model) used to compute keys.
73 | value:
74 | Input tensor with shape (batch_size, K, d_model) used to compute values.
75 | mask:
76 | Mask to apply on scores before computing attention.
77 | One of ``'subsequent'``, None. Default is None.
78 |
79 | Returns
80 | -------
81 | Self attention tensor with shape (batch_size, K, d_model).
82 | """
83 | rows = query.shape[1]
84 | cols = key.shape[1]
85 |
86 | # Compute Q, K and V, concatenate heads on batch dimension
87 | queries = torch.cat(self._W_q(query).chunk(self._h, dim=-1), dim=0)
88 | keys = torch.cat(self._W_k(key).chunk(self._h, dim=-1), dim=0)
89 | values = torch.cat(self._W_v(value).chunk(self._h, dim=-1), dim=0)
90 |
91 | # Scaled Dot Product
92 | self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(cols)  # note: scales by sqrt(K), not sqrt(d_k) as in the paper
93 |
94 | # Compute local map mask
95 | if self._attention_size is not None:  # deactivated when None, per the docstring
96 | attention_mask = generate_local_map_mask(rows, cols, self._attention_size, mask_future=False, device=self._scores.device)
97 | self._scores = self._scores.masked_fill(attention_mask, float('-inf'))
98 |
99 | # Compute future mask
100 | if mask == "future":
101 | future_mask = torch.triu(torch.ones((rows, cols)), diagonal=1).bool()
102 | future_mask = future_mask.to(self._scores.device)
103 | self._scores = self._scores.masked_fill(future_mask, float('-inf'))
104 |
105 | # Apply softmax
106 | self._scores = F.softmax(self._scores, dim=-1)
107 |
108 | attention = torch.bmm(self._scores, values)
109 |
110 | # Concatenate the heads
111 | attention_heads = torch.cat(attention.chunk(self._h, dim=0), dim=-1)
112 |
113 | # Apply linear transformation W^O
114 | self_attention = self._W_o(attention_heads)
115 |
116 | return self_attention
117 |
118 | @property
119 | def attention_map(self) -> torch.Tensor:
120 | """Attention map after a forward propagation,
121 | variable `score` in the original paper.
122 | """
123 | if self._scores is None:
124 | raise RuntimeError(
125 | "Evaluate the model once to generate attention map")
126 | return self._scores
127 |
128 |
129 | class MultiHeadAttentionChunk(MultiHeadAttention):
130 | """Multi Head Attention block with chunk.
131 |
132 | Given 3 inputs of shape (batch_size, K, d_model), that will be used
133 | to compute query, keys and values, we output a self attention
134 | tensor of shape (batch_size, K, d_model).
135 | Queries, keys and values are divided in chunks of constant size.
136 |
137 | Parameters
138 | ----------
139 | d_model:
140 | Dimension of the input vector.
141 | q:
142 | Dimension of all query matrix.
143 | v:
144 | Dimension of all value matrix.
145 | h:
146 | Number of heads.
147 | attention_size:
148 | Number of backward elements to apply attention.
149 | Deactivated if ``None``. Default is ``None``.
150 | chunk_size:
151 | Size of chunks to apply attention on. Last one may be smaller (see :class:`torch.Tensor.chunk`).
152 | Default is 168.
153 | """
154 |
155 | def __init__(self,
156 | d_model: int,
157 | q: int,
158 | v: int,
159 | h: int,
160 | attention_size: int = None,
161 | chunk_size: Optional[int] = 168,
162 | **kwargs):
163 | """Initialize the Multi Head Block."""
164 | super().__init__(d_model, q, v, h, attention_size, **kwargs)
165 |
166 | self._chunk_size = chunk_size
167 |
168 | # Score mask for decoder
169 | self._future_mask = nn.Parameter(torch.triu(torch.ones((self._chunk_size, self._chunk_size)), diagonal=1).bool(),
170 | requires_grad=False)
171 |
172 | if self._attention_size is not None:
173 | self._attention_mask = nn.Parameter(generate_local_map_mask(self._chunk_size, self._chunk_size, self._attention_size),
174 | requires_grad=False)
175 |
176 | def forward(self,
177 | query: torch.Tensor,
178 | key: torch.Tensor,
179 | value: torch.Tensor,
180 | mask: Optional[str] = None) -> torch.Tensor:
181 | """Propagate forward the input through the MHB.
182 |
183 | We compute for each head the queries, keys and values matrices,
184 | followed by the Scaled Dot-Product. The result is concatenated
185 | and returned with shape (batch_size, K, d_model).
186 |
187 | Parameters
188 | ----------
189 | query:
190 | Input tensor with shape (batch_size, K, d_model) used to compute queries.
191 | key:
192 | Input tensor with shape (batch_size, K, d_model) used to compute keys.
193 | value:
194 | Input tensor with shape (batch_size, K, d_model) used to compute values.
195 | mask:
196 | Mask to apply on scores before computing attention.
197 | One of ``'future'``, ``'subsequent'``, None. Default is None.
198 |
199 | Returns
200 | -------
201 | Self attention tensor with shape (batch_size, K, d_model).
202 | """
203 | K = query.shape[1]
204 | n_chunk = K // self._chunk_size
205 |
206 | # Compute Q, K and V, concatenate heads on batch dimension
207 | queries = torch.cat(torch.cat(self._W_q(query).chunk(self._h, dim=-1), dim=0).chunk(n_chunk, dim=1), dim=0)
208 | keys = torch.cat(torch.cat(self._W_k(key).chunk(self._h, dim=-1), dim=0).chunk(n_chunk, dim=1), dim=0)
209 | values = torch.cat(torch.cat(self._W_v(value).chunk(self._h, dim=-1), dim=0).chunk(n_chunk, dim=1), dim=0)
210 |
211 | # Scaled Dot Product
212 | self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(self._chunk_size)
213 |
214 | # Compute local map mask
215 | if self._attention_size is not None:
216 | self._scores = self._scores.masked_fill(self._attention_mask, float('-inf'))
217 |
218 | # Compute future mask
219 | if mask in ("future", "subsequent"):  # accept the decoder's "future" flag too
220 | self._scores = self._scores.masked_fill(self._future_mask, float('-inf'))
221 |
222 | # Apply softmax
223 | self._scores = F.softmax(self._scores, dim=-1)
224 |
225 | attention = torch.bmm(self._scores, values)
226 |
227 | # Concatenate the heads
228 | attention_heads = torch.cat(torch.cat(attention.chunk(
229 | n_chunk, dim=0), dim=1).chunk(self._h, dim=0), dim=-1)
230 |
231 | # Apply linear transformation W^O
232 | self_attention = self._W_o(attention_heads)
233 |
234 | return self_attention
235 |
236 |
237 | class MultiHeadAttentionWindow(MultiHeadAttention):
238 | """Multi Head Attention block with moving window.
239 |
240 | Given 3 inputs of shape (batch_size, K, d_model), that will be used
241 | to compute query, keys and values, we output a self attention
242 | tensor of shape (batch_size, K, d_model).
243 | Queries, keys and values are divided in chunks using a moving window.
244 |
245 | Parameters
246 | ----------
247 | d_model:
248 | Dimension of the input vector.
249 | q:
250 | Dimension of all query matrix.
251 | v:
252 | Dimension of all value matrix.
253 | h:
254 | Number of heads.
255 | attention_size:
256 | Number of backward elements to apply attention.
257 | Deactivated if ``None``. Default is ``None``.
258 | window_size:
259 | Size of the window used to extract chunks.
260 | Default is 168
261 | padding:
262 | Padding around each window. Padding will be applied to input sequence.
263 | Default is 168 // 4 = 42.
264 | """
265 |
266 | def __init__(self,
267 | d_model: int,
268 | q: int,
269 | v: int,
270 | h: int,
271 | attention_size: int = None,
272 | window_size: Optional[int] = 168,
273 | padding: Optional[int] = 168 // 4,
274 | **kwargs):
275 | """Initialize the Multi Head Block."""
276 | super().__init__(d_model, q, v, h, attention_size, **kwargs)
277 |
278 | self._window_size = window_size
279 | self._padding = padding
280 | self._q = q
281 | self._v = v
282 |
283 | # Step size for the moving window
284 | self._step = self._window_size - 2 * self._padding
285 |
286 | # Score mask for decoder
287 | self._future_mask = nn.Parameter(torch.triu(torch.ones((self._window_size, self._window_size)), diagonal=1).bool(),
288 | requires_grad=False)
289 |
290 | if self._attention_size is not None:
291 | self._attention_mask = nn.Parameter(generate_local_map_mask(self._window_size, self._window_size, self._attention_size),
292 | requires_grad=False)
293 |
294 | def forward(self,
295 | query: torch.Tensor,
296 | key: torch.Tensor,
297 | value: torch.Tensor,
298 | mask: Optional[str] = None) -> torch.Tensor:
299 | """Propagate forward the input through the MHB.
300 |
301 | We compute for each head the queries, keys and values matrices,
302 | followed by the Scaled Dot-Product. The result is concatenated
303 | and returned with shape (batch_size, K, d_model).
304 |
305 | Parameters
306 | ----------
307 | query:
308 | Input tensor with shape (batch_size, K, d_model) used to compute queries.
309 | key:
310 | Input tensor with shape (batch_size, K, d_model) used to compute keys.
311 | value:
312 | Input tensor with shape (batch_size, K, d_model) used to compute values.
313 | mask:
314 | Mask to apply on scores before computing attention.
315 | One of ``'future'``, ``'subsequent'``, None. Default is None.
316 |
317 | Returns
318 | -------
319 | Self attention tensor with shape (batch_size, K, d_model).
320 | """
321 | batch_size = query.shape[0]
322 |
323 | # Apply padding to input sequence
324 | query = F.pad(query.transpose(1, 2), (self._padding, self._padding), 'replicate').transpose(1, 2)
325 | key = F.pad(key.transpose(1, 2), (self._padding, self._padding), 'replicate').transpose(1, 2)
326 | value = F.pad(value.transpose(1, 2), (self._padding, self._padding), 'replicate').transpose(1, 2)
327 |
328 | # Compute Q, K and V, concatenate heads on batch dimension
329 | queries = torch.cat(self._W_q(query).chunk(self._h, dim=-1), dim=0)
330 | keys = torch.cat(self._W_k(key).chunk(self._h, dim=-1), dim=0)
331 | values = torch.cat(self._W_v(value).chunk(self._h, dim=-1), dim=0)
332 |
333 | # Divide Q, K and V using a moving window
334 | queries = queries.unfold(dimension=1, size=self._window_size, step=self._step).reshape((-1, self._q, self._window_size)).transpose(1, 2)
335 | keys = keys.unfold(dimension=1, size=self._window_size, step=self._step).reshape((-1, self._q, self._window_size)).transpose(1, 2)
336 | values = values.unfold(dimension=1, size=self._window_size, step=self._step).reshape((-1, self._v, self._window_size)).transpose(1, 2)
337 |
338 | # Scaled Dot Product
339 | self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(self._window_size)
340 |
341 | # Compute local map mask
342 | if self._attention_size is not None:
343 | self._scores = self._scores.masked_fill(self._attention_mask, float('-inf'))
344 |
345 | # Compute future mask
346 | if mask in ("future", "subsequent"):  # accept the decoder's "future" flag too
347 | self._scores = self._scores.masked_fill(self._future_mask, float('-inf'))
348 |
349 | # Apply softmax
350 | self._scores = F.softmax(self._scores, dim=-1)
351 |
352 | attention = torch.bmm(self._scores, values)
353 |
354 | # Fold chunks back
355 | attention = attention.reshape((batch_size*self._h, -1, self._window_size, self._v))
356 | attention = attention[:, :, self._padding:-self._padding, :]
357 | attention = attention.reshape((batch_size*self._h, -1, self._v))
358 |
359 | # Concatenate the heads
360 | attention_heads = torch.cat(attention.chunk(self._h, dim=0), dim=-1)
361 |
362 | # Apply linear transformation W^O
363 | self_attention = self._W_o(attention_heads)
364 |
365 | return self_attention
366 |
367 |
368 | def main():
369 | mhd = MultiHeadAttention(128, 5, 5, 4, attention_size=None)
370 | k = torch.rand((64, 5, 128))
371 | q = torch.rand((64, 5, 128))
372 | v = torch.rand((64, 5, 128))
373 |
374 | attention = mhd(q, k, v, mask='future')
375 | return attention
376 |
377 |
378 | if __name__ == '__main__':
379 | main()
380 |
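381 | # --- Illustrative usage of the chunked variant (added for exposition) ---
382 | # A minimal sketch: K should be a multiple of chunk_size so the .chunk() splits
383 | # line up, hence a toy chunk_size of 4 with K = 8.
384 | if __name__ == '__main__':
385 |     mha_chunk = MultiHeadAttentionChunk(32, 8, 8, 2, attention_size=None, chunk_size=4)
386 |     x = torch.rand(2, 8, 32)  # (batch_size, K, d_model)
387 |     print(mha_chunk(x, x, x, mask='subsequent').shape)  # torch.Size([2, 8, 32])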
--------------------------------------------------------------------------------
/models/transformer/positionwiseFeedForward.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class PositionwiseFeedForward(nn.Module):
9 | """Position-wise Feed Forward Network block from Attention is All You Need.
10 |
11 | Apply two linear transformations to each input, separately but identically. We
12 | implement them as linear layers. Input and output have a shape (batch_size, K, d_model).
13 |
14 | Parameters
15 | ----------
16 | d_model:
17 | Dimension of input tensor.
18 | d_ff:
19 | Dimension of hidden layer, default is 128.
20 | """
21 |
22 | def __init__(self,
23 | d_model: int,
24 | d_ff: Optional[int] = 128):
25 | """Initialize the PFF block."""
26 | super().__init__()
27 |
28 | self._linear1 = nn.Linear(d_model, d_ff)
29 | self._linear2 = nn.Linear(d_ff, d_model)
30 |
31 | def forward(self, x: torch.Tensor) -> torch.Tensor:
32 | """Propagate forward the input through the PFF block.
33 |
34 | Apply the first linear transformation, then a ReLU activation,
35 | and the second linear transformation.
36 |
37 | Parameters
38 | ----------
39 | x:
40 | Input tensor with shape (batch_size, K, d_model).
41 |
42 | Returns
43 | -------
44 | Output tensor with shape (batch_size, K, d_model).
45 | """
46 | return self._linear2(F.relu(self._linear1(x)))
47 |
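48 | # --- Illustrative usage (added for exposition; not part of the original file) ---
49 | # A minimal sketch: the same two linear layers are applied at every position.
50 | if __name__ == '__main__':
51 |     pff = PositionwiseFeedForward(d_model=32, d_ff=64)
52 |     x = torch.rand(2, 16, 32)  # (batch_size, K, d_model)
53 |     print(pff(x).shape)        # torch.Size([2, 16, 32])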
--------------------------------------------------------------------------------
/models/transformer/transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from models.transformer.encoder import Encoder
5 | from models.transformer.decoder import Decoder
6 | from models.transformer.utils import generate_original_PE, generate_regular_PE
7 |
8 |
9 | class Transformer(nn.Module):
10 | """Transformer model from Attention is All You Need.
11 |
12 | A classic transformer model adapted for sequential data.
13 | Embedding has been replaced with a fully connected layer,
14 | the final softmax has been replaced with a linear output layer.
15 |
16 | Attributes
17 | ----------
18 | layers_encoding: :py:class:`list` of :class:`Encoder.Encoder`
19 | stack of Encoder layers.
20 | layers_decoding: :py:class:`list` of :class:`Decoder.Decoder`
21 | stack of Decoder layers.
22 |
23 | Parameters
24 | ----------
25 | d_input:
26 | Model input dimension.
27 | d_model:
28 | Dimension of the input vector.
29 | d_output:
30 | Model output dimension.
31 | q:
32 | Dimension of queries and keys.
33 | v:
34 | Dimension of values.
35 | h:
36 | Number of heads.
37 | N:
38 | Number of encoder and decoder layers to stack.
39 | attention_size:
40 | Number of backward elements to apply attention.
41 | Deactivated if ``None``. Default is ``None``.
42 | dropout:
43 | Dropout probability after each MHA or PFF block.
44 | Default is ``0.3``.
45 | chunk_mode:
46 | Switch between different MultiHeadAttention blocks.
47 | One of ``'chunk'``, ``'window'`` or ``None``. Default is ``'chunk'``.
48 | pe:
49 | Type of positional encoding to add.
50 | Must be one of ``'original'``, ``'regular'`` or ``None``. Default is ``None``.
51 | """
52 |
53 | def __init__(self, cnf: dict):
54 | """Create transformer structure from Encoder and Decoder blocks."""
55 | super().__init__()
56 |
57 | d_model = cnf["d_model"]
58 | q = cnf["q"]
59 | v = cnf["v"]
60 | h = cnf["h"]
61 | N = cnf["N"]
62 | attention_size = cnf["attention_size"]
63 | dropout = cnf["dropout"]
64 | pe = cnf["pe"]
65 | chunk_mode = cnf["chunk_mode"]
66 | d_input = cnf["d_input"]
67 | d_output = cnf["d_output"]
68 | self.time_steps = cnf["num_encoder_steps"]
69 |
70 | self._d_model = d_model
71 |
72 | self.layers_encoding = nn.ModuleList([Encoder(d_model,
73 | q,
74 | v,
75 | h,
76 | attention_size=attention_size,
77 | dropout=dropout,
78 | chunk_mode=chunk_mode) for _ in range(N)])
79 | self.layers_decoding = nn.ModuleList([Decoder(d_model,
80 | q,
81 | v,
82 | h,
83 | attention_size=attention_size,
84 | dropout=dropout,
85 | chunk_mode=chunk_mode) for _ in range(N)])
86 |
87 | self._embedding_input = nn.Linear(d_input, d_model)
88 | self._embedding_output = nn.Linear(d_input, d_model)
89 |
90 | self._linear = nn.Linear(d_model, d_output)
91 |
92 | pe_functions = {
93 | 'original': generate_original_PE,
94 | 'regular': generate_regular_PE,
95 | }
96 |
97 | if pe in pe_functions.keys():
98 | self._generate_PE = pe_functions[pe]
99 | else:
100 | self._generate_PE = None
101 |
102 | self.name = 'transformer'
103 |
104 | def forward(self, xy: torch.Tensor) -> torch.Tensor:
105 | """Propagate input through transformer
106 |
107 | Forward input through an embedding module,
108 | the encoder then decoder stacks, and an output module.
109 |
110 | Parameters
111 | ----------
112 | xy:
113 | :class:`torch.Tensor` of shape (batch_size, K, d_input); the first ``num_encoder_steps`` steps feed the encoder, the rest the decoder.
114 |
115 | Returns
116 | -------
117 | Output tensor with shape (batch_size, K - num_encoder_steps, d_output).
118 | """
119 | x = xy[:, :self.time_steps]
120 | y = xy[:, self.time_steps:]
121 |
122 | # Shift tensor and add start token
123 | pad = torch.ones((y.shape[0], 1, y.shape[2])).to(y.device)
124 | y = torch.cat((pad, y), dim=1)[:, :-1, :]
125 |
126 | # Embedding module
127 | encoding_x = self._embedding_input(x)
128 | encoding_y = self._embedding_output(y)
129 |
130 | # Add position encoding
131 | if self._generate_PE is not None:
132 | positional_encoding = self._generate_PE(x.shape[1], self._d_model)
133 | positional_encoding = positional_encoding.to(encoding_x.device)
134 | encoding_x.add_(positional_encoding)
135 |
136 | # Encoding stack
137 | for layer in self.layers_encoding:
138 | encoding_x = layer(encoding_x)
139 |
140 | # Decoding stack
141 | decoding = encoding_y
142 |
143 | # Add position encoding
144 | if self._generate_PE is not None:
145 | positional_encoding = self._generate_PE(y.shape[1], self._d_model)
146 | positional_encoding = positional_encoding.to(decoding.device)
147 | decoding.add_(positional_encoding)
148 |
149 | for layer in self.layers_decoding:
150 | decoding = layer(decoding, encoding_x)
151 |
152 | # Output module
153 | output = self._linear(decoding)
154 | return output
155 |
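156 | # --- Illustrative usage (added for exposition; not part of the original file) ---
157 | # A minimal sketch of the config dict this constructor expects; the key names
158 | # come from __init__ above, the toy values are arbitrary.
159 | if __name__ == '__main__':
160 |     cnf = {'d_model': 32, 'q': 8, 'v': 8, 'h': 4, 'N': 2,
161 |            'attention_size': None, 'dropout': 0.1, 'pe': 'regular',
162 |            'chunk_mode': None, 'd_input': 8, 'd_output': 1,
163 |            'num_encoder_steps': 16}
164 |     model = Transformer(cnf)
165 |     xy = torch.rand(2, 24, 8)  # 16 encoder steps followed by 8 decoder steps
166 |     print(model(xy).shape)     # torch.Size([2, 8, 1])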
--------------------------------------------------------------------------------
/models/transformer/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union
2 |
3 | import numpy as np
4 | import torch
5 |
6 |
7 | def generate_original_PE(length: int, d_model: int) -> torch.Tensor:
8 | """Generate positional encoding as described in original paper. :class:`torch.Tensor`
9 |
10 | Parameters
11 | ----------
12 | length:
13 | Time window length, i.e. K.
14 | d_model:
15 | Dimension of the model vector.
16 |
17 | Returns
18 | -------
19 | Tensor of shape (K, d_model).
20 | """
21 | PE = torch.zeros((length, d_model))
22 |
23 | pos = torch.arange(length).unsqueeze(1)
24 | PE[:, 0::2] = torch.sin(
25 | pos / torch.pow(1000, torch.arange(0, d_model, 2, dtype=torch.float32)/d_model))
26 | PE[:, 1::2] = torch.cos(
27 | pos / torch.pow(1000, torch.arange(1, d_model, 2, dtype=torch.float32)/d_model))
28 |
29 | return PE
30 |
31 |
32 | def generate_regular_PE(length: int, d_model: int, period: Optional[int] = 24) -> torch.Tensor:
33 | """Generate positional encoding with a given period.
34 |
35 | Parameters
36 | ----------
37 | length:
38 | Time window length, i.e. K.
39 | d_model:
40 | Dimension of the model vector.
41 | period:
42 | Size of the pattern to repeat.
43 | Default is 24.
44 |
45 | Returns
46 | -------
47 | Tensor of shape (K, d_model).
48 | """
49 | # PE of shape (length, d_model), built below from a single periodic sine
50 |
51 | pos = torch.arange(length, dtype=torch.float32).unsqueeze(1)
52 | PE = torch.sin(pos * 2 * np.pi / period)
53 | PE = PE.repeat((1, d_model))
54 |
55 | return PE
56 |
57 |
58 | def generate_local_map_mask(row: int,
59 | col: int,
60 | attention_size: int,
61 | mask_future=False,
62 | device: torch.device = 'cpu') -> torch.BoolTensor:
63 | """Compute attention mask as attention_size wide diagonal.
64 |
65 | Parameters
66 | ----------
67 | row:
68 | Time dimension size of the queries.
69 | col:
70 | Time dimension size of the keys.
71 | attention_size:
72 | Number of backward elements to apply attention; future positions are additionally masked when ``mask_future`` is ``True``.
73 | device:
74 | torch device. Default is ``'cpu'``.
75 |
76 | Returns
77 | -------
78 | Mask as a boolean tensor.
79 | """
80 | local_map = np.empty((row, col))
81 | i, j = np.indices(local_map.shape)
82 |
83 | if mask_future:
84 | local_map[i, j] = (i - j > attention_size) ^ (j - i > 0)
85 | else:
86 | local_map[i, j] = np.abs(i - j) > attention_size
87 |
88 | return torch.BoolTensor(local_map).to(device)
89 |
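90 | # --- Illustrative usage (added for exposition; not part of the original file) ---
91 | # A minimal sketch: with attention_size=1 each position may only attend to its
92 | # direct neighbours; True marks the masked-out entries.
93 | if __name__ == '__main__':
94 |     print(generate_local_map_mask(5, 5, attention_size=1).int())
95 |     print(generate_regular_PE(6, 4, period=3).shape)  # torch.Size([6, 4])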
--------------------------------------------------------------------------------
/models/transformer_grn/__init__.py:
--------------------------------------------------------------------------------
1 | from models.transformer_grn.transformer import Transformer
2 |
--------------------------------------------------------------------------------
/models/transformer_grn/decoder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from models.transformer.multiHeadAttention import MultiHeadAttention, MultiHeadAttentionChunk, MultiHeadAttentionWindow
7 | from models.transformer.positionwiseFeedForward import PositionwiseFeedForward
8 | from models.temporal_fusion_t.gated_residual_network import GatedResidualNetwork
9 |
10 |
11 | class Decoder(nn.Module):
12 | """Decoder block from Attention is All You Need.
13 |
14 | Apply two Multi Head Attention blocks followed by a Point-wise Feed Forward block.
15 | Residual sum and normalization are applied at each step.
16 |
17 | Parameters
18 | ----------
19 | d_model:
20 | Dimension of the input vector.
21 | q:
22 | Dimension of all query matrix.
23 | v:
24 | Dimension of all value matrix.
25 | h:
26 | Number of heads.
27 | attention_size:
28 | Number of backward elements to apply attention.
29 | Deactivated if ``None``. Default is ``None``.
30 | dropout:
31 | Dropout probability after each MHA or PFF block.
32 | Default is ``0.3``.
33 | chunk_mode:
34 | Switch between different MultiHeadAttention blocks.
35 | One of ``'chunk'``, ``'window'`` or ``None``. Default is ``'chunk'``.
36 | """
37 |
38 | def __init__(self,
39 | d_model: int,
40 | q: int,
41 | v: int,
42 | h: int,
43 | attention_size: int = None,
44 | dropout: float = 0.3,
45 | chunk_mode: str = 'chunk'):
46 | """Initialize the Decoder block"""
47 | super().__init__()
48 |
49 | chunk_mode_modules = {
50 | 'chunk': MultiHeadAttentionChunk,
51 | 'window': MultiHeadAttentionWindow,
52 | }
53 |
54 | if chunk_mode in chunk_mode_modules.keys():
55 | MHA = chunk_mode_modules[chunk_mode]
56 | else:
57 | MHA = MultiHeadAttention
58 |
59 | self._selfAttention = MHA(d_model, q, v, h, attention_size=attention_size)
60 | self._encoderDecoderAttention = MHA(d_model, q, v, h, attention_size=attention_size)
61 | self._feedForward = PositionwiseFeedForward(d_model)
62 |
63 | self._layerNorm1 = nn.LayerNorm(d_model)
64 | self._layerNorm2 = nn.LayerNorm(d_model)
65 | self._layerNorm3 = nn.LayerNorm(d_model)
66 |
67 | self._dropout = nn.Dropout(p=dropout)
68 | self.grn = GatedResidualNetwork(
69 | input_size=d_model,
70 | hidden_layer_size=d_model,
71 | output_size=None,
72 | dropout_rate=0.1,
73 | use_time_distributed=True,
74 | return_gate=True,
75 | batch_first=True)
76 |
77 | def forward(self, x: torch.Tensor, memory: torch.Tensor, context=None) -> torch.Tensor:
78 | """Propagate the input through the Decoder block.
79 |
80 | Apply the self attention block, add residual and normalize.
81 | Apply the encoder-decoder attention block, add residual and normalize.
82 | Apply the feed forward network, add residual and normalize.
83 |
84 | Parameters
85 | ----------
86 | x:
87 | Input tensor with shape (batch_size, K, d_model).
88 | memory:
89 | Memory tensor with shape (batch_size, K, d_model) from encoder output.
90 | context: optional static context tensor, injected through the GRN after self attention.
91 |
92 | Returns
93 | -------
94 | x:
95 | Output tensor with shape (batch_size, K, d_model).
96 | """
97 | # Self attention
98 | residual = x
99 | x = self._selfAttention(query=x, key=x, value=x, mask="future")
100 | x = self._dropout(x)
101 | x = self._layerNorm1(x + residual)
102 |
103 | # Inject static vars
104 | if context is not None:
105 | x, _ = self.grn(x, context)
106 |
107 | # Encoder-decoder attention
108 | residual = x
109 | x = self._encoderDecoderAttention(query=x, key=memory, value=memory)
110 | x = self._dropout(x)
111 | x = self._layerNorm2(x + residual)
112 |
113 | # Feed forward
114 | residual = x
115 | x = self._feedForward(x)
116 | x = self._dropout(x)
117 | x = self._layerNorm3(x + residual)
118 |
119 | return x
120 |
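121 | # --- Illustrative usage (added for exposition; not part of the original file) ---
122 | # A minimal sketch of the context-free path. Passing a static context tensor
123 | # via context= routes x through the GRN after self attention; its expected
124 | # shape (e.g. (batch_size, 1, d_model), as in the TFT model) is an assumption,
125 | # since it depends on GatedResidualNetwork's broadcasting.
126 | if __name__ == '__main__':
127 |     decoder = Decoder(d_model=32, q=8, v=8, h=4, attention_size=None, chunk_mode=None)
128 |     x = torch.rand(2, 16, 32)
129 |     memory = torch.rand(2, 16, 32)
130 |     print(decoder(x, memory).shape)  # torch.Size([2, 16, 32])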
--------------------------------------------------------------------------------
/models/transformer_grn/encoder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from models.transformer.multiHeadAttention import MultiHeadAttention, MultiHeadAttentionChunk, MultiHeadAttentionWindow
7 | from models.transformer.positionwiseFeedForward import PositionwiseFeedForward
8 | from models.temporal_fusion_t.gated_residual_network import GatedResidualNetwork
9 |
10 |
11 | class Encoder(nn.Module):
12 | """Encoder block from Attention is All You Need.
13 |
14 | Apply a Multi Head Attention block followed by a Point-wise Feed Forward block.
15 | Residual sum and normalization are applied at each step.
16 |
17 | Parameters
18 | ----------
19 | d_model:
20 | Dimension of the input vector.
21 | q:
22 | Dimension of all query matrix.
23 | v:
24 | Dimension of all value matrix.
25 | h:
26 | Number of heads.
27 | attention_size:
28 | Number of backward elements to apply attention.
29 | Deactivated if ``None``. Default is ``None``.
30 | dropout:
31 | Dropout probability after each MHA or PFF block.
32 | Default is ``0.3``.
33 | chunk_mode:
34 | Switch between different MultiHeadAttention blocks.
35 | One of ``'chunk'``, ``'window'`` or ``None``. Default is ``'chunk'``.
36 | """
37 |
38 | def __init__(self,
39 | d_model: int,
40 | q: int,
41 | v: int,
42 | h: int,
43 | attention_size: int = None,
44 | dropout: float = 0.3,
45 | chunk_mode: str = 'chunk'):
46 | """Initialize the Encoder block"""
47 | super().__init__()
48 |
49 | chunk_mode_modules = {
50 | 'chunk': MultiHeadAttentionChunk,
51 | 'window': MultiHeadAttentionWindow,
52 | }
53 |
54 | if chunk_mode in chunk_mode_modules.keys():
55 | MHA = chunk_mode_modules[chunk_mode]
56 | else:
57 | MHA = MultiHeadAttention
58 |
59 | self._selfAttention = MHA(d_model, q, v, h, attention_size=attention_size)
60 | self._feedForward = PositionwiseFeedForward(d_model)
61 |
62 | self._layerNorm1 = nn.LayerNorm(d_model)
63 | self._layerNorm2 = nn.LayerNorm(d_model)
64 |
65 | self._dropout = nn.Dropout(p=dropout)
66 |
67 | self.grn = GatedResidualNetwork(
68 | input_size=d_model,
69 | hidden_layer_size=d_model,
70 | output_size=None,
71 | dropout_rate=0.1,
72 | use_time_distributed=True,
73 | return_gate=True,
74 | batch_first=True)
75 |
76 | def forward(self, x: torch.Tensor, context=None) -> torch.Tensor:
77 | """Propagate the input through the Encoder block.
78 |
79 | Apply the Multi Head Attention block, add residual and normalize.
80 | Apply the Point-wise Feed Forward block, add residual and normalize.
81 |
82 | Parameters
83 | ----------
84 | x:
85 | Input tensor with shape (batch_size, K, d_model).
86 | context: optional static context tensor, injected through the GRN after self attention.
87 | Returns
88 | -------
89 | Output tensor with shape (batch_size, K, d_model).
90 | """
91 | # Self attention
92 | residual = x
93 | x = self._selfAttention(query=x, key=x, value=x)
94 | x = self._dropout(x)
95 | x = self._layerNorm1(x + residual)
96 |
97 | # Inject static vars
98 | if context is not None:
99 | x, _ = self.grn(x, context)
100 |
101 | # Feed forward
102 | residual = x
103 | x = self._feedForward(x)
104 | x = self._dropout(x)
105 | x = self._layerNorm2(x + residual)
106 |
107 | return x
108 |
109 | @property
110 | def attention_map(self) -> torch.Tensor:
111 | """Attention map after a forward propagation,
112 | variable `score` in the original paper.
113 | """
114 | return self._selfAttention.attention_map
115 |
--------------------------------------------------------------------------------
/models/transformer_grn/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class OZELoss(nn.Module):
6 | """Custom loss for TRNSys metamodel.
7 |
8 | Compute, for temperature and consumption, the integral of the squared differences
9 | over time. Sum the logs, weighting the consumption term with a coefficient ``alpha``.
10 |
11 | .. math::
12 | \Delta_T = \sqrt{\int (y_{est}^T - y^T)^2}
13 |
14 | \Delta_Q = \sqrt{\int (y_{est}^Q - y^Q)^2}
15 |
16 | loss = log(1 + \Delta_T) + \\alpha \cdot log(1 + \Delta_Q)
17 |
18 | Parameters:
19 | -----------
20 | alpha:
21 | Coefficient for consumption. Default is ``0.3``.
22 | """
23 |
24 | def __init__(self, reduction: str = 'mean', alpha: float = 0.3):
25 | super().__init__()
26 |
27 | self.alpha = alpha
28 | self.reduction = reduction
29 |
30 | self.base_loss = nn.MSELoss(reduction=self.reduction)
31 |
32 | def forward(self,
33 | y_true: torch.Tensor,
34 | y_pred: torch.Tensor) -> torch.Tensor:
35 | """Compute the loss between a target value and a prediction.
36 |
37 | Parameters
38 | ----------
39 | y_true:
40 | Target value.
41 | y_pred:
42 | Estimated value.
43 |
44 | Returns
45 | -------
46 | Loss as a tensor with gradient attached.
47 | """
48 | delta_Q = self.base_loss(y_pred[..., :-1], y_true[..., :-1])
49 | delta_T = self.base_loss(y_pred[..., -1], y_true[..., -1])
50 |
51 | if self.reduction == 'none':
52 | delta_Q = delta_Q.mean(dim=(1, 2))
53 | delta_T = delta_T.mean(dim=(1))
54 |
55 | return torch.log(1 + delta_T) + self.alpha * torch.log(1 + delta_Q)
56 |
--------------------------------------------------------------------------------
/models/transformer_grn/multiHeadAttention.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | from models.transformer.utils import generate_local_map_mask
9 |
10 |
11 | class MultiHeadAttention(nn.Module):
12 | """Multi Head Attention block from Attention is All You Need.
13 |
14 | Given 3 inputs of shape (batch_size, K, d_model), that will be used
15 | to compute query, keys and values, we output a self attention
16 | tensor of shape (batch_size, K, d_model).
17 |
18 | Parameters
19 | ----------
20 | d_model:
21 | Dimension of the input vector.
22 | q:
23 | Dimension of all query matrix.
24 | v:
25 | Dimension of all value matrix.
26 | h:
27 | Number of heads.
28 | attention_size:
29 | Number of backward elements to apply attention.
30 | Deactivated if ``None``. Default is ``None``.
31 | """
32 |
33 | def __init__(self,
34 | d_model: int,
35 | q: int,
36 | v: int,
37 | h: int,
38 | attention_size: int = None):
39 | """Initialize the Multi Head Block."""
40 | super().__init__()
41 |
42 | self._h = h
43 | self._attention_size = attention_size
44 |
45 | # Query, keys and value matrices
46 | self._W_q = nn.Linear(d_model, q*self._h)
47 | self._W_k = nn.Linear(d_model, q*self._h)
48 | self._W_v = nn.Linear(d_model, v*self._h)
49 |
50 | # Output linear function
51 | self._W_o = nn.Linear(self._h*v, d_model)
52 |
53 | # Score placeholder
54 | self._scores = None
55 |
56 | def forward(self,
57 | query: torch.Tensor,
58 | key: torch.Tensor,
59 | value: torch.Tensor,
60 | mask: Optional[str] = None) -> torch.Tensor:
61 | """Propagate forward the input through the MHB.
62 |
63 | We compute for each head the queries, keys and values matrices,
64 | followed by the Scaled Dot-Product. The result is concatenated
65 | and returned with shape (batch_size, K, d_model).
66 |
67 | Parameters
68 | ----------
69 | query:
70 | Input tensor with shape (batch_size, K, d_model) used to compute queries.
71 | key:
72 | Input tensor with shape (batch_size, K, d_model) used to compute keys.
73 | value:
74 | Input tensor with shape (batch_size, K, d_model) used to compute values.
75 | mask:
76 | Mask to apply on scores before computing attention.
77 | One of ``'future'``, None. Default is None.
78 |
79 | Returns
80 | -------
81 | Self attention tensor with shape (batch_size, K, d_model).
82 | """
83 | rows = query.shape[1]
84 | cols = key.shape[1]
85 |
86 | # Compute Q, K and V, concatenate heads on batch dimension
87 | queries = torch.cat(self._W_q(query).chunk(self._h, dim=-1), dim=0)
88 | keys = torch.cat(self._W_k(key).chunk(self._h, dim=-1), dim=0)
89 | values = torch.cat(self._W_v(value).chunk(self._h, dim=-1), dim=0)
90 |
91 | # Scaled Dot Product
92 | self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(cols)  # note: scales by sqrt(K), not sqrt(d_k) as in the paper
93 |
94 | # Compute local map mask
95 | if self._attention_size is not None:  # deactivated when None, per the docstring
96 | attention_mask = generate_local_map_mask(rows, cols, self._attention_size, mask_future=False, device=self._scores.device)
97 | self._scores = self._scores.masked_fill(attention_mask, float('-inf'))
98 |
99 | # Compute future mask
100 | if mask == "future":
101 | future_mask = torch.triu(torch.ones((rows, cols)), diagonal=1).bool()
102 | future_mask = future_mask.to(self._scores.device)
103 | self._scores = self._scores.masked_fill(future_mask, float('-inf'))
104 |
105 | # Apply softmax
106 | self._scores = F.softmax(self._scores, dim=-1)
107 |
108 | attention = torch.bmm(self._scores, values)
109 |
110 | # Concatenate the heads
111 | attention_heads = torch.cat(attention.chunk(self._h, dim=0), dim=-1)
112 |
113 | # Apply linear transformation W^O
114 | self_attention = self._W_o(attention_heads)
115 |
116 | return self_attention
117 |
118 | @property
119 | def attention_map(self) -> torch.Tensor:
120 | """Attention map after a forward propagation,
121 | variable `score` in the original paper.
122 | """
123 | if self._scores is None:
124 | raise RuntimeError(
125 | "Evaluate the model once to generate attention map")
126 | return self._scores
127 |
128 |
129 | class MultiHeadAttentionChunk(MultiHeadAttention):
130 | """Multi Head Attention block with chunk.
131 |
132 | Given 3 inputs of shape (batch_size, K, d_model), that will be used
133 | to compute query, keys and values, we output a self attention
134 | tensor of shape (batch_size, K, d_model).
135 | Queries, keys and values are divided in chunks of constant size.
136 |
137 | Parameters
138 | ----------
139 | d_model:
140 | Dimension of the input vector.
141 | q:
142 | Dimension of all query matrix.
143 | v:
144 | Dimension of all value matrix.
145 | h:
146 | Number of heads.
147 | attention_size:
148 | Number of backward elements to apply attention.
149 | Deactivated if ``None``. Default is ``None``.
150 | chunk_size:
151 | Size of chunks to apply attention on. Last one may be smaller (see :class:`torch.Tensor.chunk`).
152 | Default is 168.
153 | """
154 |
155 | def __init__(self,
156 | d_model: int,
157 | q: int,
158 | v: int,
159 | h: int,
160 | attention_size: int = None,
161 | chunk_size: Optional[int] = 168,
162 | **kwargs):
163 | """Initialize the Multi Head Block."""
164 | super().__init__(d_model, q, v, h, attention_size, **kwargs)
165 |
166 | self._chunk_size = chunk_size
167 |
168 | # Score mask for decoder
169 | self._future_mask = nn.Parameter(torch.triu(torch.ones((self._chunk_size, self._chunk_size)), diagonal=1).bool(),
170 | requires_grad=False)
171 |
172 | if self._attention_size is not None:
173 | self._attention_mask = nn.Parameter(generate_local_map_mask(self._chunk_size, self._chunk_size, self._attention_size),
174 | requires_grad=False)
175 |
176 | def forward(self,
177 | query: torch.Tensor,
178 | key: torch.Tensor,
179 | value: torch.Tensor,
180 | mask: Optional[str] = None) -> torch.Tensor:
181 | """Propagate forward the input through the MHB.
182 |
183 | We compute for each head the queries, keys and values matrices,
184 | followed by the Scaled Dot-Product. The result is concatenated
185 | and returned with shape (batch_size, K, d_model).
186 |
187 | Parameters
188 | ----------
189 | query:
190 | Input tensor with shape (batch_size, K, d_model) used to compute queries.
191 | key:
192 | Input tensor with shape (batch_size, K, d_model) used to compute keys.
193 | value:
194 | Input tensor with shape (batch_size, K, d_model) used to compute values.
195 | mask:
196 | Mask to apply on scores before computing attention.
197 | One of ``'future'``, ``'subsequent'``, None. Default is None.
198 |
199 | Returns
200 | -------
201 | Self attention tensor with shape (batch_size, K, d_model).
202 | """
203 | K = query.shape[1]
204 | n_chunk = K // self._chunk_size
205 |
206 | # Compute Q, K and V, concatenate heads on batch dimension
207 | queries = torch.cat(torch.cat(self._W_q(query).chunk(self._h, dim=-1), dim=0).chunk(n_chunk, dim=1), dim=0)
208 | keys = torch.cat(torch.cat(self._W_k(key).chunk(self._h, dim=-1), dim=0).chunk(n_chunk, dim=1), dim=0)
209 | values = torch.cat(torch.cat(self._W_v(value).chunk(self._h, dim=-1), dim=0).chunk(n_chunk, dim=1), dim=0)
210 |
211 | # Scaled Dot Product
212 | self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(self._chunk_size)
213 |
214 | # Compute local map mask
215 | if self._attention_size is not None:
216 | self._scores = self._scores.masked_fill(self._attention_mask, float('-inf'))
217 |
218 | # Compute future mask
219 | if mask in ("future", "subsequent"):  # accept the decoder's "future" flag too
220 | self._scores = self._scores.masked_fill(self._future_mask, float('-inf'))
221 |
222 | # Apply softmax
223 | self._scores = F.softmax(self._scores, dim=-1)
224 |
225 | attention = torch.bmm(self._scores, values)
226 |
227 | # Concatenate the heads
228 | attention_heads = torch.cat(torch.cat(attention.chunk(
229 | n_chunk, dim=0), dim=1).chunk(self._h, dim=0), dim=-1)
230 |
231 | # Apply linear transformation W^O
232 | self_attention = self._W_o(attention_heads)
233 |
234 | return self_attention
235 |
236 |
237 | class MultiHeadAttentionWindow(MultiHeadAttention):
238 | """Multi Head Attention block with moving window.
239 |
240 | Given 3 inputs of shape (batch_size, K, d_model), that will be used
241 | to compute query, keys and values, we output a self attention
242 | tensor of shape (batch_size, K, d_model).
243 | Queries, keys and values are divided in chunks using a moving window.
244 |
245 | Parameters
246 | ----------
247 | d_model:
248 | Dimension of the input vector.
249 | q:
250 | Dimension of all query matrix.
251 | v:
252 | Dimension of all value matrix.
253 | h:
254 | Number of heads.
255 | attention_size:
256 | Number of backward elements to apply attention.
257 | Deactivated if ``None``. Default is ``None``.
258 | window_size:
259 | Size of the window used to extract chunks.
260 | Default is 168
261 | padding:
262 | Padding around each window. Padding will be applied to input sequence.
263 | Default is 168 // 4 = 42.
264 | """
265 |
266 | def __init__(self,
267 | d_model: int,
268 | q: int,
269 | v: int,
270 | h: int,
271 | attention_size: int = None,
272 | window_size: Optional[int] = 168,
273 | padding: Optional[int] = 168 // 4,
274 | **kwargs):
275 | """Initialize the Multi Head Block."""
276 | super().__init__(d_model, q, v, h, attention_size, **kwargs)
277 |
278 | self._window_size = window_size
279 | self._padding = padding
280 | self._q = q
281 | self._v = v
282 |
283 | # Step size for the moving window
284 | self._step = self._window_size - 2 * self._padding
285 |
286 | # Score mask for decoder
287 | self._future_mask = nn.Parameter(torch.triu(torch.ones((self._window_size, self._window_size)), diagonal=1).bool(),
288 | requires_grad=False)
289 |
290 | if self._attention_size is not None:
291 | self._attention_mask = nn.Parameter(generate_local_map_mask(self._window_size, self._window_size, self._attention_size),
292 | requires_grad=False)
293 |
294 | def forward(self,
295 | query: torch.Tensor,
296 | key: torch.Tensor,
297 | value: torch.Tensor,
298 | mask: Optional[str] = None) -> torch.Tensor:
299 | """Propagate forward the input through the MHB.
300 |
301 | We compute for each head the queries, keys and values matrices,
302 | followed by the Scaled Dot-Product. The result is concatenated
303 | and returned with shape (batch_size, K, d_model).
304 |
305 | Parameters
306 | ----------
307 | query:
308 | Input tensor with shape (batch_size, K, d_model) used to compute queries.
309 | key:
310 | Input tensor with shape (batch_size, K, d_model) used to compute keys.
311 | value:
312 | Input tensor with shape (batch_size, K, d_model) used to compute values.
313 | mask:
314 | Mask to apply on scores before computing attention.
315 | One of ``'future'``, ``'subsequent'``, None. Default is None.
316 |
317 | Returns
318 | -------
319 | Self attention tensor with shape (batch_size, K, d_model).
320 | """
321 | batch_size = query.shape[0]
322 |
323 | # Apply padding to input sequence
324 | query = F.pad(query.transpose(1, 2), (self._padding, self._padding), 'replicate').transpose(1, 2)
325 | key = F.pad(key.transpose(1, 2), (self._padding, self._padding), 'replicate').transpose(1, 2)
326 | value = F.pad(value.transpose(1, 2), (self._padding, self._padding), 'replicate').transpose(1, 2)
327 |
328 | # Compute Q, K and V, concatenate heads on batch dimension
329 | queries = torch.cat(self._W_q(query).chunk(self._h, dim=-1), dim=0)
330 | keys = torch.cat(self._W_k(key).chunk(self._h, dim=-1), dim=0)
331 | values = torch.cat(self._W_v(value).chunk(self._h, dim=-1), dim=0)
332 |
333 | # Divide Q, K and V using a moving window
334 | queries = queries.unfold(dimension=1, size=self._window_size, step=self._step).reshape((-1, self._q, self._window_size)).transpose(1, 2)
335 | keys = keys.unfold(dimension=1, size=self._window_size, step=self._step).reshape((-1, self._q, self._window_size)).transpose(1, 2)
336 | values = values.unfold(dimension=1, size=self._window_size, step=self._step).reshape((-1, self._v, self._window_size)).transpose(1, 2)
337 |
338 | # Scaled Dot Product
339 | self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(self._window_size)
340 |
341 | # Compute local map mask
342 | if self._attention_size is not None:
343 | self._scores = self._scores.masked_fill(self._attention_mask, float('-inf'))
344 |
345 | # Compute future mask
346 | if mask in ("future", "subsequent"):  # accept the decoder's "future" flag too
347 | self._scores = self._scores.masked_fill(self._future_mask, float('-inf'))
348 |
349 | # Apply softmax
350 | self._scores = F.softmax(self._scores, dim=-1)
351 |
352 | attention = torch.bmm(self._scores, values)
353 |
354 | # Fold chunks back
355 | attention = attention.reshape((batch_size*self._h, -1, self._window_size, self._v))
356 | attention = attention[:, :, self._padding:-self._padding, :]
357 | attention = attention.reshape((batch_size*self._h, -1, self._v))
358 |
359 | # Concatenate the heads
360 | attention_heads = torch.cat(attention.chunk(self._h, dim=0), dim=-1)
361 |
362 | # Apply linear transformation W^O
363 | self_attention = self._W_o(attention_heads)
364 |
365 | return self_attention
366 |
--------------------------------------------------------------------------------
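
A minimal sketch (illustrative sizes, plain tensors; not part of the repo) of the moving-window mechanics used in the forward pass above: replicate-pad, unfold into overlapping windows, then fold back by trimming each window's padded borders.

    import torch
    import torch.nn.functional as F

    K, d = 16, 4                 # sequence length and feature size (illustrative)
    window_size, padding = 8, 2  # hypothetical hyper-parameters
    step = window_size - 2 * padding

    x = torch.randn(1, K, d)

    # Replicate-pad along the time axis, as done for query/key/value above
    x_pad = F.pad(x.transpose(1, 2), (padding, padding), 'replicate').transpose(1, 2)

    # Split into overlapping windows of length window_size
    chunks = x_pad.unfold(dimension=1, size=window_size, step=step)  # (1, n_windows, d, window_size)
    chunks = chunks.reshape(-1, d, window_size).transpose(1, 2)      # (n_windows, window_size, d)

    # Fold back: drop each window's padded borders and concatenate the rest
    folded = chunks[:, padding:-padding, :].reshape(1, -1, d)
    assert folded.shape == x.shape  # the trimmed windows tile the sequence exactly
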
/models/transformer_grn/positionwiseFeedForward.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class PositionwiseFeedForward(nn.Module):
9 | """Position-wise Feed Forward Network block from Attention is All You Need.
10 |
11 |     Apply two linear transformations to each input position, separately but identically.
12 |     They are implemented here as linear layers. Input and output have shape (batch_size, K, d_model).
13 |
14 | Parameters
15 | ----------
16 | d_model:
17 | Dimension of input tensor.
18 | d_ff:
19 |         Dimension of the hidden layer. Default is 128.
20 | """
21 |
22 | def __init__(self,
23 | d_model: int,
24 | d_ff: Optional[int] = 128):
25 | """Initialize the PFF block."""
26 | super().__init__()
27 |
28 | self._linear1 = nn.Linear(d_model, d_ff)
29 | self._linear2 = nn.Linear(d_ff, d_model)
30 |
31 | def forward(self, x: torch.Tensor) -> torch.Tensor:
32 | """Propagate forward the input through the PFF block.
33 |
34 |         Apply the first linear transformation, then a ReLU activation,
35 | and the second linear transformation.
36 |
37 | Parameters
38 | ----------
39 | x:
40 | Input tensor with shape (batch_size, K, d_model).
41 |
42 | Returns
43 | -------
44 | Output tensor with shape (batch_size, K, d_model).
45 | """
46 | return self._linear2(F.relu(self._linear1(x)))
47 |
--------------------------------------------------------------------------------
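
A quick shape check for the block above (batch and sequence sizes are illustrative):

    import torch
    from models.transformer_grn.positionwiseFeedForward import PositionwiseFeedForward

    pff = PositionwiseFeedForward(d_model=64, d_ff=128)
    x = torch.randn(32, 24, 64)     # (batch_size, K, d_model)
    assert pff(x).shape == x.shape  # the block preserves the input shape
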
/models/transformer_grn/transformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from models.transformer_grn.encoder import Encoder
5 | from models.transformer_grn.decoder import Decoder
6 | from models.transformer.utils import generate_original_PE, generate_regular_PE
7 | from models.temporal_fusion_t.linear_layer import LinearLayer
8 |
9 |
10 | class Transformer(nn.Module):
11 | """Transformer model from Attention is All You Need.
12 |
13 | A classic transformer model adapted for sequential data.
14 | Embedding has been replaced with a fully connected layer,
15 |     and the final softmax replaced by a plain linear projection.
16 |
17 | Attributes
18 | ----------
19 | layers_encoding: :py:class:`list` of :class:`Encoder.Encoder`
20 | stack of Encoder layers.
21 | layers_decoding: :py:class:`list` of :class:`Decoder.Decoder`
22 | stack of Decoder layers.
23 |
24 | Parameters
25 | ----------
26 | d_input:
27 | Model input dimension.
28 | d_model:
29 |         Dimension of the model's internal (latent) vector.
30 | d_output:
31 | Model output dimension.
32 | q:
33 | Dimension of queries and keys.
34 | v:
35 | Dimension of values.
36 | h:
37 | Number of heads.
38 | N:
39 | Number of encoder and decoder layers to stack.
40 | attention_size:
41 | Number of backward elements to apply attention.
42 | Deactivated if ``None``. Default is ``None``.
43 | dropout:
44 | Dropout probability after each MHA or PFF block.
45 | Default is ``0.3``.
46 | chunk_mode:
47 |         Switch between different MultiHeadAttention blocks.
48 | One of ``'chunk'``, ``'window'`` or ``None``. Default is ``'chunk'``.
49 | pe:
50 | Type of positional encoding to add.
51 | Must be one of ``'original'``, ``'regular'`` or ``None``. Default is ``None``.
52 | """
53 |
54 | def __init__(self, cnf: dict):
55 | """Create transformer structure from Encoder and Decoder blocks."""
56 | super().__init__()
57 |
58 | d_model = cnf["d_model"]
59 | q = cnf["q"]
60 | v = cnf["v"]
61 | h = cnf["h"]
62 | N = cnf["N"]
63 | attention_size = cnf["attention_size"]
64 | dropout = cnf["dropout"]
65 | pe = cnf["pe"]
66 | chunk_mode = cnf["chunk_mode"]
67 | d_input = cnf["d_input"]
68 | d_output = cnf["d_output"]
69 | self.time_steps = cnf["num_encoder_steps"]
70 | self.static_vars = cnf['static_input_loc']
71 | self.regular_vars = cnf['known_regular_inputs'] + cnf['input_obs_loc']
72 |
73 | self._d_model = d_model
74 |
75 | self.layers_encoding = nn.ModuleList([Encoder(d_model,
76 | q,
77 | v,
78 | h,
79 | attention_size=attention_size,
80 | dropout=dropout,
81 | chunk_mode=chunk_mode) for _ in range(N)])
82 | self.layers_decoding = nn.ModuleList([Decoder(d_model,
83 | q,
84 | v,
85 | h,
86 | attention_size=attention_size,
87 | dropout=dropout,
88 | chunk_mode=chunk_mode) for _ in range(N)])
89 |
90 | self._embedding_categorical = nn.ModuleList()
91 | for i in range(len(self.static_vars)):
92 | embedding = nn.Embedding(cnf['category_counts'][i], d_model)
93 | self._embedding_categorical.append(embedding)
94 |
95 | self._time_varying_embedding_layer = LinearLayer(input_size=len(self.regular_vars), size=d_model,
96 | use_time_distributed=True, batch_first=True)
97 |
98 | self._linear = nn.Linear(d_model, d_output)
99 |
100 | pe_functions = {
101 | 'original': generate_original_PE,
102 | 'regular': generate_regular_PE,
103 | }
104 |
105 | if pe in pe_functions.keys():
106 | self._generate_PE = pe_functions[pe]
107 | else:
108 | self._generate_PE = None
109 |
110 | self.name = 'transformer'
111 |
112 | def split_features(self, x):
113 | x_static = torch.stack([
114 | self._embedding_categorical[i](x[..., ix].long())
115 | for i, ix in enumerate(self.static_vars)
116 | ], dim=-1)
117 |
118 | x_static = x_static[:, 0:1, :].squeeze(-1)
119 | x_input = self._time_varying_embedding_layer(x[..., self.regular_vars])
120 |
121 | return x_input, x_static
122 |
123 | def forward(self, xy: torch.Tensor) -> torch.Tensor:
124 | """Propagate input through transformer
125 |
126 | Forward input through an embedding module,
127 | the encoder then decoder stacks, and an output module.
128 |
129 | Parameters
130 | ----------
131 |         xy:
132 |             :class:`torch.Tensor` of shape (batch_size, K, d_input), holding encoder steps and decoder targets concatenated along the time axis.
133 |
134 | Returns
135 | -------
136 | Output tensor with shape (batch_size, K, d_output).
137 | """
138 | x = xy[:, :self.time_steps]
139 | y = xy[:, self.time_steps:]
140 |
141 |         # Shift the decoder input right: prepend a start token, drop the last step
142 | pad = torch.ones((y.shape[0], 1, y.shape[2])).to(y.device)
143 | y = torch.cat((pad, y), dim=1)[:, :-1, :]
144 |
145 | x_input, x_static = self.split_features(x)
146 | y_input, y_static = self.split_features(y)
147 |
148 | # Add position encoding
149 | if self._generate_PE is not None:
150 | positional_encoding = self._generate_PE(x_input.shape[1], self._d_model)
151 | positional_encoding = positional_encoding.to(x_input.device)
152 | x_input.add_(positional_encoding)
153 |
154 |         # Encoding stack: chain each layer's output into the next
155 |         encoding_x = x_input
156 |         for layer in self.layers_encoding:
157 |             encoding_x = layer(encoding_x, context=x_static)
158 | # Decoding stack
159 | decoding = y_input
160 |
161 | # Add position encoding
162 | if self._generate_PE is not None:
163 | positional_encoding = self._generate_PE(y.shape[1], self._d_model)
164 | positional_encoding = positional_encoding.to(decoding.device)
165 | decoding.add_(positional_encoding)
166 |
167 | for layer in self.layers_decoding:
168 | decoding = layer(decoding, encoding_x, context=y_static)
169 |
170 | # Output module
171 | output = self._linear(decoding)
172 | return output
173 |
--------------------------------------------------------------------------------
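
For reference, a minimal config for the class above; every key is read in __init__, but the values here are hypothetical and dataset-dependent:

    from models.transformer_grn.transformer import Transformer

    cnf = {
        'd_model': 64,             # latent dimension
        'q': 8, 'v': 8, 'h': 4,    # query/value dimensions and number of heads
        'N': 2,                    # stacked encoder/decoder layers
        'attention_size': None,    # no local attention window
        'dropout': 0.3,
        'pe': 'regular',           # or 'original' / None
        'chunk_mode': None,        # or 'chunk' / 'window'
        'd_input': 5,
        'd_output': 3,             # e.g. one output per quantile
        'num_encoder_steps': 168,
        'static_input_loc': [4],            # indices of static categorical inputs
        'known_regular_inputs': [1, 2, 3],  # indices of known real-valued inputs
        'input_obs_loc': [0],               # index of the observed target
        'category_counts': [370],           # cardinality of each static variable
    }
    model = Transformer(cnf)  # expects input of shape (batch, time, features)
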
/models/transformer_grn/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Union
2 |
3 | import numpy as np
4 | import torch
5 |
6 |
7 | def generate_original_PE(length: int, d_model: int) -> torch.Tensor:
8 | """Generate positional encoding as described in original paper. :class:`torch.Tensor`
9 |
10 | Parameters
11 | ----------
12 | length:
13 | Time window length, i.e. K.
14 | d_model:
15 | Dimension of the model vector.
16 |
17 | Returns
18 | -------
19 | Tensor of shape (K, d_model).
20 | """
21 | PE = torch.zeros((length, d_model))
22 |
23 | pos = torch.arange(length).unsqueeze(1)
24 |     PE[:, 0::2] = torch.sin(
25 |         pos / torch.pow(10000, torch.arange(0, d_model, 2, dtype=torch.float32)/d_model))
26 |     PE[:, 1::2] = torch.cos(
27 |         pos / torch.pow(10000, torch.arange(1, d_model, 2, dtype=torch.float32)/d_model))
28 |
29 | return PE
30 |
31 |
32 | def generate_regular_PE(length: int, d_model: int, period: Optional[int] = 24) -> torch.Tensor:
33 | """Generate positional encoding with a given period.
34 |
35 | Parameters
36 | ----------
37 | length:
38 | Time window length, i.e. K.
39 | d_model:
40 | Dimension of the model vector.
41 | period:
42 | Size of the pattern to repeat.
43 | Default is 24.
44 |
45 | Returns
46 | -------
47 | Tensor of shape (K, d_model).
48 | """
49 | PE = torch.zeros((length, d_model))
50 |
51 | pos = torch.arange(length, dtype=torch.float32).unsqueeze(1)
52 | PE = torch.sin(pos * 2 * np.pi / period)
53 | PE = PE.repeat((1, d_model))
54 |
55 | return PE
56 |
57 |
58 | def generate_local_map_mask(row: int,
59 | col: int,
60 | attention_size: int,
61 | mask_future=False,
62 | device: torch.device = 'cpu') -> torch.BoolTensor:
63 | """Compute attention mask as attention_size wide diagonal.
64 |
65 | Parameters
66 | ----------
67 | row:
68 | Time dimension size v1
69 | col:
70 | Time dimension size v2
71 | attention_size:
72 | Number of backward elements to apply attention.
73 | device:
74 | torch device. Default is ``'cpu'``.
75 |
76 | Returns
77 | -------
78 | Mask as a boolean tensor.
79 | """
80 | local_map = np.empty((row, col))
81 | i, j = np.indices(local_map.shape)
82 |
83 | if mask_future:
84 | local_map[i, j] = (i - j > attention_size) ^ (j - i > 0)
85 | else:
86 | local_map[i, j] = np.abs(i - j) > attention_size
87 |
88 | return torch.BoolTensor(local_map).to(device)
89 |
--------------------------------------------------------------------------------
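
Two small, self-contained checks of the helpers above (sizes are arbitrary):

    from models.transformer_grn.utils import generate_local_map_mask, generate_regular_PE

    # True marks positions that get masked (set to -inf in the attention scores):
    # only |i - j| <= attention_size remains attendable.
    mask = generate_local_map_mask(row=5, col=5, attention_size=1)
    # tensor([[False, False,  True,  True,  True],
    #         [False, False, False,  True,  True], ...])

    pe = generate_regular_PE(length=48, d_model=64, period=24)
    assert pe.shape == (48, 64)  # the sinusoidal pattern repeats every `period` steps
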
/progress_bar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | import math
5 | from datetime import datetime
6 |
7 |
8 | class ProgressBar(object):
9 | """
10 | Utility class for the management of progress bars showing training progress in the form
11 | "[] Epoch . ││ "
12 | """
13 |
14 |
15 | @property
16 | def progress(self):
17 | # type: () -> float
18 | return (self.step + 1) / self.max_step
19 |
20 |
21 | def __init__(self, max_step, max_epoch, current_epoch=0):
22 | # type: (int, int, int) -> None
23 | self.max_step = max_step
24 | self.max_epoch = max_epoch
25 | self.current_epoch = current_epoch
26 | self.step = 0
27 |
28 |
29 | def inc(self):
30 |         # type: () -> None
31 | """
32 | Increase the progress bar value by one unit
33 | """
34 | self.step = self.step + 1
35 | if self.step == self.max_step:
36 | self.step = 0
37 | self.current_epoch = self.current_epoch + 1
38 |
39 |
40 | def __str__(self):
41 | # type: () -> str
42 | value = int(round(self.progress * 50))
43 | date = datetime.now().strftime("%b-%d@%H:%M").lower()
44 | progress_bar = ('█' * value + ('┈' * (50 - value)))
45 | return '\r[{}] Epoch {:0{e}d}.{:0{s}d}: │{}│ {:6.2f}%'.format(
46 | date, self.current_epoch, self.step + 1,
47 | progress_bar, 100 * self.progress,
48 | e=math.ceil(math.log10(self.max_epoch)),
49 | s=math.ceil(math.log10(self.max_step + 1)),
50 | )
51 |
--------------------------------------------------------------------------------
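
A minimal usage sketch for the class above, mirroring how trainer.py drives it (print first, then inc):

    from progress_bar import ProgressBar

    bar = ProgressBar(max_step=100, max_epoch=10)
    for epoch in range(10):
        for step in range(100):
            # ... one training step ...
            print(f'\r{bar}', end='')
            bar.inc()
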
/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-storage-blob
2 | typing>=3.7
3 | Click>=7.0
4 | numpy>=1.17
5 | torchsummary>=1.5
6 | matplotlib>=3.1
7 | torch>=1.3
8 | termcolor>=1.1
9 | torchvision>=0.2
10 | Pillow>=6.2
11 | tensorboardX>=1.9
12 | PyYAML>=5.1.2
13 | path.py>=12.0
14 | pandas
15 | scikit-learn
--------------------------------------------------------------------------------
/scheduler.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | import torch.backends.cudnn as cudnn
5 | from conf import Conf
6 | from trainer import Trainer
7 | import time
8 |
9 | import glob
10 | from pathlib import Path
11 | from retry import retry
12 | import click
13 |
14 | cudnn.benchmark = True
15 |
16 |
17 | @click.command()
18 | @click.option('--exp_path', type=str, default="./conf/experiments/")
19 | @retry(tries=2, delay=2)
20 | def scheduler(exp_path):
21 | for i, file in enumerate(sorted(glob.glob(exp_path + "*.yaml"))):
22 | time.sleep(5)
23 | exp_name = Path(file).stem
24 | cnf = Conf(conf_file_path=file, exp_name=exp_name, seed=666, log=False)
25 | print("\n Starting experiment: " + exp_name + "\n")
26 | trainer = Trainer(cnf=cnf)
27 | try:
28 | trainer.run()
29 | except Exception as e:
30 | print(e)
31 | del trainer
32 | print("\n Starting next experiment...\n")
33 |
34 |
35 | if __name__ == '__main__':
36 | scheduler()
37 |
--------------------------------------------------------------------------------
/slurm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | import subprocess
5 |
6 | import click
7 | from path import Path
8 |
9 |
10 | # -----------------------------
11 | # Template of the Slurm script
12 | # -----------------------------
13 | TEMPLATE = '''#!/bin/bash
14 | #SBATCH --job-name=**exp**
15 | #SBATCH --output=**project**/slurm/log/out.**exp**.txt
16 | #SBATCH --error=**project**/slurm/log/err.**exp**.txt
17 | #SBATCH --open-mode=append
18 | #SBATCH --partition=prod
19 | #SBATCH --nodes=1
20 | #SBATCH --ntasks=1
21 | #SBATCH --cpus-per-task=4
22 | #SBATCH --gres=gpu:1
23 |
24 | source activate python3
25 |
26 | cd **project**
27 | srun python -u main.py --exp_name '**exp**!' --conf_file_path '**cnf**'
28 | '''
29 |
30 |
31 | @click.command()
32 | def main():
33 | """
34 |     (1) creates the slurm script
35 |     (2) saves it in 'slurm/<exp_name>.sh'
36 |     (3) optionally submits it with `sbatch` after an interactive confirmation
37 | """
38 |
39 | out_err_log_dir_path = Path('slurm/log')
40 | if not out_err_log_dir_path.exists():
41 | out_err_log_dir_path.makedirs()
42 |
43 | exp_name = click.prompt('▶ experiment name', type=str)
44 | if Path(f'conf/{exp_name}.yaml').exists():
45 | conf_file_name = click.prompt('▶ conf file name', default=f'{exp_name}.yaml')
46 | else:
47 | conf_file_name = click.prompt('▶ conf file name', default='default.yaml')
48 |
49 | if '/' in conf_file_name:
50 | conf_file_path = conf_file_name
51 | else:
52 | conf_file_path = f'conf/{conf_file_name}'
53 | project_dir_path = Path('.').abspath()
54 |
55 | text = TEMPLATE
56 | text = text.replace('**exp**', exp_name)
57 | text = text.replace('**cnf**', conf_file_path)
58 | text = text.replace('**project**', project_dir_path)
59 | if 'flanzi' in project_dir_path:
60 | text = text.replace('source activate python3', '#source activate python3')
61 |
62 | print('\n-------------------------------------\n')
63 | print(text)
64 |
65 | out_file_path = Path('slurm') / exp_name + '.sh'
66 | out_file_path.write_text(text=text)
67 |
68 | print('-------------------------------------\n')
69 | if click.confirm('▶ sbatch now?', default=True):
70 | print('\n-------------------------------------\n')
71 | command = f'sbatch {out_file_path}'
72 | process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
73 | output, error = process.communicate()
74 | print('▶', output.decode())
75 | if error:
76 | print('▶ [ERROR] - ', error.decode())
77 |
78 |
79 | if __name__ == '__main__':
80 | main()
81 |
--------------------------------------------------------------------------------
/slurm/Traffic_5TR.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --job-name=TF_Traffic
3 | #SBATCH --output=/homes/svincenzi/TIME_SERIES/slurm/log/out.TF_Traffic.txt
4 | #SBATCH --error=/homes/svincenzi/TIME_SERIES/slurm/log/err.TF_Traffic.txt
5 | #SBATCH --open-mode=append
6 | #SBATCH --partition=prod
7 | #SBATCH --nodes=1
8 | #SBATCH --ntasks=1
9 | #SBATCH --cpus-per-task=4
10 | #SBATCH --gres=gpu:1
11 |
12 | source activate py_env2
13 | module load cuda/10.0
14 |
15 | export PYTHONPATH=/homes/svincenzi/TIME_SERIES
16 |
17 |
18 | cd /homes/svincenzi/TIME_SERIES
19 | srun python -u main.py --exp_name 'TF_Traffic!' --conf_file_path 'conf/traffic.yaml'
20 |
--------------------------------------------------------------------------------
/trainer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | from time import time
5 | import numpy as np
6 | import torch
7 | from torch import optim
8 | from torch.utils.data import DataLoader
9 | from torch.utils.tensorboard import SummaryWriter
10 | from conf import Conf
11 | from dataset.ts_dataset import TSDataset
12 | from models.temporal_fusion_t import tft_model
13 | from progress_bar import ProgressBar
14 | from utils import QuantileLoss, symmetric_mean_absolute_percentage_error, unnormalize_tensor, plot_temporal_serie
15 | import data_formatters.utils as utils
16 | from models.transformer import Transformer
17 | from models.transformer_grn.transformer import Transformer as GRNTransformer
18 |
19 |
20 | class Trainer(object):
21 | """
22 |     Class for training and testing the model
23 | """
24 |
25 | def __init__(self, cnf):
26 |         # type: (Conf) -> None
27 |
28 | torch.set_num_threads(3)
29 |
30 | self.cnf = cnf
31 | self.data_formatter = utils.make_data_formatter(cnf.ds_name)
32 |
33 | loader = TSDataset
34 |
35 | # init dataset
36 | dataset_train = loader(self.cnf, self.data_formatter)
37 | dataset_train.train()
38 | dataset_test = loader(self.cnf, self.data_formatter)
39 | dataset_test.test()
40 |
41 | # init model
42 | model_choice = self.cnf.all_params["model"]
43 | if model_choice == "transformer":
44 | # Baseline transformer
45 | self.model = Transformer(self.cnf.all_params)
46 | elif model_choice == "tf_transformer":
47 | # Temporal fusion transformer
48 | self.model = tft_model.TFT(self.cnf.all_params)
49 | elif model_choice == "grn_transformer":
50 | # Transformer + GRN to encode static vars
51 | self.model = GRNTransformer(self.cnf.all_params)
52 | else:
53 |             raise NameError(f'unknown model: {model_choice}')
54 |
55 | self.model = self.model.to(cnf.device)
56 |
57 | # init optimizer
58 | self.optimizer = optim.Adam(params=self.model.parameters(), lr=cnf.lr)
59 | self.loss = QuantileLoss(cnf.quantiles)
60 |
61 | # init train loader
62 | self.train_loader = DataLoader(
63 | dataset=dataset_train, batch_size=cnf.batch_size,
64 | num_workers=cnf.n_workers, shuffle=True, pin_memory=True,
65 | )
66 |
67 | # init test loader
68 | self.test_loader = DataLoader(
69 | dataset=dataset_test, batch_size=cnf.batch_size,
70 | num_workers=cnf.n_workers, shuffle=False, pin_memory=True,
71 | )
72 |
73 | # init logging stuffs
74 | self.log_path = cnf.exp_log_path
75 | print(f'tensorboard --logdir={cnf.project_log_path.abspath()}\n')
76 | self.sw = SummaryWriter(self.log_path)
77 | self.log_freq = len(self.train_loader)
78 | self.train_losses = []
79 | self.test_loss = []
80 | self.test_losses = {'p10': [], 'p50': [], 'p90': []}
81 | self.test_smape = []
82 |
83 | # starting values
84 | self.epoch = 0
85 | self.best_test_loss = None
86 |
87 | # init progress bar
88 | self.progress_bar = ProgressBar(max_step=self.log_freq, max_epoch=self.cnf.epochs)
89 |
90 | # possibly load checkpoint
91 | self.load_ck()
92 |
93 | print("Finished preparing datasets.")
94 |
95 | def load_ck(self):
96 | """
97 | load training checkpoint
98 | """
99 | ck_path = self.log_path / 'training.ck'
100 | if ck_path.exists():
101 | ck = torch.load(ck_path)
102 | print(f'[loading checkpoint \'{ck_path}\']')
103 | self.epoch = ck['epoch']
104 | self.progress_bar.current_epoch = self.epoch
105 | self.model.load_state_dict(ck['model'])
106 | self.optimizer.load_state_dict(ck['optimizer'])
107 |             self.best_test_loss = ck['best_test_loss']
108 |
109 | def save_ck(self):
110 | """
111 | save training checkpoint
112 | """
113 | ck = {
114 | 'epoch': self.epoch,
115 | 'model': self.model.state_dict(),
116 | 'optimizer': self.optimizer.state_dict(),
117 | 'best_test_loss': self.best_test_loss
118 | }
119 | torch.save(ck, self.log_path / 'training.ck')
120 |
121 | def train(self):
122 | """
123 | train model for one epoch on the Training-Set.
124 | """
125 | start_time = time()
126 | self.model.train()
127 |
128 | times = []
129 | for step, sample in enumerate(self.train_loader):
130 | t = time()
131 | self.optimizer.zero_grad()
132 | # Feed input to the model
133 | x = sample['inputs'].float().to(self.cnf.device)
134 | if self.cnf.all_params["model"] == "tf_transformer":
135 | output, _, _ = self.model.forward(x)
136 | else:
137 | output = self.model.forward(x)
138 |
139 | # Compute Loss
140 | loss, _ = self.loss(output.squeeze(), sample['outputs'].squeeze().float().to(self.cnf.device))
141 | loss.backward()
142 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.cnf.all_params['max_gradient_norm'])
143 | self.train_losses.append(loss.item())
144 |             self.optimizer.step()
145 |
146 | # print an incredible progress bar
147 | times.append(time() - t)
148 |             if self.cnf.log_each_step or self.progress_bar.progress == 1:
149 | print(f'\r{self.progress_bar} '
150 | f'│ Loss: {np.mean(self.train_losses):.6f} '
151 | f'│ ↯: {1 / np.mean(times):5.2f} step/s', end='')
152 | self.progress_bar.inc()
153 |
154 | # log average loss of this epoch
155 | mean_epoch_loss = np.mean(self.train_losses)
156 | self.sw.add_scalar(tag='train_loss', scalar_value=mean_epoch_loss, global_step=self.epoch)
157 | self.train_losses = []
158 |
159 | # log epoch duration
160 | print(f' │ T: {time() - start_time:.2f} s')
161 |
162 | def test(self):
163 | """
164 | test model on the Test-Set
165 | """
166 | self.model.eval()
167 | output, sample = None, None
168 |
169 | t = time()
170 | for step, sample in enumerate(self.test_loader):
171 |
172 |             # Hide future target values from the input: overwrite the target channel with a constant (1) for timesteps at or after num_encoder_steps
173 | steps = self.cnf.all_params['num_encoder_steps']
174 | pred_len = sample['outputs'].shape[1]
175 | x = sample['inputs'].float().to(self.cnf.device)
176 | x[:, steps:, 0] = 1
177 |
178 | # Feed input to the model
179 | if self.cnf.all_params["model"] == "transformer" or self.cnf.all_params["model"] == "grn_transformer":
180 |
181 | # Auto-regressive prediction
182 | for i in range(pred_len):
183 | output = self.model.forward(x)
184 | x[:, steps + i, 0] = output[:, i, 1]
185 | output = self.model.forward(x)
186 |
187 | elif self.cnf.all_params["model"] == "tf_transformer":
188 | output, _, _ = self.model.forward(x)
189 | else:
190 |                 raise NameError(f'unknown model: {self.cnf.all_params["model"]}')
191 |
192 | output = output.squeeze()
193 | y, y_pred = sample['outputs'].squeeze().float().to(self.cnf.device), output
194 |
195 | # Compute loss
196 | loss, _ = self.loss(y_pred, y)
197 | smape = symmetric_mean_absolute_percentage_error(output[:, :, 1].detach().cpu().numpy(),
198 | sample['outputs'][:, :, 0].detach().cpu().numpy())
199 |
200 | # De-Normalize to compute metrics
201 | target = unnormalize_tensor(self.data_formatter, y, sample['identifier'][0][0])
202 | p10_forecast = unnormalize_tensor(self.data_formatter, y_pred[..., 0], sample['identifier'][0][0])
203 | p50_forecast = unnormalize_tensor(self.data_formatter, y_pred[..., 1], sample['identifier'][0][0])
204 | p90_forecast = unnormalize_tensor(self.data_formatter, y_pred[..., 2], sample['identifier'][0][0])
205 |
206 | # Compute metrics
207 | self.test_losses['p10'].append(self.loss.numpy_normalised_quantile_loss(p10_forecast, target, 0.1))
208 | self.test_losses['p50'].append(self.loss.numpy_normalised_quantile_loss(p50_forecast, target, 0.5))
209 | self.test_losses['p90'].append(self.loss.numpy_normalised_quantile_loss(p90_forecast, target, 0.9))
210 |
211 | self.test_loss.append(loss.item())
212 | self.test_smape.append(smape)
213 |
214 | # Log stuff
215 | for k in self.test_losses.keys():
216 | mean_test_loss = np.mean(self.test_losses[k])
217 | print(f'\t● AVG {k} Loss on TEST-set: {mean_test_loss:.6f} │ T: {time() - t:.2f} s')
218 | self.sw.add_scalar(tag=k + '_test_loss', scalar_value=mean_test_loss, global_step=self.epoch)
219 |
220 | # log log log
221 | mean_test_loss = np.mean(self.test_loss)
222 | mean_smape = np.mean(self.test_smape)
223 | print(f'\t● AVG Loss on TEST-set: {mean_test_loss:.6f} │ T: {time() - t:.2f} s')
224 | print(f'\t● AVG SMAPE on TEST-set: {mean_smape:.6f} │ T: {time() - t:.2f} s')
225 | self.sw.add_scalar(tag='test_smape', scalar_value=mean_smape, global_step=self.epoch)
226 | self.sw.add_scalar(tag='test_loss', scalar_value=mean_test_loss, global_step=self.epoch)
227 |
228 | # save best model
229 | if self.best_test_loss is None or mean_test_loss < self.best_test_loss:
230 | self.best_test_loss = mean_test_loss
231 | torch.save(self.model.state_dict(), self.log_path / self.cnf.exp_name + '_best.pth')
232 |
233 | def run(self):
234 | """
235 | start model training procedure (train > test > checkpoint > repeat)
236 | """
237 | for _ in range(self.epoch, self.cnf.epochs):
238 | self.train()
239 |
240 | with torch.no_grad():
241 | self.test()
242 |
243 | self.epoch += 1
244 | self.save_ck()
245 |
--------------------------------------------------------------------------------
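
How the trainer is driven elsewhere in the repo (cf. scheduler.py); the conf file path and experiment name below are illustrative:

    from conf import Conf
    from trainer import Trainer

    cnf = Conf(conf_file_path='conf/traffic.yaml', exp_name='my_experiment', seed=666, log=False)
    Trainer(cnf=cnf).run()  # train > test > checkpoint, once per epoch
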
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # ---------------------
3 |
4 | import json
5 | import os
6 | from datetime import datetime
7 | from enum import Enum
8 | from typing import *
9 | from typing import Callable, List, TypeVar
10 |
11 | import PIL
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | import pandas as pd
15 | import torch
16 | from PIL.Image import Image
17 | from matplotlib import cm
18 | from matplotlib import figure
19 | from pathlib import Path
20 | from torch import Tensor
21 | from torch import nn
22 | from torchvision.transforms import ToTensor
23 |
24 |
25 | class QuantileLoss(nn.Module):
26 | ## From: https://medium.com/the-artificial-impostor/quantile-regression-part-2-6fdbc26b2629
27 |
28 | def __init__(self, quantiles):
29 | ##takes a list of quantiles
30 | super().__init__()
31 | self.quantiles = quantiles
32 |
33 | def numpy_normalised_quantile_loss(self, y_pred, y, quantile):
34 | """Computes normalised quantile loss for numpy arrays.
35 | Uses the q-Risk metric as defined in the "Training Procedure" section of the
36 | main TFT paper.
37 | Args:
38 | y: Targets
39 | y_pred: Predictions
40 | quantile: Quantile to use for loss calculations (between 0 & 1)
41 | Returns:
42 | Float for normalised quantile loss.
43 | """
44 | if isinstance(y_pred, torch.Tensor):
45 | y_pred = y_pred.detach().cpu().numpy()
46 |
47 | if len(y_pred.shape) == 3:
48 | ix = self.quantiles.index(quantile)
49 | y_pred = y_pred[..., ix]
50 |
51 | if isinstance(y, torch.Tensor):
52 | y = y.detach().cpu().numpy()
53 |
54 | prediction_underflow = y - y_pred
55 | weighted_errors = quantile * np.maximum(prediction_underflow, 0.) \
56 | + (1. - quantile) * np.maximum(-prediction_underflow, 0.)
57 |
58 | quantile_loss = weighted_errors.mean()
59 | normaliser = np.abs(y).mean()
60 |
61 | return 2 * quantile_loss / normaliser
62 |
63 | def forward(self, preds, target, ret_losses=True):
64 | assert not target.requires_grad
65 | assert preds.size(0) == target.size(0)
66 | losses = []
67 |
68 | for i, q in enumerate(self.quantiles):
69 | errors = target - preds[:, :, i]
70 | losses.append(
71 | torch.max(
72 | (q - 1) * errors,
73 | q * errors
74 | ).unsqueeze(1))
75 | loss = torch.mean(
76 | torch.sum(torch.cat(losses, dim=1), dim=1))
77 | if ret_losses:
78 | return loss, losses
79 | return loss
80 |
81 |
82 | def unnormalize_tensor(data_formatter, data, identifier):
83 | data = pd.DataFrame(
84 | data.detach().cpu().numpy(),
85 | columns=[
86 | 't+{}'.format(i)
87 | for i in range(data.shape[1])
88 | ])
89 |
90 | data['identifier'] = np.array(identifier)
91 | data = data_formatter.format_predictions(data)
92 |
93 | return data.drop(columns=['identifier']).values
94 |
95 |
96 | def symmetric_mean_absolute_percentage_error(forecast, actual):
97 | # Symmetric Mean Absolute Percentage Error (SMAPE)
98 | sequence_length = forecast.shape[1]
99 | sumf = np.sum(np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast)), axis=1)
100 | return np.mean((2 * sumf) / sequence_length)
101 |
102 |
103 | def plot_temporal_serie(y_pred, y_true):
104 | if isinstance(y_pred, Tensor):
105 | y_pred = y_pred.detach().cpu().numpy()
106 |
107 | if isinstance(y_true, Tensor):
108 | y_true = y_true.detach().cpu().numpy()
109 |
110 | ind = np.random.choice(y_pred.shape[0])
111 | plt.plot(y_pred[ind, :, 0], label='pred_1')
112 | plt.plot(y_pred[ind, :, 1], label='pred_5')
113 | plt.plot(y_pred[ind, :, 2], label='pred_9')
114 |
115 | plt.plot(y_true[ind, :, 0], label='true')
116 | plt.legend()
117 | plt.show()
118 |
119 |
120 | def imread(path):
121 | # type: (Union[Path, str]) -> Image
122 | """
123 | Reads the image located in `path`
124 | :param path:
125 | :return:
126 | """
127 | with open(path, 'rb') as f:
128 | with PIL.Image.open(f) as img:
129 | return img.convert('RGB')
130 |
131 |
132 | def pyplot_to_numpy(pyplot_figure):
133 | # type: (figure.Figure) -> np.ndarray
134 | """
135 | Converts a PyPlot figure into a NumPy array
136 | :param pyplot_figure: figure you want to convert
137 | :return: converted NumPy array
138 | """
139 | pyplot_figure.canvas.draw()
140 |     x = np.frombuffer(pyplot_figure.canvas.tostring_rgb(), dtype=np.uint8)
141 | x = x.reshape(pyplot_figure.canvas.get_width_height()[::-1] + (3,))
142 | return x
143 |
144 |
145 | def pyplot_to_tensor(pyplot_figure):
146 | # type: (figure.Figure) -> Tensor
147 | """
148 | Converts a PyPlot figure into a PyTorch tensor
149 | :param pyplot_figure: figure you want to convert
150 | :return: converted PyTorch tensor
151 | """
152 | x = pyplot_to_numpy(pyplot_figure=pyplot_figure)
153 | x = ToTensor()(x)
154 | return x
155 |
156 |
157 | def apply_colormap_to_tensor(x, cmap='jet', range=(None, None)):
158 | # type: (Tensor, str, Optional[Tuple[float, float]]) -> Tensor
159 | """
160 | :param x: Tensor with shape (1, H, W)
161 | :param cmap: name of the color map you want to apply
162 | :param range: tuple of (minimum possible value in x, maximum possible value in x)
163 | :return: Tensor with shape (3, H, W)
164 | """
165 | cmap = cm.ScalarMappable(cmap=cmap)
166 | cmap.set_clim(vmin=range[0], vmax=range[1])
167 |     x = x.detach().cpu().numpy()
168 | x = x.squeeze()
169 | x = cmap.to_rgba(x)[:, :, :-1]
170 | return ToTensor()(x)
171 |
172 |
--------------------------------------------------------------------------------
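
A tiny numeric check of the pinball term inside QuantileLoss.forward (numbers are illustrative): for q = 0.9, under-prediction costs nine times more than an over-prediction of the same size.

    import torch

    q, target = 0.9, torch.tensor([10.0])
    for pred in (8.0, 12.0):
        error = target - pred                         # positive when under-predicting
        loss = torch.max((q - 1) * error, q * error)
        print(pred, loss.item())                      # 8.0 -> ~1.8, 12.0 -> ~0.2
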