├── .gitignore ├── Drawings ├── deep_volatility_architecture.tex └── deep_volatility_architecture_horiz.tex ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml ├── scripts └── train_example.sh ├── setup.cfg ├── src └── deep_volatility_models │ ├── __init__.py │ ├── architecture.py │ ├── data_sources.py │ ├── embedding_models.py │ ├── evaluate_model.py │ ├── hyperopt_opt.py │ ├── hyperopt_opt2.py │ ├── hyperopt_opt_risk_neutral.py │ ├── hyperopt_risk_neutral_no_mixture.py │ ├── loss_functions.py │ ├── mixture_model_stats.py │ ├── model_wrappers.py │ ├── models.py │ ├── optuna_opt.py │ ├── sample.py │ ├── stock_data.py │ ├── time_series_datasets.py │ ├── train_univariate.py │ ├── training.py │ └── util.py └── tests ├── __init__.py ├── test_architecture.py ├── test_loss_functions.py ├── test_mixture_model_stats.py ├── test_stock_data.py ├── test_time_series_datasets.py └── test_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.pt 3 | *~ 4 | *.log 5 | *.output 6 | *.png 7 | *.aux 8 | *.dvi 9 | *.pdf 10 | *.ps 11 | __pycache__ 12 | .ipynb_checkpoints 13 | -------------------------------------------------------------------------------- /Drawings/deep_volatility_architecture.tex: -------------------------------------------------------------------------------- 1 | % \def\pgfsysdriver{pgfsys-dvipdfm.def} 2 | \documentclass[dvips,tikz,12pt,convert={true,density=1200}]{standalone} 3 | % \documentclass[dvipdfm,tikz]{standalone} 4 | \usepackage{tikz} 5 | \usetikzlibrary{arrows.meta} 6 | \usetikzlibrary{positioning} 7 | \usetikzlibrary{calc} 8 | \newdimen\cellsize\cellsize=6pt 9 | \tikzset{tensor/.style args={#1#2}{rectangle,draw=blue!50,fill=blue!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 10 | path picture={\draw[xstep=\the\cellsize,ystep=0cm,black, very thin] (path picture bounding box.south west) grid (path picture bounding box.north east);}% 11 | }} 12 | \tikzset{tensor2/.style args={#1#2}{rectangle,draw=blue!50,fill=blue!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 13 | path picture={\dimen0=#1\count0=\dimen0\divide\count0 by \cellsize 14 | \dimen1=#2\count1=\dimen1\divide\count1 by \cellsize 15 | \draw[black,ultra thin] 16 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(\the\dimen0, 0pt)} 17 | ; 18 | \draw[black,very thin] 19 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt, #2)} 20 | ; 21 | }}} 22 | \tikzset{tensor3/.style args={#1#2}{rectangle,draw=blue!50,fill=blue!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 23 | path picture={\dimen0=#1\divide\dimen0 by 2\advance\dimen0 by -4pt\count0=\dimen0\divide\count0 by \cellsize 24 | \dimen1=#2\count1=\dimen1\divide\count1 by \cellsize 25 | \dimen2=\cellsize\multiply\dimen2 by \count0 26 | \draw[black,ultra thin] 27 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(\the\dimen2, 0pt)} 28 | (path picture bounding box.south east) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(-\the\dimen2, 0pt)} 29 | ; 30 | \draw[black,very thin] 31 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt, #2)} 32 | (path picture bounding box.south east) \foreach \i in {1,...,\the\count0}{++(-\the\cellsize,0pt) -- +(0pt, #2)} 33 | ; 34 | \draw[black,very thin] 35 | (path picture bounding box) +(-2pt, 0pt) [fill] circle[radius=0.3pt] +(0,0) 
circle[radius=0.3pt] +(2pt,0pt) circle[fill,radius=0.3pt] ; 36 | }}} 37 | 38 | \begin{document} 39 | \begin{tikzpicture} 40 | [block/.style ={rectangle,draw=red!50,fill=red!20,minimum size=4mm}, 41 | node distance=0.30cm 42 | ] 43 | \node[tensor3={256pt}{\the\cellsize},rotate=90] (timeseriesinput) [label=left:$1\times256$] {}; 44 | \node[block] (firstconvolution) [below=of timeseriesinput] {1D Conv} 45 | edge [{Latex}-] (timeseriesinput) 46 | ; 47 | \node[tensor3={64pt}{18pt}] (firstlayeroutput) [below=of firstconvolution] [label=left:${\rm features}\times 64$]{} 48 | edge [{Latex}-] (firstconvolution) 49 | ; 50 | \node[block] (secondconvolution) [below=of firstlayeroutput] {1D Conv} 51 | edge [{Latex}-] (firstlayeroutput) 52 | ; 53 | \node[tensor2={\the\cellsize}{18pt}] (latent) [below=of secondconvolution] [label=left:${\rm features}\times 1$] {} 54 | edge [{Latex}-] (secondconvolution) 55 | ; 56 | \node[tensor2={\the\cellsize}{24pt}] (embedding) [below=of latent] [label=left:${\rm embedding\ dimension}\times 1$] {} 57 | ; 58 | \node[circle,fill,inner sep=1pt,outer sep=0pt] (aggregate) at ($(latent)!0.5!(embedding)$) [right=0.35cm] {} 59 | ; 60 | \node[block] (mixing) [right=of aggregate] {FC} 61 | ; 62 | 63 | \draw[-{Latex}] (latent) -| (aggregate) -- (mixing) ; 64 | \draw (embedding) -| (aggregate) ; 65 | 66 | \end{tikzpicture} 67 | \end{document} 68 | -------------------------------------------------------------------------------- /Drawings/deep_volatility_architecture_horiz.tex: -------------------------------------------------------------------------------- 1 | \def\pgfsysdriver{pgfsys-dvipdfm.def} 2 | \documentclass[dvipdfm,tikz,12pt]{standalone} 3 | % \documentclass[dvips,tikz,12pt,convert={true,density=1200}]{standalone} 4 | \usepackage{tikz} 5 | % 6 | % \usepackage{xcharter-otf} 7 | % \usepackage[scaled=.98,sups,osf]{XCharter}% lining figures in math, osf in text 8 | \usepackage[scaled=.98,sups]{XCharter}% lining figures in math, osf in text 9 | \usepackage[scaled=1.04,varqu,varl]{inconsolata}% inconsolata typewriter 10 | \usepackage[type1]{cabin}% sans serif 11 | \usepackage[uprightscript,charter,vvarbb,scaled=1.05]{newtxmath} 12 | \linespread{1.04} 13 | % 14 | \usetikzlibrary{arrows.meta} 15 | \usetikzlibrary{positioning} 16 | \usetikzlibrary{calc} 17 | \usetikzlibrary{quotes} 18 | \newdimen\cellsize\cellsize=6pt 19 | \tikzset{tensor/.style args={#1#2#3}{rectangle,draw=black!50,fill=#3!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 20 | path picture={\dimen0=#1\count0=\dimen0\divide\count0 by \cellsize 21 | \dimen1=#2\count1=\dimen1\divide\count1 by \cellsize 22 | \draw[black!50,ultra thin] 23 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt, #2)} 24 | ; 25 | \draw[black!50,thin] 26 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(\the\dimen0, 0pt)} 27 | ; 28 | }}} 29 | \tikzset{bigtensor/.style args={#1#2#3}{rectangle,draw=black!50,fill=#3!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 30 | path picture={\dimen0=#1\count0=\dimen1\divide\count0 by \cellsize 31 | \dimen1=#2\divide\dimen1 by 2\advance\dimen1 by -4pt\count1=\dimen1\divide\count1 by \cellsize 32 | \dimen2=\cellsize\multiply\dimen2 by \count1 33 | \draw[black!50,thin] 34 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(#1,0pt)} 35 | (path picture bounding box.north west) \foreach \i in {1,...,\the\count1}{++(0pt,-\the\cellsize) -- 
+(#1,0pt)} 36 | ; 37 | \draw[black!50,ultra thin] 38 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt,\the\dimen2)} 39 | (path picture bounding box.north west) \foreach \j in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt,-\the\dimen2)} 40 | ; 41 | \draw[black!75,very thin] 42 | (path picture bounding box) +(0pt,-2pt) [fill] circle[radius=0.4pt] +(0,0) circle[radius=0.4pt] +(0pt,2pt) circle[fill,radius=0.4pt] ; 43 | }}} 44 | 45 | \begin{document} 46 | \begin{tikzpicture} 47 | [block/.style={rectangle,draw=red!50,fill=red!20,minimum size=4mm,align=center}, 48 | connection/.style={circle,fill,inner sep=1pt,outer sep=0pt}, 49 | node distance=1.70cm and 0.95cm, 50 | every edge quotes/.style={font=\tiny,auto=right}, 51 | every label/.style={font=\tiny,text width=1.75cm,align=center} 52 | ] 53 | \node[bigtensor={\the\cellsize}{124pt}{green},fill=green!20] (timeseriesinput) [label=below:$256\times1$\\ ($x_{n-256} \hbox{ \it to } x_{n-1}$)] 54 | [label={[font=\small]above:{\it time series}}] {}; 55 | ; 56 | \node[bigtensor={18pt}{72pt}{blue}] (layer1) [right=of timeseriesinput] [label=below:$64 \times n_f$]{} 57 | edge ["conv1",{Latex}-] (timeseriesinput) 58 | ; 59 | \node[bigtensor={18pt}{46pt}{blue}] (layer2) [right=of layer1] [label=below:$16 \times n_f$]{} 60 | edge ["conv2",{Latex}-] (layer1) 61 | ; 62 | \node[tensor={18pt}{24pt}{blue}] (layer3) [right=of layer2] [label=below:$4 \times n_f$] {} 63 | edge ["conv3",{Latex}-] (layer2) 64 | ; 65 | \node[tensor={18pt}{\the\cellsize}{blue}] (latent) [right=of layer3] [label=below:$1 \times n_f$] {} 66 | edge ["conv4",{Latex}-] (layer3) 67 | ; 68 | \node[tensor={\the\cellsize}{18pt}{orange}] (flat latent) [right=of latent] [label=below:$n_f \times 1$] [label={[font=\small]above:{\it ts latent}}] {} 69 | edge ["transpose",{Latex}-] (latent) 70 | ; 71 | \node[tensor={\the\cellsize}{24pt}{green}] (embedding) [below=of flat latent] 72 | [label=below:$n_e \times 1$] [label={[font=\small,text depth=0pt]above:\parbox[b]{1.75cm}{\centering \it stock\\[-0.8ex]embedding}}] {} 73 | ; 74 | 75 | \node (hidden1) at ($(flat latent)!0.5!(embedding)$) {} 76 | ; 77 | \node[connection] (aggregate) [right=0.85cm of hidden1] {} 78 | ; 79 | \draw (flat latent) -| (aggregate) ; 80 | \draw (embedding) -| (aggregate) ; 81 | 82 | 83 | \node[tensor={\the\cellsize}{18pt}{red}] (latent2) [right=0.6cm of aggregate] [label=below:$n_f \times 1$] [label={[font=\small]above:{\it latent}}] {} 84 | edge ["fc1",{Latex}-] (aggregate) 85 | ; 86 | 87 | \node[connection] (split) [right=0.6cm of latent2] {}; 88 | 89 | \node (sigma) [right=3.4cm of flat latent] {$\sigma_n$}; 90 | \node (mu) [right=3.4cm of embedding] {$\mu_n$}; 91 | 92 | \draw[-{Latex}] (latent2) -- (split) |- node[near end,auto=left] {\tiny $\sigma$-head} (sigma) ; 93 | \draw[-{Latex}] (split) |- node[near end,auto=left] {\tiny $\mu$-head} (mu) ; 94 | 95 | 96 | \end{tikzpicture} 97 | \end{document} 98 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Deep Volatility Models 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Volatility Models (for stock prices) 2 | 3 | This package uses convolutional neural networks (implemented in PyTorch) to train mixture models to model 4 | the volatility of stock prices. 5 | 6 | A single model is trained on a number of different stock symbols. Internally, 7 | an embedding is learned for each symbol. In other words, a convolutional neural 8 | network learns general features of time series of daily returns that predict the 9 | volatility along with an embedding that tunes the result for different symbols. 10 | 11 | ## Motivation 12 | 13 | The volatility of stock returns changes daily. The models produced by this 14 | package predict the *distribution* of the log returns for the next trading date. 15 | The actual return is virtually impossible to predict, but predicting the 16 | distribution of returns has several uses: 17 | 18 | 1. *The distribution can be sampled to generate simulated sequences of returns 19 | that can be used as synthetic data to test various trading algorithms.* 20 | Datasets with historic daily returns are very small, so testing algorithms using 21 | historic data is very prone to overfitting. 22 | 23 | 24 | 2. *Knowing the distribution of the daily returns (especially the volatility) 25 | can be used to determine fair prices for stock options.* The famous 26 | Black-Scholes formula predicts fair option prices. However, it assumes the 27 | daily returns to be stationary and normally distributed. In practice, observed 28 | daily returns are not stationary (the variance varies with time) and the 29 | returns are not normally distributed. They tend to have "long tails" 30 | compared to a normal distribution (i.e., kurtosis) and they are not always symmetric 31 | (i.e., skew). It's possible to estimate the variances by computing the 32 | variance of a trailing sample. However, during periods of increasing 33 | volatility this would underestimate the volatility since the volatility today 34 | can be significantly greater than the volatility of the past N days. 35 | Likewise, during periods of 36 | decreasing volatility this would overestimate the volatility. The goal is to 37 | determine the *instantaneous* volatility to provide estimates of the distribution of 38 | daily returns during the next trading day (or the next few trading days). 39 | 40 | ### Installation 41 | 42 | This package can be installed by running `pip install .` in the top-level directory of a `git clone` checkout: 43 | 44 | pip install .
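Alternatively, since the project is managed with Poetry (note the `pyproject.toml` and `poetry.lock` files), the package and its dependencies can be installed into a Poetry-managed virtual environment. A minimal sketch, assuming Poetry is already installed:

    poetry install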
45 | 46 | ### Train a new model on a set of symbols: 47 | 48 | Ideally you would train models on a larger set of symbols. Here we use a small 49 | set for demo purposes: 50 | 51 | python -m deep_volatility_models.train_univariate --symbol SPY --symbol QQQ --symbol BND 52 | 53 | 54 | ### Evaluate the model on some of the symbols 55 | This script will produce a table and a plot with volatility and mean predictions 56 | for the past and for the next trading day. 57 | 58 | python -m deep_volatility_models.evaluate_model --symbol SPY --symbol BND 59 | 60 | ## Future extensions 61 | 62 | Models generated as described above do not model correlations between symbols. 63 | It's possible to generate multivariate models that represent the correlations 64 | between symbols. 65 | 66 | The inference code described above infers the parameters of a mixture 67 | model representing the distribution of daily returns. No code has 68 | been provided here to sample these distributions to generate synthetic 69 | data. 70 | 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deep_volatility_models" 3 | version = "0.0.1" 4 | description = "Volatility models for stock prices using deep learning and mixture models." 5 | authors = ["Mark A Wicks "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.8,<3.11" 9 | pandas = "^1.2.3" 10 | torch = "^1.12.0" 11 | yfinance = "^0.1.59" 12 | matplotlib = "^3.4.0" 13 | sklearn = "^0.0" 14 | hyperopt = "^0.2" 15 | optuna = "^2.10" 16 | 17 | [tool.poetry.dev-dependencies] 18 | pytest = "^5.2" 19 | black = "^22.3.0" 20 | 21 | [build-system] 22 | requires = ["poetry-core>=1.0.0a6", "setuptools>=46.0.0"] 23 | build-backend = "poetry.core.masonry.api" 24 | -------------------------------------------------------------------------------- /scripts/train_example.sh: -------------------------------------------------------------------------------- 1 | SYMBOLS="bnd edv tyd gld vnq vti spy qqq qld xmvm vbk xlv fxg rxl fxl ibb vgt iyf xly uge jnk" 2 | 3 | args="" 4 | for symbol in $SYMBOLS 5 | do 6 | args="$args --symbol $symbol" 7 | done 8 | python -m deep_volatility_models.train_univariate $* $args 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = deep-volatility-models-mawicks 3 | version = 0.0.1 4 | author = Mark A Wicks 5 | author_email = Mark A Wicks 6 | description = Volatility models for stock prices using deep learning and mixture models. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/mawicks/deep-volatility-models 10 | project_urls = 11 | Bug Tracker = https://github.com/mawicks/deep-volatility-models/issues 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: OSI Approved :: MIT License 15 | Operating System :: OS Independent 16 | 17 | [options] 18 | package_dir = 19 | = src 20 | packages = find: 21 | python_requires = >=3.8 22 | 23 | [options.packages.find] 24 | where = src 25 | -------------------------------------------------------------------------------- /src/deep_volatility_models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import architecture 2 | from . import data_sources 3 | from . import embedding_models 4 | 5 | # from .
import evaluate_model 6 | from . import mixture_model_stats 7 | from . import model_wrappers 8 | from . import models 9 | from . import sample 10 | from . import time_series_datasets 11 | 12 | # from . import train_univariate 13 | from . import training 14 | from . import util 15 | -------------------------------------------------------------------------------- /src/deep_volatility_models/data_sources.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Callable, Dict, Iterable, Union 4 | import zipfile 5 | 6 | # Third party modules 7 | import numpy as np 8 | import pandas as pd 9 | import yfinance as yf 10 | 11 | # Local modules 12 | from deep_volatility_models import util 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | 16 | DataSource = Callable[[Union[str, Iterable[str]]], Dict[str, pd.DataFrame]] 17 | 18 | 19 | def YFinanceSource() -> DataSource: 20 | """ 21 | Sample usage: 22 | >>> from deep_volatility_models import data_sources 23 | >>> symbols = ["SPY", "QQQ"] 24 | >>> ds = data_sources.YFinanceSource() 25 | >>> response = ds(symbols) 26 | >>> response["SPY"][:4][['open', 'close']] # doctest: +NORMALIZE_WHITESPACE 27 | open close 28 | date 29 | 1993-02-01 43.96875 44.25000 30 | 1993-02-02 44.21875 44.34375 31 | 1993-02-03 44.40625 44.81250 32 | 1993-02-04 44.96875 45.00000 33 | >>> 34 | """ 35 | 36 | def _add_columns(df): 37 | new_df = df.dropna().reset_index() 38 | rename_dict = {c: util.rename_column(c) for c in new_df.columns} 39 | log_return = np.log(new_df["Adj Close"] / new_df["Adj Close"].shift(1)) 40 | new_df = new_df.assign(log_return=log_return) 41 | new_df.rename(columns=rename_dict, inplace=True) 42 | new_df.set_index("date", inplace=True) 43 | return new_df 44 | 45 | def price_history(symbol_set: Union[Iterable[str], str]) -> Dict[str, pd.DataFrame]: 46 | 47 | # Convert symbol_set to a list 48 | symbols = util.to_symbol_list(symbol_set) 49 | 50 | # Do the download 51 | df = yf.download( 52 | symbols, period="max", group_by="ticker", actions=True, progress=False 53 | ) 54 | response = {} 55 | 56 | for symbol in symbols: 57 | # The `group_by` option for yf.download() behaves differently when there's only one symbol. 58 | # Always return a dictionary of dataframes, even for one symbol. 
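            # Assumption about yfinance behavior, added for clarity: with several
            # tickers, yf.download() returns a frame whose columns are a MultiIndex
            # keyed by ticker, so each symbol's data is selected with df[symbol];
            # with a single ticker the columns are typically flat, so the frame is
            # used directly.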
59 | if len(symbols) > 1: 60 | symbol_df = df[symbol] 61 | else: 62 | symbol_df = df 63 | 64 | response[symbol] = ( 65 | _add_columns(symbol_df).dropna().applymap(lambda x: round(x, 6)) 66 | ) 67 | 68 | return response 69 | 70 | return price_history 71 | 72 | 73 | def HugeStockMarketDatasetSource(zip_filename) -> DataSource: 74 | """ 75 | Sample usage 76 | >>> from deep_volatility_models import data_sources 77 | >>> symbols = ["SPY", "QQQ"] 78 | >>> ds = data_sources.HugeStockMarketDatasetSource('archive.zip') 79 | >>> response = ds(symbols) 80 | """ 81 | 82 | def _add_columns(df): 83 | new_df = df.dropna().reset_index() 84 | rename_dict = {c: util.rename_column(c) for c in new_df.columns} 85 | new_df.rename(columns=rename_dict, inplace=True) 86 | 87 | log_return = np.log(new_df["close"] / new_df["close"].shift(1)) 88 | new_df = new_df.assign(log_return=log_return) 89 | 90 | new_df.set_index("date", inplace=True) 91 | return new_df 92 | 93 | def price_history(symbol_set: Union[Iterable[str], str]) -> Dict[str, pd.DataFrame]: 94 | 95 | # Convert symbol_set to a list 96 | symbols = util.to_symbol_list(symbol_set) 97 | response = {} 98 | 99 | with zipfile.ZipFile(zip_filename, "r") as open_zipfile: 100 | for symbol in symbols: 101 | found = False 102 | for prefix in ["Data/Stocks", "Data/ETFs"]: 103 | try: 104 | name = f"{prefix}/{symbol.lower()}.us.txt" 105 | symbol_df = pd.read_csv(open_zipfile.open(name)) 106 | response[symbol] = ( 107 | _add_columns(symbol_df) 108 | .dropna() 109 | .applymap(lambda x: round(x, 6)) 110 | ) 111 | found = True 112 | except KeyError: 113 | pass 114 | if not found: 115 | raise ValueError( 116 | f"Symbol {symbol} not found in Huge Stock Market Dataset" 117 | ) 118 | 119 | return response 120 | 121 | return price_history 122 | 123 | 124 | if __name__ == "__main__": # pragma: no cover 125 | symbols = ["spy", "qqq"] 126 | ds = YFinanceSource() 127 | response = ds(symbols) 128 | 129 | for k, v in response.items(): 130 | print(f"{k}:\n{v.head(3)}") 131 | -------------------------------------------------------------------------------- /src/deep_volatility_models/embedding_models.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict 2 | 3 | # Third party packages 4 | import torch 5 | 6 | # Local packages 7 | from deep_volatility_models import model_wrappers 8 | from deep_volatility_models import sample 9 | 10 | 11 | class SingleSymbolModelFromEmbedding(torch.nn.Module): 12 | def __init__(self, network: torch.nn.Module, single_embedding: torch.Tensor): 13 | super().__init__() 14 | self.network = network 15 | self.single_embedding = single_embedding 16 | 17 | # Client code reads the window_size attribute :( 18 | self.window_size = network.window_size 19 | 20 | @property 21 | def is_mixture(self): 22 | return self.network.is_mixture 23 | 24 | def make_predictors(self, window: torch.Tensor) -> torch.Tensor: 25 | """ 26 | Combine the `window` and the `embedding` to make `predictors` input for 27 | use with the underlying network. 
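        The stored embedding for this symbol is expanded along the minibatch
        dimension of `window` and returned together with the window as a
        `(window, embedding)` tuple, which is the input format the underlying
        network expects.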
28 | """ 29 | 30 | minibatch_dim = window.shape[0] 31 | embedding_dim = len(self.single_embedding) 32 | embedding = self.single_embedding.unsqueeze(0).expand( 33 | minibatch_dim, embedding_dim 34 | ) 35 | predictors = (window, embedding) 36 | return predictors 37 | 38 | def simulate_one( 39 | self, 40 | window: torch.Tensor, 41 | time_samples: int, 42 | ): 43 | return sample.simulate_one( 44 | self.network, 45 | self.make_predictors(window), 46 | time_samples, 47 | ) 48 | 49 | def forward(self, window: torch.Tensor) -> torch.Tensor: 50 | return self.network.forward(self.make_predictors(window)) 51 | 52 | 53 | def SingleSymbolModelFactory( 54 | encoding: Dict[str, int], wrapped_model: model_wrappers.StockModel 55 | ) -> Callable[[str], model_wrappers.StockModel]: 56 | if isinstance(wrapped_model.network.model, torch.nn.Module): 57 | model = wrapped_model.network.model 58 | else: 59 | raise ValueError( 60 | "wrapped_model must have `network` field with `model` of type `Module`" 61 | ) 62 | 63 | if isinstance(wrapped_model.network.embedding, torch.nn.Module): 64 | embeddings = wrapped_model.network.embedding 65 | else: 66 | raise ValueError( 67 | "wrapped_model must have `network` field with `embeddings` of type `Module`" 68 | ) 69 | 70 | def single_symbol_model(symbol: str) -> model_wrappers.StockModel: 71 | single_embedding = embeddings(torch.tensor(encoding[symbol])).detach() 72 | return model_wrappers.StockModel( 73 | symbols=(symbol.upper(),), 74 | network=SingleSymbolModelFromEmbedding(model, single_embedding), 75 | date=wrapped_model.date, 76 | epochs=wrapped_model.epochs, 77 | loss=wrapped_model.loss, 78 | training_data_start_date=None, 79 | training_data_end_date=None, 80 | ) 81 | 82 | return single_symbol_model 83 | -------------------------------------------------------------------------------- /src/deep_volatility_models/evaluate_model.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | 3 | import datetime as dt 4 | import logging 5 | import os 6 | import pickle 7 | import sys 8 | import traceback 9 | 10 | # Common packages 11 | import click 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import torch 16 | 17 | plt.style.use("ggplot") 18 | 19 | # import cufflinks as cf 20 | # from IPython.display import display,HTML 21 | 22 | # Local imports 23 | from deep_volatility_models import data_sources 24 | from deep_volatility_models import embedding_models 25 | from deep_volatility_models import sample 26 | from deep_volatility_models import loss_functions 27 | from deep_volatility_models import mixture_model_stats 28 | from deep_volatility_models import stock_data 29 | from deep_volatility_models import time_series_datasets 30 | 31 | 32 | pd.set_option("display.width", None) 33 | pd.set_option("display.max_columns", None) 34 | pd.set_option("display.min_rows", None) 35 | pd.set_option("display.max_rows", 10) 36 | 37 | # Configure external packages and run() 38 | logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO, force=True) 39 | 40 | # Torch configuration 41 | torch.set_printoptions( 42 | precision=4, threshold=20, edgeitems=3, linewidth=None, profile="short" 43 | ) 44 | 45 | ANNUAL_TRADING_DAYS = 252.0 46 | # ROOT_PATH = os.path.dirname(os.path.realpath(__file__)) 47 | ROOT_PATH = "." 
48 | 49 | TIME_SAMPLES = 98 50 | 51 | 52 | def simulate(model, symbol, window, current_price, simulations): 53 | """ 54 | Arguments: 55 | model: torch.nn.Module 56 | symbol: str 57 | window: single input row as a torch.Tensor of shape (symbols, window_size) 58 | """ 59 | # Create a batch dimension (we'll doing a single row, so the batch dimension is one): 60 | window = window.unsqueeze(0) 61 | 62 | logging.info(f"{symbol} window: {window.shape}") 63 | logging.info(f"{symbol} window]: {window}") 64 | 65 | simulated_returns = model.simulate_one(window, TIME_SAMPLES) 66 | simulated_returns_many = sample.simulate_many( 67 | model, window, TIME_SAMPLES, simulations 68 | ) 69 | 70 | historic_returns = np.exp(np.cumsum(window.squeeze(1).squeeze(0).numpy())) 71 | simulated_returns_many = simulated_returns_many.squeeze(1).squeeze(0).numpy() 72 | logging.info(f"mean simulated return: {np.mean(simulated_returns_many)}") 73 | sample_index = list( 74 | range( 75 | len(historic_returns) - 1, 76 | len(historic_returns) + len(simulated_returns_many) - 1, 77 | ) 78 | ) 79 | plt.plot( 80 | current_price * historic_returns / historic_returns[-1], 81 | color="k", 82 | alpha=0.5, 83 | label=f"Time Series Input ({symbol})", 84 | ) 85 | colors = ["c", "m"] 86 | for _ in range(2): 87 | plt.plot( 88 | sample_index, 89 | current_price * simulated_returns_many[:, _], 90 | f"{colors[_]}", 91 | alpha=0.5, 92 | label=f"Sampled Prediction #{_+1}", 93 | ) 94 | plt.xlabel("Time (days)") 95 | plt.ylabel("Price ($)") 96 | 97 | max_return = np.percentile(simulated_returns_many, 95.0, axis=1) 98 | min_return = np.percentile(simulated_returns_many, 5.0, axis=1) 99 | 100 | plt.plot( 101 | sample_index, 102 | current_price * max_return, 103 | "b-", 104 | alpha=0.3, 105 | label="95th Percentile Price (Est)", 106 | ) 107 | plt.plot( 108 | sample_index, 109 | current_price * min_return, 110 | "r-", 111 | alpha=0.3, 112 | label="5th Percentile Price (Est)", 113 | ) 114 | plt.legend(loc="lower left") 115 | ax = plt.gca() 116 | xlim = ax.get_xlim() 117 | ylim = ax.get_ylim() 118 | current_aspect = (xlim[1] - xlim[0]) / (ylim[1] - ylim[0]) 119 | ax.set_aspect(0.5 * current_aspect) 120 | plt.savefig(f"model_evaluation_{symbol}@2x.png", dpi=200) 121 | plt.show() 122 | 123 | 124 | def do_one_symbol(symbol, model, refresh, simulations, start_date, end_date): 125 | logging.info(f"symbol: {symbol.upper()}") 126 | # logging.info(f"model: {model}") - Is having this useful? 127 | logging.info(f"refresh: {refresh}") 128 | logging.info(f"simulations: {simulations}") 129 | logging.info(f"start date: {start_date}") 130 | logging.info(f"end date: {end_date}") 131 | 132 | window_size = model.network.window_size 133 | 134 | # FIXME when we're certain the model file was saved in eval mode. 135 | model.network.eval() 136 | 137 | logging.info(f"model epochs:\t{model.epochs}") 138 | logging.info(f"model loss:\t{model.loss:.4f}") 139 | logging.info(f"model symbols:\t{model.symbols}") 140 | 141 | # Refresh historical data 142 | logging.info("Reading historical data") 143 | 144 | data_store = stock_data.FileSystemStore(os.path.join(ROOT_PATH, "current_data")) 145 | data_source = data_sources.YFinanceSource() 146 | history_loader = stock_data.CachingSymbolHistoryLoader( 147 | data_source, data_store, overwrite_existing=True 148 | ) 149 | # The Cachingloader returns a sequence of (symbol, data). 150 | # Since we pass just one symbol rather than a list, use 151 | # next to grab the first (symbol, dataframe) pair, then [1] to grab the data. 
152 | symbol_history = next(history_loader(symbol))[1] 153 | 154 | # Start date represents the date of the first prediction. In other 155 | # words, all points in the window are before that date. Grab 156 | # `window_size` points prior to that date which will be used for 157 | # the prediction. 158 | 159 | # Note: start_date and end_date represent the first and last dates 160 | # where we have both a prediction and a return value for 161 | # validating the prediction. The first prediction will be for 162 | # start_date but will be based on a full window of history prior 163 | # to and not including start_date. When we make predictions using 164 | # all of this data the first prediction will be for start_date. 165 | # The last prediction will be for the first business day after 166 | # end_date. This will require data up to and including 167 | # end_date. This is one more prediction than we need for 168 | # validation. This prediction will automatically be dropped by a 169 | # merge below because there is no in-window historical data to 170 | # compare it to. We will print this prediction for reference 171 | # before the merge. 172 | 173 | if start_date: 174 | start_position = symbol_history.index.get_loc(start_date) - window_size 175 | else: 176 | start_position = 0 177 | if end_date: 178 | end_position = symbol_history.index.get_loc(end_date) + 1 179 | else: 180 | end_position = None 181 | symbol_history = symbol_history.iloc[start_position:end_position] 182 | print(symbol_history) 183 | 184 | logging.info(f"symbol history:\n{symbol_history}") 185 | 186 | current_price = symbol_history.close[-1] 187 | windowed_returns = time_series_datasets.RollingWindow( 188 | symbol_history.log_return, 189 | window_size, 190 | create_channel_dim=True, 191 | ) 192 | logging.debug(f"{symbol} windowed_returns[0]: {windowed_returns[0].shape}") 193 | logging.debug(f"{symbol} windowed_returns[0]: {windowed_returns[0]}") 194 | 195 | simulate(model.network, symbol, windowed_returns[-1], current_price, simulations) 196 | 197 | with torch.no_grad(): 198 | # Discard the last windowed_return because it would make a 199 | # prediction beyond end_date. We're only interested in 200 | # predictions that we can compare to actual returns. 201 | windows = torch.stack(tuple(windowed_returns)[:-1], dim=0) 202 | logging.debug(f"{symbol} windows: {windows.shape}") 203 | 204 | # First prediction date is first date following the first window. 205 | # Last prediction date is the date of the last data point. 206 | # These are the dates for which we make predictions. 
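    # Illustrative example (not from the original source): with window_size=3 and
    # history indexed d0..d5, predictions are made for d3, d4, and d5; the
    # prediction for d3 is based on the window (d0, d1, d2).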
207 | prediction_dates = symbol_history.index[window_size:] 208 | ar = symbol_history.loc[prediction_dates].log_return 209 | actual_returns = torch.tensor(ar, dtype=torch.float).unsqueeze(1) 210 | 211 | print("actual_returns on prediction dates:\n", actual_returns) 212 | 213 | if model.network.is_mixture: 214 | log_p, mu, sigma_inv = model.network(windows)[:3] 215 | p = torch.exp(log_p) 216 | ll = mixture_model_stats.univariate_log_likelihood( 217 | actual_returns, log_p, mu, sigma_inv 218 | ) 219 | 220 | logging.debug(f"p: {p}") 221 | logging.debug(f"mu: {mu}") 222 | logging.debug(f"sigma_inv: {sigma_inv}") 223 | 224 | mean, variance = mixture_model_stats.univariate_combine_metrics( 225 | p, mu, sigma_inv 226 | ) 227 | else: 228 | mu, sigma_inv = model.network(windows)[:2] 229 | ll = loss_functions.univariate_log_likelihood(actual_returns, mu, sigma_inv) 230 | 231 | logging.debug(f"mu: {mu}") 232 | logging.debug(f"sigma_inv: {sigma_inv}") 233 | 234 | mean = mu.squeeze(1) 235 | sigma = torch.inverse(sigma_inv) 236 | variance = (sigma.squeeze(2).squeeze(1)) ** 2 237 | p = torch.ones((mean.shape[0],)) 238 | 239 | annual_return = ANNUAL_TRADING_DAYS * mean 240 | daily_std_dev = np.sqrt(variance) 241 | volatility = np.sqrt(ANNUAL_TRADING_DAYS) * daily_std_dev 242 | 243 | logging.debug(f"daily mean: {mean}") 244 | logging.debug(f"daily std_dev: {daily_std_dev}") 245 | 246 | logging.debug(f"annual return: {annual_return}") 247 | logging.debug(f"annual volatility: {volatility}") 248 | 249 | logging.info( 250 | f"*** Validation range: {prediction_dates[0].date()} to {prediction_dates[-1].date()} ***" 251 | ) 252 | logging.info(f"*** mean log likelihood: {round(float(torch.mean(ll)),4)} ***") 253 | 254 | df = pd.DataFrame( 255 | { 256 | "pred_volatility": volatility, 257 | "pred_return": map( 258 | lambda x: x.numpy(), mean 259 | ), # Hack so it will print but won't plot 260 | "pred_sigma": daily_std_dev, 261 | "p": map(lambda x: x.numpy(), p), 262 | "mu": map(lambda x: x.numpy(), mu), 263 | "sigma_inv": map(lambda x: x.numpy(), sigma_inv), 264 | }, 265 | index=prediction_dates, 266 | ) 267 | 268 | df = df.merge( 269 | symbol_history, 270 | left_index=True, 271 | right_index=True, 272 | ) 273 | 274 | df = df[ 275 | [ 276 | "pred_volatility", 277 | "log_return", 278 | "close", 279 | "pred_return", 280 | "pred_sigma", 281 | "p", 282 | "mu", 283 | "sigma_inv", 284 | ] 285 | ] 286 | 287 | return_df = df[ 288 | ["log_return", "pred_return", "pred_volatility", "p", "mu", "sigma_inv"] 289 | ] 290 | return return_df 291 | 292 | 293 | def run(model, symbol, simulations, start_date=None, end_date=None): 294 | wrapped_model = torch.load(model) 295 | single_symbol_model_factory = embedding_models.SingleSymbolModelFactory( 296 | wrapped_model.encoding, wrapped_model 297 | ) 298 | 299 | # symbols_to_process = list(set(symbol).difference(exclude_symbols)) 300 | symbols_to_process = sorted(list(set(symbol))) 301 | logging.info(f"symbols_to_process: {symbols_to_process}") 302 | 303 | dataframes = {} 304 | for s in symbols_to_process: 305 | df = do_one_symbol( 306 | s, 307 | single_symbol_model_factory(s.upper()), 308 | True, 309 | simulations, 310 | start_date, 311 | end_date, 312 | ) 313 | dataframes[s] = df 314 | 315 | combined_df = pd.concat( 316 | dataframes.values(), keys=dataframes.keys(), axis=1 317 | ).dropna() 318 | 319 | return combined_df 320 | 321 | 322 | @click.command() 323 | @click.option( 324 | "--model", 325 | show_default=True, 326 | help="Model file to use.", 327 | ) 328 | @click.option( 329 | 
"--symbol", 330 | multiple=True, 331 | show_default=True, 332 | help="Load model for this symbol.", 333 | ) 334 | @click.option( 335 | "--start-date", 336 | type=click.DateTime(formats=["%Y-%m-%d"]), 337 | show_default=True, 338 | help="Date of first return prediction (must be a business day)", 339 | ) 340 | @click.option( 341 | "--end-date", 342 | type=click.DateTime(formats=["%Y-%m-%d"]), 343 | default=None, 344 | show_default=True, 345 | help="Date of last return prediction (must be a business day)", 346 | ) 347 | @click.option( 348 | "--simulations", 349 | type=int, 350 | show_default=True, 351 | default=10, 352 | help="Number of simulations to run", 353 | ) 354 | def run_cli( 355 | model, 356 | symbol, 357 | start_date, 358 | end_date, 359 | simulations, 360 | ): 361 | logging.info(f"model: {model}") 362 | logging.info(f"symbol: {symbol}") 363 | logging.info(f"start_date: {start_date}") 364 | logging.info(f"simulations: {simulations}") 365 | 366 | df = run(model, symbol, simulations, start_date, end_date) 367 | 368 | logging.info(df) 369 | df.plot(subplots=True) 370 | plt.savefig("volatility_over_time.png") 371 | plt.show() 372 | 373 | 374 | if __name__ == "__main__": 375 | # Run everything 376 | run_cli() 377 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_opt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | ] 33 | 34 | search_space = { 35 | "mixture_components": 3 + hp.randint("mixture_components", 4), 36 | "feature_dimension": 40 + hp.randint("feature_dimension", 51), 37 | "embedding_dimension": 3 + hp.randint("embedding_dimension", 13), 38 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-2)), 39 | "dropout": hp.uniform("dropout", 0, 0.25), 40 | "learning_rate": hp.loguniform("learning_rate", np.log(1e-4), np.log(1e-2)), 41 | "weight_decay": hp.loguniform("weight_decay", np.log(1e-8), np.log(1e-6)), 42 | "window_size": hp.choice("window_size", [64, 256]), 43 | "minibatch_size": 64 + hp.randint("minibatch_size", 193), 44 | } 45 | 46 | 47 | def objective(parameters): 48 | # Be a good citizen and make a copy since we're going to modify the dictionary 49 | parameters = parameters.copy() 50 | 51 | # `minibatch_size` has to be a Python int, not a numpy int. 
52 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 53 | 54 | logging.info("************************") 55 | for key, value in parameters.items(): 56 | logging.info(f"{key}: {value}") 57 | 58 | loss = train_univariate.run( 59 | existing_model=None, 60 | symbols=SYMBOLS, 61 | refresh=False, 62 | only_embeddings=False, 63 | max_epochs=400, 64 | early_termination=20, 65 | **parameters, 66 | ) 67 | 68 | logging.info(f"loss: {loss}") 69 | logging.info("************************") 70 | 71 | return loss 72 | 73 | 74 | if __name__ == "__main__": 75 | trials = Trials() 76 | 77 | best = fmin( 78 | objective, 79 | space=search_space, 80 | algo=tpe.suggest, 81 | max_evals=200, 82 | trials=trials, 83 | ) 84 | print(trials.trials) 85 | 86 | print("\n***** Best parameters *****") 87 | 88 | print(best) 89 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_opt2.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | "aal", 33 | "amd", 34 | "amzn", 35 | "bac", 36 | "cmcsa", 37 | "cmg", 38 | "dis", 39 | "f", 40 | "fb", 41 | "ge", 42 | "gld", 43 | "gme", 44 | "goog", 45 | "iyr", 46 | "jnk", 47 | "mro", 48 | "nflx", 49 | "qqq", 50 | "sbux", 51 | "spy", 52 | "t", 53 | "trip", 54 | "twtr", 55 | "v", 56 | "wfc", 57 | "vti", 58 | "ba", 59 | "c", 60 | "gm", 61 | "intc", 62 | "jpm", 63 | "hpe", 64 | "ko", 65 | "kr", 66 | "mgm", 67 | "msft", 68 | "mvis", 69 | "oxy", 70 | "pins", 71 | "uber", 72 | "x", 73 | "xom", 74 | "gps", 75 | "jnj", 76 | "nke", 77 | "pypl", 78 | "wmt", 79 | "ups", 80 | "baba", 81 | "sq", 82 | "fdx", 83 | "snap", 84 | "amc", 85 | "pfe", 86 | "rkt", 87 | "aapl", 88 | "pton", 89 | "csco", 90 | "roku", 91 | "sq", 92 | "snow", 93 | "bnd", 94 | "vbk", 95 | "xmvm", 96 | "nvda", 97 | "vz", 98 | ] 99 | 100 | # Dedup 101 | SYMBOLS = list(set(SYMBOLS)) 102 | 103 | search_space = { 104 | "mixture_components": 3 + hp.randint("mixture_components", 3), 105 | "feature_dimension": 50 + hp.randint("feature_dimension", 41), 106 | "embedding_dimension": 3 + hp.randint("embedding_dimension", 8), 107 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-3)), 108 | "dropout": hp.uniform("dropout", 0.075, 0.125), 109 | "learning_rate": hp.loguniform("learning_rate", np.log(1e-4), np.log(1e-3)), 110 | "weight_decay": hp.loguniform("weight_decay", np.log(1e-7), np.log(1e-6)), 111 | "window_size": hp.choice("window_size", [64, 256]), 112 | "minibatch_size": 128 + hp.randint("minibatch_size", 129), 113 | } 114 | 115 | 116 | def objective(parameters): 117 | # Be a good citizen and make a copy since we're going to modify the dictionary 118 | parameters = parameters.copy() 119 | 120 | # `minibatch_size` has to be a Python int, not a numpy int. 
121 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 122 | 123 | logging.info("************************") 124 | for key, value in parameters.items(): 125 | logging.info(f"{key}: {value}") 126 | 127 | loss = train_univariate.run( 128 | existing_model=None, 129 | symbols=SYMBOLS, 130 | refresh=False, 131 | only_embeddings=False, 132 | max_epochs=400, 133 | early_termination=20, 134 | **parameters, 135 | ) 136 | 137 | logging.info(f"loss: {loss}") 138 | logging.info("************************") 139 | 140 | return loss 141 | 142 | 143 | if __name__ == "__main__": 144 | trials = Trials() 145 | 146 | best = fmin( 147 | objective, 148 | space=search_space, 149 | algo=tpe.suggest, 150 | max_evals=200, 151 | trials=trials, 152 | ) 153 | print(trials.trials) 154 | 155 | print("\n***** Best parameters *****") 156 | 157 | print(best) 158 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_opt_risk_neutral.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | "aal", 33 | "amzn", 34 | "bac", 35 | "cmcsa", 36 | "cmg", 37 | "dis", 38 | "f", 39 | "fb", 40 | "gld", 41 | "gme", 42 | "iyr", 43 | "jnk", 44 | "mro", 45 | "nflx", 46 | "qqq", 47 | "sbux", 48 | "spy", 49 | "t", 50 | "trip", 51 | "twtr", 52 | "v", 53 | "wfc", 54 | "vti", 55 | "ba", 56 | "c", 57 | "gm", 58 | "intc", 59 | "jpm", 60 | "hpe", 61 | "ko", 62 | "kr", 63 | "mgm", 64 | "msft", 65 | "mvis", 66 | "oxy", 67 | "pins", 68 | "uber", 69 | "x", 70 | "xom", 71 | "gps", 72 | "jnj", 73 | "nke", 74 | "pypl", 75 | "wmt", 76 | "ups", 77 | "baba", 78 | "sq", 79 | "fdx", 80 | "snap", 81 | "amc", 82 | "pfe", 83 | "rkt", 84 | "aapl", 85 | "pton", 86 | "csco", 87 | "roku", 88 | "sq", 89 | "snow", 90 | "nvda", 91 | "vz", 92 | ] 93 | 94 | search_space = { 95 | "mixture_components": 1 + hp.randint("mixture_components", 6), 96 | "feature_dimension": 40 + hp.randint("feature_dimension", 51), 97 | "embedding_dimension": 3 + hp.randint("embedding_dimension", 6), 98 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-2)), 99 | "dropout": hp.uniform("dropout", 0, 0.01), 100 | "learning_rate": hp.loguniform("learning_rate", np.log(4e-4), np.log(2e-3)), 101 | "weight_decay": hp.loguniform("weight_decay", np.log(5e-7), np.log(2e-6)), 102 | "window_size": hp.choice("window_size", [64, 256]), 103 | "minibatch_size": 64 + hp.randint("minibatch_size", 193), 104 | } 105 | 106 | 107 | def objective(parameters): 108 | # Be a good citizen and make a copy since we're going to modify the dictionary 109 | parameters = parameters.copy() 110 | 111 | # `minibatch_size` has to be a Python int, not a numpy int. 
112 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 113 | 114 | logging.info("************************") 115 | for key, value in parameters.items(): 116 | logging.info(f"{key}: {value}") 117 | 118 | loss = train_univariate.run( 119 | use_hsmd=False, 120 | model_file="hyperopt_risk_neutral.pt", 121 | existing_model=None, 122 | symbols=SYMBOLS, 123 | refresh=False, 124 | risk_neutral=True, 125 | only_embeddings=False, 126 | max_epochs=400, 127 | early_termination=20, 128 | use_batch_norm=False, 129 | **parameters, 130 | )[1] 131 | 132 | logging.info(f"loss: {loss}") 133 | logging.info("************************") 134 | 135 | return loss 136 | 137 | 138 | if __name__ == "__main__": 139 | trials = Trials() 140 | 141 | best = fmin( 142 | objective, 143 | space=search_space, 144 | algo=tpe.suggest, 145 | max_evals=200, 146 | trials=trials, 147 | ) 148 | print(trials.trials) 149 | 150 | print("\n***** Best parameters *****") 151 | 152 | print(best) 153 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_risk_neutral_no_mixture.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | "aal", 33 | "amzn", 34 | "bac", 35 | "cmcsa", 36 | "cmg", 37 | "dis", 38 | "f", 39 | "fb", 40 | "gld", 41 | "gme", 42 | "iyr", 43 | "jnk", 44 | "mro", 45 | "nflx", 46 | "qqq", 47 | "sbux", 48 | "spy", 49 | "t", 50 | "trip", 51 | "twtr", 52 | "v", 53 | "wfc", 54 | "vti", 55 | "ba", 56 | "c", 57 | "gm", 58 | "intc", 59 | "jpm", 60 | "hpe", 61 | "ko", 62 | "kr", 63 | "mgm", 64 | "msft", 65 | "mvis", 66 | "oxy", 67 | "pins", 68 | "uber", 69 | "x", 70 | "xom", 71 | "gps", 72 | "jnj", 73 | "nke", 74 | "pypl", 75 | "wmt", 76 | "ups", 77 | "baba", 78 | "sq", 79 | "fdx", 80 | "snap", 81 | "amc", 82 | "pfe", 83 | "rkt", 84 | "aapl", 85 | "pton", 86 | "csco", 87 | "roku", 88 | "sq", 89 | "snow", 90 | "nvda", 91 | "vz", 92 | ] 93 | 94 | search_space = { 95 | "feature_dimension": 30 + hp.randint("feature_dimension", 61), 96 | "embedding_dimension": 2 + hp.randint("embedding_dimension", 6), 97 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-2)), 98 | "dropout": hp.uniform("dropout", 0, 0.01), 99 | "learning_rate": hp.loguniform("learning_rate", np.log(4e-4), np.log(2e-3)), 100 | "weight_decay": hp.loguniform("weight_decay", np.log(5e-7), np.log(2e-6)), 101 | "window_size": hp.choice("window_size", [64, 256]), 102 | "use_batch_norm": hp.choice("use_batch_norm", [False]), 103 | "use_mixture": hp.choice("use_mixture", [False]), 104 | "minibatch_size": 64 + hp.randint("minibatch_size", 193), 105 | } 106 | 107 | 108 | def objective(parameters): 109 | # Be a good citizen and make a copy since we're going to modify the dictionary 110 | parameters = parameters.copy() 111 | 112 | # `minibatch_size` has to be a Python int, not a numpy int. 
113 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 114 | 115 | logging.info("************************") 116 | for key, value in parameters.items(): 117 | logging.info(f"{key}: {value}") 118 | 119 | loss = train_univariate.run( 120 | use_hsmd=False, 121 | model_file="hyperopt_risk_neutral_no_mixture.pt", 122 | existing_model=None, 123 | symbols=SYMBOLS, 124 | refresh=False, 125 | risk_neutral=True, 126 | mixture_components=1, 127 | only_embeddings=False, 128 | max_epochs=400, 129 | early_termination=20, 130 | **parameters, 131 | )[1] 132 | 133 | logging.info(f"loss: {loss}") 134 | logging.info("************************") 135 | 136 | return loss 137 | 138 | 139 | if __name__ == "__main__": 140 | trials = Trials() 141 | 142 | best = fmin( 143 | objective, 144 | space=search_space, 145 | algo=tpe.suggest, 146 | max_evals=200, 147 | trials=trials, 148 | ) 149 | print(trials.trials) 150 | 151 | print("\n***** Best parameters *****") 152 | 153 | print(best) 154 | -------------------------------------------------------------------------------- /src/deep_volatility_models/loss_functions.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | import math 3 | 4 | # Common packages 5 | import torch 6 | 7 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 8 | EPS_FOR_LOG = 1e-8 9 | 10 | # TODO: Write a test for this 11 | 12 | 13 | def univariate_log_likelihood( 14 | x: torch.Tensor, mu: torch.Tensor, sigma_inv: torch.Tensor 15 | ): 16 | """Inputs: 17 | x: tensor of shape tensor(mb_size, symbols=1) containing the observed values 18 | 19 | mu: tensor of shape (mb_size, symbols=1) containing the mu 20 | estimate for each component 21 | 22 | sigma_inv: tensor of shape (mb_size, input_symbols=1, 23 | output_symbols=1) containing the estimate of the reciprocal of 24 | the sqrt of the variance for each component 25 | 26 | Output: 27 | tensor of shape (mb_size,) containing the log likelihood for each sample 28 | in the batch 29 | 30 | Note: 31 | The symbol dimension may seem superfluous, but the 32 | dimensions of the input tensors have been chosen for 33 | compatability with a multivarate version of this function, 34 | which requires the number of symbols. The dimensions 35 | associated with the number of symbols are required to be 1. 36 | 37 | """ 38 | if not isinstance(x, torch.Tensor): 39 | x = torch.tensor(x, dtype=torch.float) 40 | if not isinstance(mu, torch.Tensor): 41 | mu = torch.tensor(mu, dtype=torch.float) 42 | if not isinstance(sigma_inv, torch.Tensor): 43 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 44 | 45 | mb_size, symbols = sigma_inv.shape[:2] 46 | if ( 47 | x.shape != (mb_size, symbols) 48 | or mu.shape != (mb_size, symbols) 49 | or sigma_inv.shape != (mb_size, symbols, symbols) 50 | ): 51 | raise ValueError( 52 | f"Dimensions of x {x.shape}, mu {mu.shape}, and sigma_inv {sigma_inv.shape} are inconsistent" 53 | ) 54 | 55 | if symbols != 1: 56 | raise ValueError( 57 | f"This function requires the number of symbols to be 1 and not {symbols}" 58 | ) 59 | 60 | # Drop the dimensions that were just confirmed to be one. 61 | x = x.squeeze(1) 62 | mu = mu.squeeze(1) 63 | sigma_inv = sigma_inv.squeeze(2).squeeze(1) 64 | 65 | z_squared = (sigma_inv * (x - mu)) ** 2 66 | 67 | # Inclusion of EPS is to ensure argument remains bounded away from zero. 
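    # The quantity assembled below is the log-density of a univariate normal,
    #   log N(x; mu, sigma) = -0.5 * z**2 + log(sigma_inv) - 0.5 * log(2*pi),
    # where z = sigma_inv * (x - mu); log(sigma_inv) is computed next and the
    # three terms are summed.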
68 | log_sigma_inv = torch.log( 69 | torch.maximum(torch.tensor(EPS_FOR_LOG), torch.abs(sigma_inv)) 70 | ) 71 | 72 | # log_p, z_squared, and log_sigma_inv have the same shape: (mb_size, mixture_components) 73 | 74 | ll = -0.5 * z_squared + log_sigma_inv - LOG_SQRT_TWO_PI 75 | 76 | return ll 77 | -------------------------------------------------------------------------------- /src/deep_volatility_models/mixture_model_stats.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | import math 3 | 4 | # Common packages 5 | import torch 6 | 7 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 8 | EPS_FOR_LOG = 1e-8 9 | EPS_FOR_SINGULARITY = 1e-4 10 | 11 | # TODO: Write a test for this 12 | 13 | 14 | def univariate_log_likelihood( 15 | x: torch.Tensor, log_p: torch.Tensor, mu: torch.Tensor, sigma_inv: torch.Tensor 16 | ): 17 | """ 18 | Inputs: 19 | x: tensor of shape tensor(mb_size,1) containing the observed values 20 | 21 | log_p: tensor of shape (mb_size, mixture_componente) containing the log 22 | probability of each component. 23 | 24 | mu: tensor of shape (mb_size, mixture_components, 1) containing the mu 25 | estimate for each component 26 | 27 | sigma_inv: tensor of shape (mb_size, mixture_components, 1, 1) containing the 28 | estimate of the reciprocal of the sqrt of the variance for each component 29 | 30 | Output: 31 | tensor of shape (mb_size,) containing the log likelihood for each sample 32 | in the batch 33 | 34 | Note: The dimensions of the input tensors have been chosen for compatability 35 | with a multivarate version of this function. The dimensions associated with 36 | the number of symbols are required to be 1. 37 | 38 | """ 39 | if not isinstance(x, torch.Tensor): 40 | x = torch.tensor(x, dtype=torch.float) 41 | if not isinstance(log_p, torch.Tensor): 42 | log_p = torch.tensor(log_p, dtype=torch.float) 43 | if not isinstance(mu, torch.Tensor): 44 | mu = torch.tensor(mu, dtype=torch.float) 45 | if not isinstance(sigma_inv, torch.Tensor): 46 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 47 | 48 | mb_size, mixture_components, symbols = sigma_inv.shape[:3] 49 | if ( 50 | x.shape != (mb_size, symbols) 51 | or log_p.shape != (mb_size, mixture_components) 52 | or mu.shape != (mb_size, mixture_components, symbols) 53 | or sigma_inv.shape != (mb_size, mixture_components, symbols, symbols) 54 | ): 55 | raise ValueError( 56 | f"Dimensions of x ({x.shape}), log_p ({log_p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 57 | ) 58 | 59 | if symbols != 1: 60 | raise ValueError( 61 | f"Symbol dim is {symbols}. This function requires the number of symbols to be 1" 62 | ) 63 | 64 | # Drop the dimensions that were just confirmed to be one. 65 | x = x.squeeze(1) 66 | mu = mu.squeeze(2) 67 | sigma_inv = sigma_inv.squeeze(3).squeeze(2) 68 | 69 | # Subtract mu from x in each component. 70 | # Be explicit rather than relying on broadcasting 71 | e = x.unsqueeze(1).expand(mu.shape) - mu 72 | 73 | z_squared = (sigma_inv * e) ** 2 74 | 75 | # Inclusion of EPS is to ensure argument remains bounded away from zero. 
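    # Each mixture component contributes
    #   log p_i - 0.5 * z_i**2 + log(sigma_inv_i) - 0.5 * log(2*pi),
    # i.e. its log weight plus its normal log-density; the per-component terms
    # are combined below with logsumexp for numerical stability.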
76 | log_sigma_inv = torch.log(EPS_FOR_LOG + torch.abs(sigma_inv)) 77 | 78 | # log_p, z_squared, and log_sigma_inv have the same shape: (mb_size, mixture_components) 79 | 80 | ll_components = log_p - 0.5 * z_squared + log_sigma_inv - LOG_SQRT_TWO_PI 81 | 82 | # Now sum over the mixture components with logsumexp to get the liklihoods 83 | # for each batch sample 84 | ll = torch.logsumexp(ll_components, dim=1) 85 | return ll 86 | 87 | 88 | def multivariate_log_likelihood( 89 | x: torch.Tensor, log_p: torch.Tensor, mu: torch.Tensor, sigma_inv: torch.Tensor 90 | ): 91 | """Inputs: 92 | x (tensor(mb_size, channels)): values 93 | log_p (tensor(mb_size, mixture_componente)): 94 | log probability of each component (this code assumes 95 | these have been normalized with logsumexp!!) 96 | mu (tensor(mb_size, mixture_components, channels): mu for each component 97 | sigma_inv (tensor(mb_size, mixture_components, channels, channels)): 98 | - sqrt of inverse of covariance matrix 99 | (More specifically, the inverse of the lower triangular 100 | Cholesky factor of the channel covariances so that 101 | C^{-1} = L^T L) 102 | 103 | 104 | Output: 105 | tensor(mb_size): log likelihood for each sample in batch 106 | 107 | """ 108 | mb_size, mixture_components, channels = sigma_inv.shape[:3] 109 | if ( 110 | x.shape != (mb_size, channels) 111 | or log_p.shape != (mb_size, mixture_components) 112 | or mu.shape != (mb_size, mixture_components, channels) 113 | or sigma_inv.shape != (mb_size, mixture_components, channels, channels) 114 | ): 115 | raise ValueError( 116 | f"Dimensions of x ({x.shape}), log_p ({log_p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 117 | ) 118 | 119 | # Ensure the sigma_inv matrix is lower triangular 120 | # Values in the upper triangle part get ignored 121 | sigma_inv = torch.tril(sigma_inv) 122 | 123 | e = x.unsqueeze(1).expand(mu.shape) - mu 124 | # e is (mb_size, mixture_components, channels) 125 | 126 | e = e.unsqueeze(3) 127 | # e is now (mb_size, mixture_components, channels, 1) 128 | 129 | z = torch.matmul(sigma_inv, e) 130 | # z is (mb_size, mixture_components, channels, 1) 131 | 132 | z_squared = torch.sum((z**2).squeeze(3), dim=2) 133 | # z_squared is (mb_size, mixture_components) 134 | 135 | # print('x: ', x) 136 | # print('mu: ', mu) 137 | # print('e: ', e) 138 | # print('z_squared: ', z_squared) 139 | 140 | # Compute the log of the diagonal entries of the inverse covariance matrix 141 | # Inclusion of EPS is to ensure argument stays well above zero. 142 | log_diag_sigma_inv = torch.log( 143 | EPS_FOR_LOG + torch.abs(torch.diagonal(sigma_inv, 0, -2, -1)) 144 | ) 145 | # log_diag_sigma_inv is (mb_size, mixture_components, channels) 146 | 147 | # Compute the log of the determinant of the inverse covariance 148 | # matrix by summing the above 149 | log_det_sigma_inv = torch.sum(log_diag_sigma_inv, dim=2) 150 | # print('log_det_sigma_inv', log_det_sigma_inv) 151 | # log_det_sigma_inv is (mb_size, mixture_components) 152 | 153 | ll_components = ( 154 | log_p - 0.5 * z_squared + log_det_sigma_inv - channels * LOG_SQRT_TWO_PI 155 | ) 156 | 157 | # Now sum over the components with logsumexp to get the liklihoods 158 | # for each batch sample 159 | ll = torch.logsumexp(ll_components, dim=1) 160 | return ll 161 | 162 | 163 | # TODO: Generalize the following function for the multivariate case. 
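# The combine_metrics functions below collapse a mixture to a single mean and
# variance via the law of total variance:
#   mu = sum_i p_i * mu_i,   Var[x] = sum_i p_i * (sigma_i**2 + (mu_i - mu)**2).
# The same derivation appears as a comment inside each function.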
164 | 165 | 166 | def new_univariate_combine_metrics(p, mu, sigma_inv): 167 | """ 168 | Given a mixture model of normal distributions charaterized by probabilities 169 | (p), components-wise mean (mu) and component-wise inverse standard deviation 170 | (sigma_inv), compute the overall mean and inverse standard deviation for the 171 | mixture. 172 | 173 | Note: This assumes a univariate mu and sigma_inv. It's simpler than the multivariate version. 174 | 175 | Inputs: 176 | p: tensor of shape (mb_size, mixture_componente): probability of each component 177 | mu: tensor of shape (mb_size, mixture_components): mu for each 178 | component. 179 | sigma_inv: tensor of shape (mb_size, mixture_components) containing 180 | the inverse of the standard deviation of each component. 181 | 182 | Outputs: 183 | mu: tensor of shape (mb_size,) containing the expected mean 184 | variance: tensor of shape (mb_size,) containing the 185 | variance of the mixture. 186 | 187 | Note that the return value is the variance (i.e., the standard deviation squared) and *not* the inverse 188 | of the standard deviation that's often used elsewhere in this code. 189 | 190 | """ 191 | if not isinstance(p, torch.Tensor): 192 | p = torch.tensor(p, dtype=torch.float) 193 | if not isinstance(mu, torch.Tensor): 194 | mu = torch.tensor(mu, dtype=torch.float) 195 | if not isinstance(sigma_inv, torch.Tensor): 196 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 197 | 198 | if p.shape != mu.shape or p.shape != sigma_inv.shape: 199 | raise ValueError( 200 | f"Dimensions of p ({p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 201 | ) 202 | 203 | variance = (1.0 / sigma_inv) ** 2 204 | composite_mean = torch.sum(p * mu, dim=1) 205 | 206 | # Composite variance comes from the shifted component means and 207 | # shifted component covariances. Here's a derivation: 208 | 209 | # E[(x-mu)**2] = sum p_i E[(x_i-mu)**2] 210 | # E[(x_i-mu)**2] = E[((x_i-mu_i) + (mu_i-mu))**2] 211 | # = sigma_i**2 + (mu_i-mu)**2 212 | 213 | shifted_component_means = mu - composite_mean.unsqueeze(1).expand(mu.shape) 214 | shifted_component_variances = variance + shifted_component_means**2 215 | composite_variance = torch.sum(p * shifted_component_variances, dim=1) 216 | return composite_mean, composite_variance 217 | 218 | 219 | def univariate_combine_metrics(p, mu, sigma_inv): 220 | """ 221 | Given a mixture model of normal distributions charaterized by probabilities 222 | (p), components-wise mean (mu) and component-wise inverse standard deviation 223 | (sigma_inv), compute the overall mean and inverse standard deviation for the 224 | mixture. 225 | 226 | Note: This assumes a univariate mu and sigma_inv. It's simpler than the multivariate version. 227 | 228 | Inputs: 229 | p: tensor of shape (mb_size, mixture_componente): probability of each component 230 | mu: tensor of shape (mb_size, mixture_components, 1): mu for each 231 | component. 232 | sigma_inv: tensor of shape (mb_size, mixture_components, 1, 1) containing 233 | the inverse of the standard deviation of each component. 234 | 235 | Outputs: 236 | mu: tensor of shape (mb_size,) containing the expected mean 237 | variance: tensor of shape (mb_size,) containing the 238 | variance of the mixture. 239 | 240 | Note that the return value is the variance (i.e., the standard deviation squared) and *not* the inverse 241 | of the standard deviation that's often used elsewhere in this code. 
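    Example (an illustrative sketch):

        p = torch.tensor([[0.5, 0.5]])
        mu = torch.tensor([[[1.0], [-1.0]]])
        sigma_inv = torch.ones(1, 2, 1, 1)
        mean, variance = univariate_combine_metrics(p, mu, sigma_inv)
        # mean == tensor([0.]) and variance == tensor([2.]): each component
        # contributes its unit variance plus its squared offset from the
        # composite mean, weighted by its probability.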
242 | 243 | """ 244 | if not isinstance(p, torch.Tensor): 245 | p = torch.tensor(p, dtype=torch.float) 246 | if not isinstance(mu, torch.Tensor): 247 | mu = torch.tensor(mu, dtype=torch.float) 248 | if not isinstance(sigma_inv, torch.Tensor): 249 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 250 | 251 | mb_size, mixture_components, symbols = sigma_inv.shape[:3] 252 | if ( 253 | p.shape != (mb_size, mixture_components) 254 | or mu.shape != (mb_size, mixture_components, symbols) 255 | or sigma_inv.shape != (mb_size, mixture_components, symbols, symbols) 256 | ): 257 | raise ValueError( 258 | f"Dimensions of p ({p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 259 | ) 260 | 261 | if symbols != 1: 262 | raise ValueError( 263 | f"Symbol dim is {symbols}. This code requires the number of symbols to be 1" 264 | ) 265 | 266 | # Drop the symbol dimension on mu and sigma_inv which is known to be 1 267 | # for this special case. 268 | 269 | sigma_inv = sigma_inv.squeeze(3).squeeze(2) 270 | mu = mu.squeeze(2) 271 | 272 | variance = (1.0 / sigma_inv) ** 2 273 | composite_mean = torch.sum(p * mu, dim=1) 274 | 275 | # Composite variance comes from the shifted component means and 276 | # shifted component covariances. Here's a derivation: 277 | 278 | # E[(x-mu)**2] = sum p_i E[(x_i-mu)**2] 279 | # E[(x_i-mu)**2] = E[((x_i-mu_i) + (mu_i-mu))**2] 280 | # = sigma_i**2 + (mu_i-mu)**2 281 | 282 | shifted_component_means = mu - composite_mean.unsqueeze(1).expand(mu.shape) 283 | shifted_component_variances = variance + shifted_component_means**2 284 | composite_variance = torch.sum(p * shifted_component_variances, dim=1) 285 | return composite_mean, composite_variance 286 | 287 | 288 | def multivariate_combine_metrics(p, mu, sigma_inv): 289 | """Given a mixture model of normal distributions charaterized by 290 | probabilities (p), components-wise mean (mu) and component-wise 291 | inverse standard deviation (sigma_inv), compute the overall mean 292 | and inverse standard deviation for the mixture. 293 | 294 | Note: This is the multivariate version of univariate_combine_metrics. 295 | 296 | Inputs: 297 | p: tensor of shape (mb_size, mixture_componente) - probability of each component 298 | mu: tensor of shape (mb_size, mixture_components, symbols) - mu for each 299 | component. 300 | sigma_inv: tensor of shape (mb_size, mixture_components, symbols, symbols) - 301 | the inverse of the standard deviation of each component. 302 | 303 | Outputs: 304 | mu: tensor of shape (mb_size, symbols) - the mean of the mixture. 305 | covariance: tensor of shape (mb_size, symbols, symbols) - the covariance of the mixture. 306 | 307 | Note that the return value is the covariance matrix. This is 308 | different from elsewhere in the code where we often use the 309 | Cholesky factor of the inverse of the covariance matrix to 310 | represent the variance. 
311 | 312 | """ 313 | if not isinstance(p, torch.Tensor): 314 | p = torch.tensor(p, dtype=torch.float) 315 | if not isinstance(mu, torch.Tensor): 316 | mu = torch.tensor(mu, dtype=torch.float) 317 | if not isinstance(sigma_inv, torch.Tensor): 318 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 319 | 320 | if ( 321 | p.shape != sigma_inv.shape[:2] 322 | or mu.shape != sigma_inv.shape[:3] 323 | or sigma_inv.shape[2] > sigma_inv.shape[3] 324 | ): 325 | raise ValueError( 326 | f"Dimensions of p ({p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 327 | ) 328 | 329 | # Note that sigma_inv may not be square but the number of rows 330 | # should be no more than the number of columns 331 | 332 | inverse_covariance = torch.matmul(sigma_inv, torch.transpose(sigma_inv, 2, 3)) 333 | covariance = torch.inverse(inverse_covariance) 334 | composite_mean = torch.sum(p.unsqueeze(2).expand(mu.shape) * mu, dim=1) 335 | 336 | # Composite covariance comes from the shifted component means and 337 | # shifted component covariances. Here's a derivation: 338 | 339 | # E[(x-mu)(x-mu)'] = sum p_i E[(x_i-mu)(x_i-mu)'] 340 | # But E[(x_i-mu)(x_i-mu)'] = E[((x_i-mu_i)(x_i-mu_i)' + (mu_i-mu)(mu_i-mu)'] 341 | # = cov_i+ (mu_i-mu)(mu_i-mu)' 342 | 343 | shifted_means = (mu - composite_mean.unsqueeze(1).expand(mu.shape)).unsqueeze(3) 344 | shifted_component_variances = covariance + torch.matmul( 345 | shifted_means, torch.transpose(shifted_means, 2, 3) 346 | ) 347 | composite_covariance = torch.sum( 348 | p.unsqueeze(2).unsqueeze(3).expand(shifted_component_variances.shape) 349 | * shifted_component_variances, 350 | dim=1, 351 | ) 352 | return composite_mean, composite_covariance 353 | -------------------------------------------------------------------------------- /src/deep_volatility_models/model_wrappers.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | from dataclasses import dataclass, field 3 | from typing import Dict, Tuple, Union 4 | 5 | import torch 6 | 7 | 8 | @dataclass 9 | class StockModel: 10 | symbols: Tuple[str] 11 | network: torch.nn.Module 12 | date: dt.datetime 13 | epochs: int 14 | loss: float 15 | encoding: Dict[str, int] = field(default_factory=dict) 16 | training_data_start_date: Union[dt.datetime, None] = None 17 | training_data_end_date: Union[dt.datetime, None] = None 18 | -------------------------------------------------------------------------------- /src/deep_volatility_models/models.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | StockModel = namedtuple('StockModel', 'symbols network date epochs loss') 4 | StockModelV2 = namedtuple('StockModelV2', 'symbols network date epochs null_model_loss loss') 5 | -------------------------------------------------------------------------------- /src/deep_volatility_models/optuna_opt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import optuna 4 | 5 | import deep_volatility_models.train_univariate as train_univariate 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | SYMBOLS = [ 10 | "bnd", 11 | "edv", 12 | "tyd", 13 | "gld", 14 | "vnq", 15 | "vti", 16 | "spy", 17 | "qqq", 18 | "qld", 19 | "xmvm", 20 | "vbk", 21 | "xlv", 22 | "fxg", 23 | "rxl", 24 | "fxl", 25 | "ibb", 26 | "vgt", 27 | "iyf", 28 | "xly", 29 | "uge", 30 | "jnk", 31 | ] 32 | 33 | 34 | def objective(trial): 35 | mixture_components = 
trial.suggest_int("mixture_components", 1, 5) 36 | feature_dimension = trial.suggest_int("feature_dimension", 5, 50) 37 | embedding_dimension = trial.suggest_int("embedding_dimension", 3, 15) 38 | gaussian_noise = trial.suggest_float("gaussian_noise", 1e-5, 1e-2, log=True) 39 | dropout = trial.suggest_uniform("dropout", 0.0, 0.75) 40 | learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True) 41 | weight_decay = trial.suggest_float("weight_decay", 1e-9, 1e-5, log=True) 42 | window_size = trial.suggest_categorical("window_size", [16, 64, 256]) 43 | minibatch_size = trial.suggest_int("minibatch_size", 32, 256) 44 | 45 | logging.info("************************") 46 | logging.info(f"mixture_components: {mixture_components}") 47 | logging.info(f"feature_dimension: {feature_dimension}") 48 | logging.info(f"embedding_dimension: {embedding_dimension}") 49 | logging.info(f"gaussian_noise: {gaussian_noise}") 50 | logging.info(f"dropout: {dropout}") 51 | logging.info(f"learning_rate: {learning_rate}") 52 | logging.info(f"weight_decay: {weight_decay}") 53 | logging.info(f"window_size: {window_size}") 54 | logging.info(f"minibatch_size: {minibatch_size}") 55 | 56 | loss = train_univariate.run( 57 | existing_model=None, 58 | symbols=SYMBOLS, 59 | refresh=False, 60 | only_embeddings=False, 61 | window_size=window_size, 62 | mixture_components=mixture_components, 63 | feature_dimension=feature_dimension, 64 | gaussian_noise=gaussian_noise, 65 | embedding_dimension=embedding_dimension, 66 | minibatch_size=minibatch_size, 67 | use_batch_norm=False, 68 | dropout=dropout, 69 | learning_rate=learning_rate, 70 | weight_decay=weight_decay, 71 | ) 72 | 73 | logging.info(f"loss: {loss}") 74 | logging.info("************************") 75 | 76 | return loss 77 | 78 | 79 | if __name__ == "__main__": 80 | study = optuna.create_study() 81 | study.optimize(objective, n_trials=400) 82 | logging.info(f"{study.best_params}") 83 | -------------------------------------------------------------------------------- /src/deep_volatility_models/sample.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Tuple, Union 2 | 3 | # Common packages 4 | import torch 5 | 6 | 7 | def multivariate_mixture_sample( 8 | mixture_model: torch.nn.Module, 9 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 10 | sample_size: int, 11 | normalize: bool = False, 12 | n_sigma=1, 13 | ): 14 | """Draw samples from a mixture model 15 | Parameters: 16 | mixture_model: torch.nn.Module - The model to evaluate_model 17 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 18 | First element of predictors is a window: torch.Tensor of shape (batch_size, symbols, window_size) 19 | sample_size: int - The number of samples to draw 20 | normalize: bool - Draw samples that are a fixed number of standard 21 | deviations away (useful for generating multivariate contours of points that are 22 | n-sigma from the mean, but not useful for univariate distributions). 23 | n_sigma: int - The number of standard deviations away to generate 24 | samples (only used when `normalize` is True) 25 | 26 | Returns: 27 | torch.Tensor of shape (batch_size, symbols, sample_size) - Log returns 28 | sampled from the model's distribution. Note that a "sample" represents 29 | the distribution at a particular moment in time and does not generate a simulated 30 | time series. 
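    Example (an illustrative sketch; `mixture_model` is assumed to be a
    trained mixture network and `window` a (batch_size, symbols, window_size)
    tensor of log returns):

        samples = multivariate_mixture_sample(mixture_model, window, sample_size=1000)
        p05 = torch.quantile(samples, 0.05, dim=2)  # 5th percentile of next-period log return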
31 | 32 | Note: 33 | In the case that `predictors` is not a tuple, it is assumed to be 34 | the time_series portion. 35 | 36 | """ 37 | log_p, mu, sigma_inv = mixture_model(predictors)[:3] 38 | p = torch.exp(log_p) 39 | 40 | batch_size, _, symbols = mu.shape 41 | 42 | # Create an initial simulation day having returns of zero for day 43 | # 0. By day 0, we mean "right now" so the returns are zero compared 44 | # relative to the current stock price. It may seem unnecessary to 45 | # explicitly add these zeros, but it's really convenient to be able to index 46 | # into the simulation with day index==0 meaning the current stock price. 47 | # The simulation results are typically evaluated by cumsum, exponentiated, 48 | # and multiplied by the current stock price. Using this approach, the 0th 49 | # entry (the price on day 0) will be the current price because a log return of 50 | # zero has been applied. This avoids having to do some awkward indexing 51 | # elsewhere. 52 | 53 | samples = torch.Tensor([]) 54 | 55 | for _ in range(sample_size): 56 | selections = torch.multinomial(p, 1) 57 | mu_selector = selections.unsqueeze(2).expand(batch_size, 1, symbols) 58 | selected_mu = torch.gather(mu, 1, mu_selector).squeeze(1).unsqueeze(2) 59 | # selected_mu is (nb_size x channels x 1) 60 | assert selected_mu.shape == (batch_size, symbols, 1) 61 | 62 | sigma_selector = ( 63 | selections.unsqueeze(2).unsqueeze(3).expand(batch_size, 1, symbols, symbols) 64 | ) 65 | selected_sigma_inv = torch.gather(sigma_inv, 1, sigma_selector) 66 | selected_sigma = torch.inverse(selected_sigma_inv).squeeze(1) 67 | # selected_sigma is (nb_size x channels x channels) 68 | assert selected_sigma.shape == (batch_size, symbols, symbols) 69 | 70 | z = torch.randn(batch_size, symbols, 1) 71 | if normalize: 72 | norm_z = ( 73 | torch.norm(z, p=2, dim=1).unsqueeze(1).expand(batch_size, symbols, 1) 74 | ) 75 | z = n_sigma * z / norm_z 76 | assert z.shape == (batch_size, symbols, 1) 77 | 78 | next_values = selected_mu + torch.matmul(selected_sigma, z) 79 | # next_values is (mb_size, symbols, 1) 80 | assert next_values.shape == (batch_size, symbols, 1) 81 | 82 | samples = torch.cat((samples, next_values), dim=2) 83 | 84 | return samples.detach() 85 | 86 | 87 | def multivariate_sample( 88 | model: torch.nn.Module, 89 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 90 | sample_size: int, 91 | normalize: bool = False, 92 | n_sigma=1, 93 | ): 94 | """Draw samples from a mixture model 95 | Parameters: 96 | model: torch.nn.Module - The model to evaluate_model 97 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 98 | First element of predictors is a window: torch.Tensor of shape (batch_size, symbols, window_size) 99 | sample_size: int - The number of samples to draw 100 | normalize: bool - Draw samples that are a fixed number of standard 101 | deviations away (useful for generating multivariate contours of points that are 102 | n-sigma from the mean, but not useful for univariate distributions). 103 | n_sigma: int - The number of standard deviations away to generate 104 | samples (only used when `normalize` is True) 105 | 106 | Returns: 107 | torch.Tensor of shape (batch_size, symbols, sample_size) - Log returns 108 | sampled from the model's distribution Note that a "sample" represents 109 | the distribution at a particular moment in time and does not generate a simulated 110 | time series. 
111 | 112 | Note: 113 | In the case that `predictors` is not a tuple, it is assumed to be 114 | the time_series portion. 115 | """ 116 | mu, sigma_inv = model(predictors)[:2] 117 | sigma = torch.inverse( 118 | sigma_inv 119 | ) # Removed a .squeeze(1) from multivariate implementation 120 | 121 | batch_size, symbols = mu.shape 122 | 123 | # Create an initial simulation day having returns of zero for day 124 | # 0. By day 0, we mean "right now" so the returns are zero compared 125 | # relative to the current stock price. It may seem unnecessary to 126 | # explicitly add these zeros, but it's really convenient to be able to index 127 | # into the simulation with day index==0 meaning the current stock price. 128 | # The simulation results are typically evaluated by cumsum, exponentiated, 129 | # and multiplied by the current stock price. Using this approach, the 0th 130 | # entry (the price on day 0) will be the current price because a log return of 131 | # zero has been applied. This avoids having to do some awkward indexing 132 | # elsewhere. 133 | 134 | samples = torch.Tensor([]) 135 | for _ in range(sample_size): 136 | z = torch.randn(batch_size, symbols, 1) 137 | if normalize: 138 | norm_z = ( 139 | torch.norm(z, p=2, dim=1).unsqueeze(1).expand(batch_size, symbols, 1) 140 | ) 141 | z = n_sigma * z / norm_z 142 | assert z.shape == (batch_size, symbols, 1) 143 | 144 | next_values = mu + torch.matmul(sigma, z) 145 | # next_values is (batch_size, symbols, 1) 146 | assert next_values.shape == (batch_size, symbols, 1) 147 | 148 | samples = torch.cat((samples, next_values), dim=2) 149 | 150 | return samples.detach() 151 | 152 | 153 | def simulate_one( 154 | model: torch.nn.Module, 155 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 156 | time_samples: int, 157 | ): 158 | """ 159 | For each row of `predictors`, generate simulated log returns for `time_samples` intervals 160 | 161 | Parameters: 162 | model: torch.nn.Module - model to evaluate 163 | sampler: Callable[[torch.nn.Module, torch.Tensor, int, bool, int], torch.tensor] - samples the distribution returned by the model. 164 | The sampler must be compatible with the model (e.g., a mixture model sampler or a non-mixture model sampler depending on the model). 165 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 166 | First element of predictors is a window: torch.Tensor of shape (minibatch, symbols, window_size) 167 | time_samples: number of time intervals to simulate. 168 | 169 | Returns: 170 | torch.Tensor of shape (batch_size, symbols, time_samples+1) - For each batch row, a single time series 171 | containing the simulated returns. 172 | 173 | Notes: 174 | Last dimension of sample is sample_size+1 because the first 175 | position isn't actually sampled. An artificial zero sample 176 | is inserted in the first position. 
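    Example (an illustrative sketch; `model` is assumed to expose a compatible
    `.sampler`, `window` to be a (batch_size, symbols, window_size) tensor of
    log returns, and `current_price` a stand-in for the latest observed price):

        paths = simulate_one(model, window, time_samples=30)
        price_paths = current_price * paths  # paths[:, :, 0] == 1.0 by construction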
177 | """ 178 | 179 | if isinstance(predictors, tuple): 180 | window, exogenous = predictors 181 | make_predictors = lambda window, exogenous: (window, exogenous) 182 | else: 183 | window = predictors 184 | exogenous = None 185 | make_predictors = lambda window, exogenous: window 186 | 187 | batch_size, symbols = window.shape[:2] 188 | simulated_returns = torch.zeros(batch_size, symbols, 1) 189 | 190 | sampler = model.sampler 191 | for _ in range(time_samples): 192 | next_values = sampler(model, make_predictors(window, exogenous), 1) 193 | window = torch.cat([window[:, :, 1:], next_values], dim=2) 194 | simulated_returns = torch.cat((simulated_returns, next_values), dim=2) 195 | 196 | # Aggregate the 'forward_days' future returns into the cumulative return 197 | cumulative_returns = torch.exp(torch.cumsum(simulated_returns, dim=2)).detach() 198 | return cumulative_returns 199 | 200 | 201 | def simulate_many( 202 | model: torch.nn.Module, 203 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 204 | time_samples: int, 205 | simulation_count: int, 206 | ): 207 | """ 208 | This is a wrapper that calls simulate_one `simulation_count` times. 209 | """ 210 | 211 | simulations = torch.stack( 212 | tuple( 213 | model.simulate_one(predictors, time_samples) 214 | for _ in range(simulation_count) 215 | ), 216 | dim=3, 217 | ) 218 | return simulations 219 | 220 | 221 | def multivariate_mixture_simulate_extremes( 222 | mixture_model: torch.nn.Module, 223 | window: torch.Tensor, 224 | time_samples: int, 225 | simulation_count: int, 226 | ): 227 | 228 | simulations = multivariate_mixture_simulate_many( 229 | mixture_model, window, time_samples, simulation_count 230 | ) 231 | 232 | max_outcomes = torch.max(simulations, dim=0)[0] 233 | min_outcomes = torch.min(simulations, dim=0)[0] 234 | median_outcomes = torch.median(simulations, dim=0)[0] 235 | return min_outcomes, median_outcomes, max_outcomes 236 | -------------------------------------------------------------------------------- /src/deep_volatility_models/stock_data.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import io 4 | import logging 5 | import os 6 | from typing import Any, Callable, Dict, Iterator, Iterable, Tuple, Union 7 | 8 | # Third party libraries 9 | import pandas as pd 10 | 11 | # Local imports 12 | import deep_volatility_models.data_sources as data_sources 13 | import deep_volatility_models.util as util 14 | 15 | # Initialization 16 | logging.basicConfig(level=logging.INFO) 17 | 18 | # This section defines the types that we will be using. 
19 | 20 | Reader = Callable[[io.BufferedReader], pd.DataFrame] 21 | Writer = Callable[[io.BufferedWriter], None] 22 | Concatenator = Callable[[Iterable[Tuple[str, pd.DataFrame]]], pd.DataFrame] 23 | ReaderFactory = Callable[[], Reader] 24 | WriterFactory = Callable[[Any], Writer] 25 | 26 | 27 | class DataStore(abc.ABC): 28 | @abc.abstractmethod 29 | def exists(self, key: str) -> bool: 30 | """Does data for `key` exist in the data store?""" 31 | 32 | @abc.abstractmethod 33 | def read(self, key: str, reader: Reader) -> Any: 34 | """Read data for `key` from the data store""" 35 | 36 | @abc.abstractmethod 37 | def write(self, key: str, writer: Writer) -> None: 38 | """Write the data to the data store under `key`""" 39 | 40 | 41 | # Here's an iplementation of a Reader 42 | 43 | 44 | def SymbolHistoryReader() -> Reader: 45 | """ 46 | Constructs a reader() function that will read symbol history from an open 47 | file-like object. 48 | 49 | Returns: 50 | Callable[BinaryIO, pd.DataFrame] - Reader that whenn called on an open 51 | file returns a history dataframe. 52 | """ 53 | 54 | def read_symbol_history(f: io.BufferedReader) -> pd.DataFrame: 55 | df = pd.read_csv( 56 | f, 57 | index_col="date", 58 | parse_dates=["date"], 59 | ) 60 | 61 | # Be 100% certain it's in ascending order, even though it should have 62 | # been stored that way. 63 | df.sort_index(inplace=True) 64 | return df 65 | 66 | return read_symbol_history 67 | 68 | 69 | # Here's an iplementation of a Writer 70 | 71 | 72 | def SymbolHistoryWriter(df: pd.DataFrame) -> Writer: 73 | def write_symbol_history(f: io.BufferedWriter) -> None: 74 | # Create an index on date and write to CSV in ascending order by date 75 | # with index=True 76 | indexed_df = df.copy() 77 | 78 | if indexed_df.index.name != "date": 79 | indexed_df.set_index("date", inplace=True) 80 | 81 | indexed_df.sort_index(inplace=True) 82 | indexed_df.to_csv(f, index=True) 83 | 84 | return write_symbol_history 85 | 86 | 87 | # Here's an iplementation of a DataStore 88 | 89 | 90 | class FileSystemStore(DataStore): 91 | """ 92 | This clsss implements an abstract interface for data storage. It 93 | implements three methods: 94 | exists() to determine whether an object has beenstored 95 | write() to store an object 96 | load() to load an object. 97 | This particular implementation is specific to writing and loading 98 | dataframes. It does some additional housekeeping and sanity checking on the dataframe. 99 | 100 | Abstracting this interface allows the file system to be replaced or 101 | mocked out more esily for testing. 102 | """ 103 | 104 | def __init__(self, cache_dir="."): 105 | self.cache_dir = cache_dir 106 | os.makedirs(cache_dir, exist_ok=True) 107 | 108 | def _path(self, symbol: str) -> str: 109 | """Construct a filesystem path to store and retrieve the data for the 110 | associated givwn key 111 | Arguments: 112 | symbol: str 113 | Returns: 114 | str - The filesystem path to be used for the key 115 | """ 116 | # TODO: Return a path object rather than a string to increase porability. 117 | symbol_path = os.path.join(self.cache_dir, f"{symbol.lower()}.csv") 118 | return symbol_path 119 | 120 | def exists(self, symbol: str) -> bool: 121 | """Return whether the symbol exists in the data store 122 | Arguments: 123 | symbol: str - the symbol or key to retrieve 124 | Returns: 125 | True if the key exists in the data store. 
126 | """ 127 | return os.path.exists(self._path(symbol)) 128 | 129 | def write(self, symbol: str, writer: Writer): 130 | """ 131 | Write a key and data (must be a dataframe) to the data store 132 | Arguments: 133 | symbol: str - The symbol or "key" for the data. 134 | df: pd.DataFrame - The dataframe to store for that symbol. 135 | Returns: 136 | None 137 | """ 138 | with open(self._path(symbol), "wb") as f: 139 | writer(f) 140 | 141 | def read(self, symbol: str, reader: Reader) -> Any: 142 | """ 143 | Read a dataframe given its symbol. 144 | Arguments: 145 | symbol: str 146 | Returns: 147 | pd.DataFrame - The associated dataframe. 148 | """ 149 | with open(self._path(symbol), "rb") as f: 150 | result = reader(f) 151 | return result 152 | 153 | 154 | def CachingDownloader( 155 | data_source: data_sources.DataSource, 156 | data_store: DataStore, 157 | writer_factory: WriterFactory, 158 | overwrite_existing: bool = False, 159 | ): 160 | """ 161 | Construct and return a download function that will download and write the 162 | results to the data store as necessary. 163 | Arguments: 164 | data_source: Callable[[Union[str, Iterable[str]]], Dict[str, 165 | pd.DataFrame]] - A datasource function which given a list of symbols 166 | returns a dictionary keyed by the symbol with values that are dataframe with history data for 167 | that symbol. 168 | 169 | data_store: FilesystemStore (or similar) - An implementation of a data_store class (see 170 | FileSystemStore above) 171 | 172 | """ 173 | 174 | def download( 175 | symbols: Union[Iterable[str], str], 176 | ) -> Dict[str, pd.DataFrame]: 177 | """ 178 | Arguments: 179 | symbols: Union[Iterable[str], str] - A symbol of list of symbols to populate 180 | in the cache. 181 | overwrite_existing: bool - Forces all symbols to be downloaded whether or not 182 | they already exist in the cache. 183 | """ 184 | # Handle the case where `symbol`is a single symbol 185 | symbols = util.to_symbol_list(symbols) 186 | 187 | if not overwrite_existing: 188 | # Determine what's missing 189 | missing = [] 190 | for symbol in symbols: 191 | if not data_store.exists(symbol): 192 | missing.append(symbol) 193 | 194 | # Replace full list with missing list 195 | symbols = missing 196 | 197 | if len(symbols) > 0: 198 | ds = data_source(symbols) 199 | 200 | # Write the results to the cache 201 | for symbol in symbols: 202 | writer = writer_factory(ds[symbol]) 203 | data_store.write(symbol, writer) 204 | else: 205 | ds = {} 206 | 207 | return ds 208 | 209 | return download 210 | 211 | 212 | def PriceHistoryConcatenator() -> Concatenator: 213 | def concatenator(sequence: Iterable[Tuple[str, pd.DataFrame]]) -> pd.DataFrame: 214 | """ 215 | Return a dataframe containing all historic values for the given set of symbosl. 216 | The dates are inner joined so there is one row for each date where all symbols 217 | have a value for that date. The row index for the returned dataframe is the 218 | date. The column is a muli-level index where the first position is the symbol 219 | and the second position is the value of interest (e.g., "close", "log_return", etc.) 220 | 221 | The expected use case is to get the log returns for a portfolio of stocks. 
For example, 222 | the following returns a datafram of log returns for a portfolio on the dates where every 223 | item in the portfolio has a return: 224 | 225 | df.loc[:, (symbol_list, 'log_return')] 226 | 227 | This is intended for a portfolio, but you can specify just one stock if that's all that's required: 228 | 229 | df.loc[:, (symbol, 'log_return')] 230 | 231 | Arguments: 232 | symbols: Union[Iterable[str], str] - a list of symbols of interest 233 | overwrite_existing: bool - whether to overwrite previously downloaded data (default False) 234 | 235 | Returns 236 | pd.DataFrame - The column is a muli-level index where the first position is the symbol 237 | and the second position is the value of interest (e.g., "close", "log_return", etc.) 238 | """ 239 | dataframes = [] 240 | symbols = [] 241 | for symbol, df in sequence: 242 | df["symbol"] = symbol 243 | symbols.append(symbol) 244 | dataframes.append(df) 245 | 246 | combined_df = pd.concat(dataframes, axis=1, join="inner", keys=symbols) 247 | return combined_df 248 | 249 | return concatenator 250 | 251 | 252 | def CachingLoader( 253 | data_source: data_sources.DataSource, 254 | data_store: DataStore, 255 | reader_factory: ReaderFactory, 256 | writer_factory: WriterFactory, 257 | overwrite_existing: bool, 258 | ): 259 | """ 260 | Construct a caching downloader frmo a data source, data store, reader 261 | factory and writer factory. The resulting caching downloader is called on a 262 | list of symbols and returns a generator that lazily returns a sequence of 263 | typles of (symbol, data). The data_source is invoked only for elements that 264 | do not already exist in `data_store`. A reader instance generated from 265 | reader_factory is used to read the data. The type of the reader output can 266 | be anything, but typically could be a pd.DataFrame. A writer is used to 267 | write the data to the data store (in a format that the reader can read it) 268 | after being downloaded from the data_source. 269 | 270 | See the sample code below for an example of these are glued together. 271 | """ 272 | caching_download = CachingDownloader( 273 | data_source, data_store, writer_factory, overwrite_existing=overwrite_existing 274 | ) 275 | 276 | def load(symbols: Union[Iterable[str], str]) -> Iterator[Tuple[str, Any]]: 277 | """ """ 278 | symbols = util.to_symbol_list(symbols) 279 | caching_download(symbols) 280 | 281 | reader = reader_factory() 282 | for symbol in symbols: 283 | data = data_store.read(symbol, reader) 284 | yield (symbol, data) 285 | 286 | return load 287 | 288 | 289 | def CachingSymbolHistoryLoader( 290 | data_source: data_sources.DataSource, 291 | data_store: DataStore, 292 | overwrite_existing: bool, 293 | ): 294 | """ 295 | This loader factory returns an instance of a loader that handles the typical 296 | use-case where we're interested in DataFrames containing history for a 297 | particular stock symbol. It's the special case of a CachingLoader that 298 | knows how to read and write symbol histories as DataFrames. 
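    Example (a minimal sketch mirroring the __main__ block at the bottom of
    this module):

        data_store = FileSystemStore("training_data")
        data_source = data_sources.YFinanceSource()
        loader = CachingSymbolHistoryLoader(data_source, data_store, overwrite_existing=False)
        for symbol, df in loader(("SPY", "QQQ")):
            print(symbol, len(df))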
299 | """ 300 | return CachingLoader( 301 | data_source, 302 | data_store, 303 | SymbolHistoryReader, 304 | SymbolHistoryWriter, 305 | overwrite_existing=overwrite_existing, 306 | ) 307 | 308 | 309 | if __name__ == "__main__": # pragma: no cover 310 | data_store = FileSystemStore("training_data") 311 | data_source = data_sources.YFinanceSource() 312 | loader = CachingSymbolHistoryLoader( 313 | data_source, data_store, overwrite_existing=False 314 | ) 315 | combiner = PriceHistoryConcatenator() 316 | symbols = ("QQQ", "SPY", "BND", "EDV") 317 | df = combiner(loader(symbols)) 318 | 319 | selection = df.loc[:, (symbols, "log_return")] # type: ignore 320 | print(selection) 321 | print(selection.values.shape) 322 | 323 | selection = df.loc[:, (symbols[0], "log_return")] # type: ignore 324 | print(selection) 325 | print(selection.values.shape) 326 | -------------------------------------------------------------------------------- /src/deep_volatility_models/time_series_datasets.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Iterable, Tuple 2 | 3 | import numpy as np 4 | import torch 5 | import torch.utils.data 6 | 7 | 8 | def multivariate_stats(x): 9 | """ 10 | Given a time series x, estimate the mean (mu) and the square root of 11 | the covariance (sigma) for that time series. 12 | Inputs: 13 | x: tensor of shape (tensor(mb_size, channels)) containing the sequence values 14 | Outputs: 15 | mu: tensor of shape: (channels,) containing the mean estimates 16 | sigma: tensor of shape: ((channels, channels) containing an estimate of 17 | the lower Cholesky factor of the covariance matrix. 18 | 19 | TODO: To improve numerical stability, Use an SVD to compute the Cholesky 20 | factor rather than the naive formula. 21 | """ 22 | # Create tensor version of x in case it isn't already 23 | if not isinstance(x, torch.Tensor): 24 | x = torch.tensor(x) 25 | 26 | mb_size, channels = x.shape 27 | mu = torch.mean(x, dim=0) 28 | error = x - mu.unsqueeze(0).expand((mb_size, channels)) 29 | # error is mb_size x channels 30 | error1 = error.unsqueeze(2) 31 | # error1 represents e (mb_size, channels, 1) 32 | error2 = error.unsqueeze(1) 33 | # error2 represents e^T (mb_size, 1, channels) 34 | cov = torch.mean(torch.matmul(error1, error2), dim=0) 35 | # cov is (channels, channels) 36 | 37 | # Return cholesky factor 38 | sigma = torch.linalg.cholesky(cov) 39 | return mu, sigma 40 | 41 | 42 | class RollingWindow(torch.utils.data.Dataset): 43 | """ 44 | Given a time series, construct a sequence of rolling windows on the series. 45 | The resuling windows are compatible with the pytorch dataloader: the kth 46 | window is obtained by indexing the kth element of the output series. Also, 47 | for compatibility with pytorch the output is represented by pytorch tensors 48 | and it follows pytorch conventions dimension order as explained below. 49 | 50 | Example usage: 51 | 52 | >>> import time_series_datasets 53 | 54 | This modules works with time seris of scalars or time series of vectors. The 55 | first example is a sequence of scalars: 56 | 57 | >>> series = list(range(5)) 58 | >>> series 59 | [0, 1, 2, 3, 4] 60 | 61 | Construct a rolling sequence of windows for the series with a window size of 62 | 3 and a default stride of 1. 
63 | 64 | >>> windowed_series = time_series_datasets.RollingWindow(series, 3) 65 | 66 | The first element (element 0) is a window with the first three values: 67 | 68 | >>> windowed_series[0] 69 | tensor([0., 1., 2.]) 70 | 71 | The second element (element 1) is a window with the next three values: 72 | 73 | >>> windowed_series[1] 74 | tensor([1., 2., 3.]) 75 | 76 | The third element (element 2) is a window with the next three values: 77 | 78 | >>> windowed_series[2] 79 | tensor([2., 3., 4.]) 80 | 81 | For use with convolutional neworks, it's often necessary to create a channel dimension: 82 | 83 | >>> windowed_series = time_series_datasets.RollingWindow(series, 3, create_channel_dim=True) 84 | >>> windowed_series[0] 85 | tensor([[0., 1., 2.]]) 86 | 87 | RollingWindowSeries also works for vector-valued time series as long as you 88 | understand some conventions about the ordering of dimensions. We assume 89 | that the first dimension of the input (dimension 0) represents time. In 90 | other words, we assume the input is a sequence of vectors. This is a 91 | natural convention for the input sequence. However, we follow the pytorch 92 | convention on the output. The pytorch convention is that the *last* 93 | dimension represents time. In effect, the vector dimension becomes the 94 | channel dimension, so the `create_channel_dim` option is meaningless in this 95 | case. 96 | 97 | An example will clarify these ideas. 98 | 99 | >>> vector_series = [[1, 2], [3, 4], [5, 6], [7, 8]] 100 | >>> windowed_vector_series = time_series_datasets.RollingWindow(vector_series, 3) 101 | >>> windowed_vector_series[0] 102 | tensor([[1., 3., 5.], 103 | [2., 4., 6.]]) 104 | 105 | The result may seem "transposed", but that's for consistency with pytorch 106 | conventions and necessary for use with a number of pytorch functions. Here's 107 | the rationale. For a sequence of vectors, the vector dimension should be 108 | thought of as the "depth" dimension (e.g., RGB for images). The pytorch 109 | convention is for the depth to be the first dimension dimension 0) of the 110 | tensor and for the "time" (or space) dimension to be dimension 1 for 1d or 111 | dimensions 1 and 2 for 2d. When these ecords get batched for machine 112 | learning, the index of the record is always dimension 0, so the depth 113 | becomes dimension 1, and "time" becomes dimension 2. The convention for 114 | batched records is typically as follows: 115 | 116 | dimension 0 - index of record within a batch dimension 1 - "depth" 117 | dimension dimension 2 - "time" dimension for 1d or "x" dimensions for 2d 118 | dimension 3 - "y" dimension for 2d 119 | 120 | Since we're looking at records before they have been batched, the convention 121 | is 122 | 123 | dimension 0 - "depth" dimension dimension 1 - "time" dimension for 1d or 124 | "x" dimensions for 2d dimension 2 - "y" dimension for 2d 125 | 126 | More generally, the pytorch convention for time series (or any 1d signal) is 127 | that time (or whatever the 1d dimension represents) should always be the 128 | *last* dimension. For images, "x" and "y" should be the last *two* 129 | dimensions. Continuing the exmaple, here's the next window: 130 | 131 | >>> windowed_vector_series[1] 132 | tensor([[3., 5., 7.], 133 | [4., 6., 8.]]) 134 | 135 | Note: This code currently works for sequences of scalars and sequences of 1d 136 | vectors. 
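    The stride argument controls how far the window advances between
    consecutive elements. For example (an illustrative addition using the
    same scalar series as above):

    >>> strided = time_series_datasets.RollingWindow(series, 3, stride=2)
    >>> len(strided)
    2
    >>> strided[1]
    tensor([2., 3., 4.])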
137 | 138 | TODO: Make this code work for sequences of tensors with two or more 139 | diemsions while following the above conventions that "time" should be the 140 | last dimension. 141 | """ 142 | 143 | def __init__( 144 | self, 145 | series: Iterable[Any], 146 | sequence_length: int, 147 | stride: int = 1, 148 | create_channel_dim: bool = False, 149 | dtype: torch.dtype = torch.float, 150 | ): 151 | if stride <= 0: 152 | raise ValueError("Stride cannot be negative") 153 | 154 | # Originally np.array() was simply suple(series) 155 | # pytorch issued a warning a recommended the use of np.array() 156 | self.__series = np.array(series) 157 | 158 | if ( 159 | len(self.__series) > 0 160 | and hasattr(self.__series[0], "__len__") 161 | and len(self.__series[0]) > 0 162 | and create_channel_dim 163 | ): 164 | raise ValueError("create_channel_dim should be False for this series shape") 165 | 166 | self.__sequence_length = sequence_length 167 | self.__stride = stride 168 | self.__length = (len(self.__series) - sequence_length) // stride + 1 169 | self.__create_channel_dim = create_channel_dim 170 | self.__dtype = dtype 171 | 172 | def __len__(self) -> int: 173 | return self.__length 174 | 175 | def __getitem__(self, index) -> torch.Tensor: 176 | if index < 0: 177 | index = self.__length + index 178 | 179 | if index >= 0 and index < self.__length: 180 | start = index * self.__stride 181 | result = torch.tensor( 182 | self.__series[start : start + self.__sequence_length], 183 | dtype=self.__dtype, 184 | ) 185 | if len(result.shape) == 1: 186 | if self.__create_channel_dim: 187 | result = result.unsqueeze(0) 188 | else: 189 | result = result.t() 190 | 191 | return result 192 | else: 193 | raise IndexError() 194 | 195 | 196 | class ContextWindowAndTarget(torch.utils.data.Dataset): 197 | """Split sequence of windows into a context window and a target""" 198 | 199 | def __init__(self, rolling_window_series: RollingWindow, target_dim: int = 1): 200 | """Typically, the stride used to construct rolling_window_series would be equal to 201 | target_dim 202 | 203 | """ 204 | self.__time_series_dataset = rolling_window_series 205 | self.__target_dim = target_dim 206 | 207 | def __len__(self) -> int: 208 | return len(self.__time_series_dataset) 209 | 210 | def __getitem__(self, index) -> Tuple[torch.Tensor, torch.Tensor]: 211 | t = self.__time_series_dataset[index] 212 | if len(t.shape) == 1: # Sequence of scalars 213 | context_window = t[: -self.__target_dim] 214 | target = t[-self.__target_dim :] 215 | # Drop the last dimension when it's one. 216 | if self.__target_dim == 1: 217 | target = target.squeeze(-1) 218 | else: # Sequence of vectors 219 | context_window = t[:, : -self.__target_dim] 220 | target = t[:, -self.__target_dim :] 221 | 222 | return context_window, target 223 | 224 | 225 | class ContextWindowEncodingAndTarget(torch.utils.data.Dataset): 226 | """This augments the data from an instance of WindowAndTarget by adding the encoding for 227 | its symbol. This would only be appropriate when building a Dataset for a 228 | set of different symbols, but a WindowAndTarget instance contains no symbol 229 | information. It represents the history for just a single symbol. This 230 | class adds a single encoding for that symbol to the Dataset. 
To build a 231 | dataset representing multiple symbols each with their own encodings, you 232 | first construct an EncodingWindowAndTarget instance for each symbol 233 | separately, then combine the various symbols using 234 | torch.utils.data.ConcatDataset()""" 235 | 236 | def __init__( 237 | self, 238 | symbol_encoding: int, 239 | symbol_history_dataset: ContextWindowAndTarget, 240 | device=None, 241 | ): 242 | self.__symbol_encoding = torch.tensor(symbol_encoding) 243 | if device is not None: 244 | self.__symbol_encoding = self.__symbol_encoding.to(device) 245 | self.__symbol_history_dataset = symbol_history_dataset 246 | self.__device = device 247 | 248 | def __len__(self) -> int: 249 | return len(self.__symbol_history_dataset) 250 | 251 | def __getitem__(self, i) -> Tuple[Tuple[torch.Tensor, int], torch.Tensor]: 252 | window, target = self.__symbol_history_dataset[i] 253 | if self.__device is not None: 254 | window = window.to(self.__device) 255 | target = target.to(self.__device) 256 | return (window, self.__symbol_encoding), target 257 | -------------------------------------------------------------------------------- /src/deep_volatility_models/train_univariate.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | import datetime as dt 3 | import logging 4 | 5 | import os 6 | from typing import Callable, Dict, Iterable, Iterator, Union, Tuple 7 | 8 | # Common packages 9 | import click 10 | import numpy as np 11 | import pandas as pd 12 | 13 | import torch 14 | import torch.utils.data 15 | import torch.utils.data.dataloader 16 | 17 | # Local imports 18 | from deep_volatility_models import data_sources 19 | from deep_volatility_models import stock_data 20 | from deep_volatility_models import mixture_model_stats 21 | from deep_volatility_models import loss_functions 22 | from deep_volatility_models import time_series_datasets 23 | from deep_volatility_models import model_wrappers 24 | from deep_volatility_models import architecture 25 | from deep_volatility_models import training 26 | 27 | logging.basicConfig( 28 | level=logging.INFO, 29 | format="%(asctime)s %(levelname)s:%(message)s", 30 | force=True, 31 | ) 32 | 33 | ESTIMATE = "estimate" 34 | RISK_NEUTRAL = "risk-neutral" 35 | ZERO = "zero" 36 | 37 | MEAN_STRATEGIES = { 38 | ESTIMATE: architecture.MeanStrategy.ESTIMATE, 39 | RISK_NEUTRAL: architecture.MeanStrategy.RISK_NEUTRAL, 40 | ZERO: architecture.MeanStrategy.ZERO, 41 | } 42 | 43 | TRAIN_FRACTION = 0.80 44 | DEFAULT_SEED = 24 # Previously 42 45 | RANDOM_SPLIT_SEED = 1701 46 | 47 | EPOCHS = 1000 # 30000 48 | EARLY_TERMINATION = 100 # Was 1000 49 | 50 | USE_MIXTURE = False 51 | USE_DEV_MODELS = False 52 | DEFAULT_MIXING_LAYERS = 0 53 | 54 | RISK_NEUTRAL_PARAMETERS = True 55 | if RISK_NEUTRAL_PARAMETERS: # These are the values for the univariate non-mixture model 56 | OPT_LEARNING_RATE = 0.000712 # Previously 0.000535 57 | OPT_DROPOUT = 0.009291 # Previously 0.001675 58 | OPT_FEATURE_DIMENSION = 37 # Previously 41 59 | OPT_MIXTURE_COMPONENTS = 1 # Previously 4 60 | OPT_WINDOW_SIZE = 256 61 | OPT_EMBEDDING_DIMENSION = 6 # Previously 4 62 | OPT_MINIBATCH_SIZE = 230 # Previously 124 63 | OPT_GAUSSIAN_NOISE = 0.000657 # Previosly 0.002789 64 | OPT_WEIGHT_DECAY = 1.438462e-06 # Previously 1.407138e-06 65 | USE_BATCH_NORM = False # risk neutral version has trouble with batch normalization 66 | else: 67 | # Current values were optimized with hyperopt. Values shown in comment were used before optimization. 
68 | OPT_LEARNING_RATE = 0.000689 # Previously 0.000375 69 | OPT_DROPOUT = 0.130894 # Previously 0.50 70 | OPT_FEATURE_DIMENSION = 86 # Previously 40 71 | OPT_MIXTURE_COMPONENTS = 3 # Previously 4 72 | OPT_WINDOW_SIZE = 256 # Previously 64 73 | OPT_EMBEDDING_DIMENSION = 3 # Previously 10 74 | OPT_MINIBATCH_SIZE = 248 # Previously 75 75 | OPT_GAUSSIAN_NOISE = 0.000226 # Previously 0.0025 76 | OPT_WEIGHT_DECAY = 8.489603e-07 # Previously 5e-9 77 | # Value of USE_BATCH_NORM wasn't optimized with hyperopt but was set to True. 78 | USE_BATCH_NORM = True 79 | 80 | BETA1 = 0.95 81 | BETA2 = 0.999 82 | ADAM_EPSILON = 1e-8 83 | ACTIVATION = torch.nn.ReLU() 84 | MAX_GRADIENT_NORM = 1.0 85 | 86 | if torch.cuda.is_available(): 87 | dev = "cuda:0" 88 | # elif torch.has_mps: 89 | # dev = "mps" 90 | else: 91 | dev = "cpu" 92 | 93 | device = torch.device(dev) 94 | 95 | 96 | def create_new_model( 97 | embedding_size=None, 98 | window_size=OPT_WINDOW_SIZE, 99 | mixture_components=OPT_MIXTURE_COMPONENTS, 100 | feature_dimension=OPT_FEATURE_DIMENSION, 101 | embedding_dimension=OPT_EMBEDDING_DIMENSION, 102 | gaussian_noise=OPT_GAUSSIAN_NOISE, 103 | use_batch_norm=USE_BATCH_NORM, 104 | dropout=OPT_DROPOUT, 105 | mean_strategy=MEAN_STRATEGIES[RISK_NEUTRAL], 106 | use_mixture=USE_MIXTURE, 107 | use_dev_models=USE_DEV_MODELS, 108 | extra_mixing_layers=DEFAULT_MIXING_LAYERS, 109 | ): 110 | if use_dev_models: 111 | network = architecture.DeepVolatilityModel( 112 | window_size=window_size, 113 | mean_strategy=mean_strategy, 114 | model_type=architecture.ModelType.UNIVARIATE, 115 | input_symbols=1, 116 | feature_dimension=feature_dimension, 117 | exogenous_dimension=embedding_dimension, 118 | is_mixture=use_mixture, 119 | mixture_components=mixture_components, 120 | gaussian_noise=gaussian_noise, 121 | activation=ACTIVATION, 122 | dropout=dropout, 123 | use_batch_norm=use_batch_norm, 124 | extra_mixing_layers=extra_mixing_layers, 125 | ) 126 | elif use_mixture: 127 | network = architecture.MixtureModel( 128 | window_size, 129 | 1, 130 | feature_dimension=feature_dimension, 131 | mixture_components=mixture_components, 132 | exogenous_dimension=embedding_dimension, 133 | gaussian_noise=gaussian_noise, 134 | dropout=dropout, 135 | use_batch_norm=use_batch_norm, 136 | activation=ACTIVATION, 137 | mean_strategy=mean_strategy, 138 | ) 139 | else: 140 | network = architecture.UnivariateModel( 141 | window_size, 142 | feature_dimension=feature_dimension, 143 | mixture_components=mixture_components, 144 | exogenous_dimension=embedding_dimension, 145 | gaussian_noise=gaussian_noise, 146 | dropout=dropout, 147 | use_batch_norm=use_batch_norm, 148 | activation=ACTIVATION, 149 | mean_strategy=mean_strategy, 150 | ) 151 | 152 | embedding = torch.nn.Embedding(embedding_size, embedding_dimension) 153 | 154 | return network, embedding 155 | 156 | 157 | def load_existing_model(existing_model, symbols): 158 | """ 159 | This function loads an existing model and adjusts its embedding 160 | and encoding objects to accomodate any new symbols in `symbols` 161 | 162 | Arguments: 163 | existing_model: path - path to existing model 164 | symbols: List[str] - list of symbols to be trained. 165 | 166 | Returns: 167 | model_network: torch.Module 168 | embeddings: torch.Embedding 169 | encoding: Dict[str, i] - encoding 170 | 171 | Note the list of symbols is required so that the embedding can be extended 172 | (with values to be trained) to accomodate the new symbol list. 
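    Example (an illustrative sketch of the embedding extension performed
    below; the sizes are made up):

        emb = torch.nn.Embedding(2, 3)  # existing model: 2 symbols, dimension 3
        new_row = emb.weight.data.mean(dim=0, keepdim=True)
        emb.weight.data = torch.cat((emb.weight.data, new_row), dim=0)
        # emb.weight.shape is now torch.Size([3, 3])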
173 | 174 | """ 175 | 176 | model = torch.load(existing_model) 177 | # Dump the old wrapper and keep only the network and the embeddings 178 | # We'll create a new wrapper 179 | model_network = model.network.model 180 | embeddings = model.network.embedding 181 | encoding = model.encoding 182 | 183 | # If there are new symbols since the previous model was trained, 184 | # extend the encoding and initialize the new embeddings with the 185 | # mean of the old embedding. This initialization seems to work 186 | # better than a random initialization with using a pre-trained 187 | # model 188 | 189 | new_symbols = set(symbols).difference(set(encoding.keys())) 190 | 191 | if len(new_symbols) > 0: 192 | # Extend the encoding for any symbols unknown to the pre-loaded model 193 | for s in new_symbols: 194 | encoding[s] = len(encoding) 195 | 196 | # Extend the embedding for any symbols unknown to the pre-loaded model 197 | embedding_parameters = next(embeddings.parameters()) 198 | mean_embedding = embedding_parameters.mean(dim=0) 199 | # Extract and use old embedding dimension 200 | old_embedding_dimension = embedding_parameters.shape[1] 201 | 202 | new_embeddings = ( 203 | mean_embedding.unsqueeze(0) 204 | .expand(len(new_symbols), old_embedding_dimension) 205 | .clone() 206 | ) 207 | 208 | # Extend the mean to current number of symbols 209 | embedding_parameters.data = torch.concat( 210 | (embedding_parameters, new_embeddings), dim=0 211 | ) 212 | 213 | logging.info("Using existing model") 214 | return model_network, embeddings, encoding 215 | 216 | 217 | def prepare_data( 218 | history_loader: Callable[ 219 | [Union[str, Iterable[str]]], Iterator[Tuple[str, pd.DataFrame]] 220 | ], 221 | symbol_list: Iterable[str], 222 | encoding: Dict[str, int], 223 | window_size: int, 224 | minibatch_size: int = OPT_MINIBATCH_SIZE, 225 | start_date: Union[dt.date, None] = None, 226 | end_date: Union[dt.date, None] = None, 227 | ): 228 | generator = torch.Generator().manual_seed(RANDOM_SPLIT_SEED) 229 | 230 | # Refresh historical data 231 | logging.info("Reading historical data") 232 | splits_by_symbol = {} 233 | 234 | # For the purposes of hyperparameter optimization, make sure that 235 | # changing the window size doesn't change the number of rows. In 236 | # other words, we always consume the first 256 points of history, 237 | # even if we don't use them as context so that first target return 238 | # in the dataset is always the same, independent of the window 239 | # size. Also, this won't work if window_size exceeds 256, so we 240 | # trap that case: 241 | if window_size > 256: 242 | raise ValueError( 243 | f"Window size of {window_size} isn't allowed. Window size must be 256 or less" 244 | ) 245 | 246 | skip = 256 - window_size 247 | 248 | for s in sorted(symbol_list): 249 | logging.info(f"Reading {s}") 250 | i = encoding[s] 251 | 252 | # history_loader can load many symbols at once for multivariate 253 | # but here we just load the single symbol of interest. Since we expect 254 | # just one dataframe, grab it with next() instead of using a combiner() 255 | # (see stock-data.py).) 256 | symbol_history = next(history_loader(s))[1].loc[start_date:end_date] 257 | logging.info(f"symbol_history:\n {symbol_history}") 258 | 259 | # Symbol history is a combined history for all symbols. We process it 260 | # one symbols at a time, so get the log returns for the current symbol 261 | # of interest. 
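        # Each element of the dataset built below is a (1, window_size + 1)
        # rolling window of log returns; ContextWindowAndTarget then splits
        # off the final value as the one-step-ahead target, leaving a
        # (1, window_size) context window.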
262 | # log_returns = symbol_history.loc[:, (s, "log_return")] # type: ignore 263 | windowed_returns = time_series_datasets.RollingWindow( 264 | symbol_history.log_return[skip:], 265 | 1 + window_size, 266 | create_channel_dim=True, 267 | ) 268 | logging.debug(f"{s} windowed_returns[0]: {windowed_returns[0]}") 269 | 270 | symbol_dataset = time_series_datasets.ContextWindowAndTarget( 271 | windowed_returns, 1 272 | ) 273 | symbol_dataset_with_encoding = ( 274 | time_series_datasets.ContextWindowEncodingAndTarget( 275 | i, symbol_dataset, device=device 276 | ) 277 | ) 278 | 279 | train_size = int(TRAIN_FRACTION * len(symbol_dataset_with_encoding)) 280 | lengths = [train_size, len(symbol_dataset_with_encoding) - train_size] 281 | train, test = torch.utils.data.random_split( 282 | symbol_dataset_with_encoding, lengths, generator=generator 283 | ) 284 | splits_by_symbol[s] = {"train": train, "test": test} 285 | 286 | train_dataset = torch.utils.data.ConcatDataset( 287 | [splits_by_symbol[s]["train"] for s in symbol_list] 288 | ) 289 | validation_dataset = torch.utils.data.ConcatDataset( 290 | [splits_by_symbol[s]["test"] for s in symbol_list] 291 | ) 292 | 293 | train_dataloader = torch.utils.data.dataloader.DataLoader( 294 | train_dataset, batch_size=minibatch_size, drop_last=True, shuffle=True 295 | ) 296 | 297 | validation_dataloader = torch.utils.data.dataloader.DataLoader( 298 | validation_dataset, 299 | batch_size=len(validation_dataset), 300 | drop_last=True, 301 | shuffle=True, 302 | ) 303 | 304 | return train_dataloader, validation_dataloader 305 | 306 | 307 | def make_mixture_loss_function(): 308 | def loss_function(output, target): 309 | log_p, mu, inv_sigma = output[:3] 310 | 311 | loss = -torch.mean( 312 | mixture_model_stats.multivariate_log_likelihood( 313 | target.squeeze(2), log_p, mu, inv_sigma 314 | ) 315 | ) 316 | 317 | if np.isnan(float(loss)): 318 | logging.error("log_p: ", log_p) 319 | logging.error("mu: ", mu) 320 | logging.error("inv_sigma: ", inv_sigma) 321 | 322 | return loss 323 | 324 | return loss_function 325 | 326 | 327 | def make_loss_function(): 328 | def loss_function(output, target): 329 | mu, inv_sigma = output[:2] 330 | 331 | loss = -torch.mean( 332 | loss_functions.univariate_log_likelihood(target.squeeze(2), mu, inv_sigma) 333 | ) 334 | 335 | if np.isnan(float(loss)): 336 | logging.error("mu: ", mu) 337 | logging.error("inv_sigma: ", inv_sigma) 338 | 339 | return loss 340 | 341 | return loss_function 342 | 343 | 344 | def log_mixture_mean_error(epoch, output, target): 345 | log_p, mu = output[:2] 346 | mb_size, components, channels = mu.shape 347 | combined_mu = torch.sum( 348 | mu * torch.exp(log_p).unsqueeze(2).expand((mb_size, components, channels)), 349 | dim=1, 350 | ) 351 | mean_error = torch.mean(target.squeeze(2) - combined_mu, dim=0) 352 | logging.debug(f"epoch: {epoch} mean_error: {float(mean_error):.5f}") 353 | 354 | 355 | def make_mixture_validation_batch_logger(): 356 | def log_epoch(epoch, batch, output, target, loss): 357 | log_p, mu, inv_sigma = output[:3] 358 | logging.debug(f"last epoch p:\n{torch.exp(log_p)[:6].detach().cpu().numpy()}") 359 | logging.debug(f"last epoch mu:\n{mu[:6].detach().cpu().numpy()}") 360 | logging.debug(f"last epoch sigma:\n{inv_sigma[:6].detach().cpu().numpy()}") 361 | 362 | log_mixture_mean_error(epoch, output, target) 363 | 364 | return log_epoch 365 | 366 | 367 | def log_mean_error(epoch, output, target): 368 | mu = output[0] 369 | mean_error = torch.mean(target.squeeze(2) - mu, dim=0) 370 | 
logging.debug(f"epoch: {epoch} mean_error: {float(mean_error):.5f}") 371 | 372 | 373 | def make_validation_batch_logger(): 374 | def log_epoch(epoch, batch, output, target, loss): 375 | mu, inv_sigma = output[:2] 376 | logging.debug(f"last epoch mu:\n{mu[:6].detach().cpu().numpy()}") 377 | logging.debug(f"last epoch sigma:\n{inv_sigma[:6].detach().cpu().numpy()}") 378 | 379 | log_mean_error(epoch, output, target) 380 | 381 | return log_epoch 382 | 383 | 384 | def make_save_model( 385 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 386 | ): 387 | def save_model(epoch, epoch_loss, prefix=""): 388 | wrapped_model = model_wrappers.StockModel( 389 | symbols=symbols, 390 | encoding=encoding, 391 | network=model, 392 | epochs=epoch, 393 | date=dt.datetime.now(), 394 | loss=epoch_loss, 395 | training_data_start_date=start_date, 396 | training_data_end_date=end_date, 397 | ) 398 | 399 | torch.save(wrapped_model, f"{model_file}") 400 | 401 | return save_model 402 | 403 | 404 | def make_loss_improvement_callback( 405 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 406 | ): 407 | save_model = make_save_model( 408 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 409 | ) 410 | 411 | def model_improvement_callback(epoch, epoch_loss): 412 | save_model(epoch, epoch_loss) 413 | 414 | return model_improvement_callback 415 | 416 | 417 | def make_epoch_callback(model): 418 | def epoch_callback(epoch, train_loss, validation_loss): 419 | logging.debug(f"parameters: {(list(model.embedding.parameters()))}") 420 | 421 | return epoch_callback 422 | 423 | 424 | def run( 425 | use_hsmd, 426 | model_file, 427 | existing_model, 428 | symbols, 429 | refresh, 430 | mean_strategy, 431 | only_embeddings, 432 | use_mixture=USE_MIXTURE, 433 | max_epochs=EPOCHS, 434 | early_termination=EARLY_TERMINATION, 435 | window_size=OPT_WINDOW_SIZE, 436 | mixture_components=OPT_MIXTURE_COMPONENTS, 437 | feature_dimension=OPT_FEATURE_DIMENSION, 438 | embedding_dimension=OPT_EMBEDDING_DIMENSION, 439 | gaussian_noise=OPT_GAUSSIAN_NOISE, 440 | minibatch_size=OPT_MINIBATCH_SIZE, 441 | dropout=OPT_DROPOUT, 442 | learning_rate=OPT_LEARNING_RATE, 443 | weight_decay=OPT_WEIGHT_DECAY, 444 | use_batch_norm=USE_BATCH_NORM, 445 | beta1=BETA1, 446 | beta2=BETA2, 447 | seed=DEFAULT_SEED, 448 | start_date=None, 449 | end_date=None, 450 | use_dev_models=USE_DEV_MODELS, 451 | extra_mixing_layers=0, 452 | ): 453 | # Rewrite symbols with deduped, uppercase versions 454 | symbols = list(map(str.upper, set(symbols))) 455 | 456 | logging.info(f"model: {model_file}") 457 | logging.info(f"device: {device}") 458 | logging.info(f"existing_model: {existing_model}") 459 | logging.info(f"symbols: {symbols}") 460 | logging.info(f"refresh: {refresh}") 461 | logging.info(f"mean_strategy: {mean_strategy}") 462 | logging.info(f"only_embeddings: {only_embeddings}") 463 | logging.info(f"use_mixture: {use_mixture}") 464 | logging.info(f"window_size: {window_size}") 465 | logging.info(f"mixture_components: {mixture_components}") 466 | logging.info(f"feature_dimension: {feature_dimension}") 467 | logging.info(f"embedding_dimension: {embedding_dimension}") 468 | logging.info(f"gaussian_noise: {gaussian_noise}") 469 | logging.info(f"minibatch_size: {minibatch_size}") 470 | logging.info(f"dropout: {dropout}") 471 | logging.info(f"learning_rate: {learning_rate}") 472 | logging.info(f"weight_decay: {weight_decay}") 473 | logging.info(f"use_batch_norm: {use_batch_norm}") 474 | 
logging.info(f"ADAM beta1: {beta1}") 475 | logging.info(f"ADAM beta2: {beta2}") 476 | logging.info(f"Seed: {seed}") 477 | logging.info(f"Start date: {start_date}") 478 | logging.info(f"End date: {end_date}") 479 | logging.info(f"Use dev models: {use_dev_models}") 480 | logging.info(f"Extra mixing layers: {extra_mixing_layers}") 481 | 482 | model_network = embeddings = None 483 | if existing_model: 484 | model_network, embeddings, encoding = load_existing_model( 485 | existing_model, symbols 486 | ) 487 | logging.info(f"Loaded model from file: {existing_model}") 488 | else: 489 | encoding = {s: i for i, s in enumerate(symbols)} 490 | 491 | logging.info(f"Encoding: {encoding}") 492 | 493 | data_store = stock_data.FileSystemStore("training_data") 494 | if use_hsmd: 495 | data_source = data_sources.HugeStockMarketDatasetSource(use_hsmd) 496 | else: 497 | data_source = data_sources.YFinanceSource() 498 | 499 | history_loader = stock_data.CachingSymbolHistoryLoader( 500 | data_source, data_store, refresh 501 | ) 502 | 503 | torch.random.manual_seed(seed) 504 | 505 | # Do split before any random weight initialization so that any 506 | # subsequent random number generator calls won't affect the split. 507 | # We want the splits to be the same for different architecture 508 | # parameters to provide fair comparisons of different 509 | # architectures on the same split. 510 | 511 | train_loader, validation_loader = prepare_data( 512 | history_loader, 513 | symbols, 514 | encoding, 515 | window_size, 516 | minibatch_size=minibatch_size, 517 | start_date=start_date, 518 | end_date=end_date, 519 | ) 520 | 521 | if model_network is None or embeddings is None: 522 | model_network, embeddings = create_new_model( 523 | embedding_size=len(symbols), 524 | window_size=window_size, 525 | mixture_components=mixture_components, 526 | feature_dimension=feature_dimension, 527 | embedding_dimension=embedding_dimension, 528 | gaussian_noise=gaussian_noise, 529 | use_mixture=use_mixture, 530 | use_batch_norm=use_batch_norm, 531 | dropout=dropout, 532 | mean_strategy=mean_strategy, 533 | use_dev_models=use_dev_models, 534 | extra_mixing_layers=extra_mixing_layers, 535 | ) 536 | logging.info("Initialized new model") 537 | 538 | # Generate list of parameters we choose to train. 539 | # We always tune or train the embeddings: 540 | parameters = list(embeddings.parameters()) 541 | 542 | # Add rest of model parameters unless we're training only the embeddings. 
543 | if not only_embeddings: 544 | parameters.extend(model_network.parameters()) 545 | 546 | logging.debug(f"parameters: {parameters}") 547 | 548 | # Define model, optimizer, loss function, and callbacks before calling train() 549 | model = architecture.ModelWithEmbedding(model_network, embeddings) 550 | model.to(device) 551 | 552 | sgd_optim = torch.optim.SGD( 553 | parameters, 554 | lr=learning_rate, 555 | weight_decay=weight_decay, 556 | momentum=0.0, 557 | ) 558 | adam_optim = torch.optim.Adam( 559 | parameters, 560 | lr=learning_rate, 561 | betas=(beta1, beta2), 562 | weight_decay=weight_decay, 563 | eps=ADAM_EPSILON, 564 | ) 565 | optim = adam_optim 566 | 567 | if model.is_mixture: 568 | loss_function = make_mixture_loss_function() 569 | validation_batch_callback = make_mixture_validation_batch_logger() 570 | else: 571 | loss_function = make_loss_function() 572 | validation_batch_callback = make_validation_batch_logger() 573 | 574 | epoch_callback = make_epoch_callback(model) 575 | loss_improvement_callback = make_loss_improvement_callback( 576 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 577 | ) 578 | 579 | logging.info("Starting training loop.") 580 | best_epoch, best_validation_loss, best_model = training.train( 581 | model=model, 582 | loss_function=loss_function, 583 | optim=optim, 584 | train_loader=train_loader, 585 | validation_loader=validation_loader, 586 | max_epochs=max_epochs, 587 | early_termination=early_termination, 588 | validation_batch_callback=validation_batch_callback, 589 | epoch_callback=epoch_callback, 590 | loss_improvement_callback=loss_improvement_callback, 591 | ) 592 | logging.info( 593 | f"Training terminated. Best epoch: {best_epoch}; Best validation loss: {best_validation_loss}" 594 | ) 595 | return best_epoch, best_validation_loss, best_model 596 | 597 | 598 | @click.command() 599 | @click.option( 600 | "--use-hsmd", 601 | default=None, 602 | show_default=True, 603 | help="Use huge stock market dataset if specified zip file (else use yfinance)", 604 | ) 605 | @click.option( 606 | "--model", 607 | default="model.pt", 608 | show_default=True, 609 | help="Trained model output file.", 610 | ) 611 | @click.option( 612 | "--existing-model", 613 | default=None, 614 | show_default=True, 615 | help="Existing model to load (for tuning).", 616 | ) 617 | @click.option("--symbol", "-s", multiple=True, show_default=True) 618 | @click.option( 619 | "--refresh", 620 | is_flag=True, 621 | default=False, 622 | show_default=True, 623 | help="Refresh stock data", 624 | ) 625 | @click.option( 626 | "--mean-strategy", 627 | type=click.Choice([RISK_NEUTRAL, ZERO, ESTIMATE]), 628 | show_default=True, 629 | default=RISK_NEUTRAL, 630 | help="Method to use for mean output.", 631 | ) 632 | @click.option( 633 | "--only-embeddings", 634 | is_flag=True, 635 | default=False, 636 | show_default=True, 637 | help="Train only the embeddings", 638 | ) 639 | @click.option( 640 | "--use-mixture/--no-mixture", 641 | is_flag=True, 642 | default=USE_MIXTURE, 643 | show_default=True, 644 | help="Use a mixture model?", 645 | ) 646 | @click.option( 647 | "--early-termination", 648 | default=EARLY_TERMINATION, 649 | show_default=True, 650 | help="Terminate if no improvement in this number of iterations", 651 | ) 652 | @click.option( 653 | "--learning-rate", default=OPT_LEARNING_RATE, show_default=True, type=float 654 | ) 655 | @click.option("--dropout", default=OPT_DROPOUT, show_default=True, type=float) 656 | @click.option( 657 | 
"--use-batch-norm/--no-use-batch-norm", 658 | is_flag=True, 659 | default=USE_BATCH_NORM, 660 | show_default=True, 661 | ) 662 | @click.option( 663 | "--feature-dimension", default=OPT_FEATURE_DIMENSION, show_default=True, type=int 664 | ) 665 | @click.option( 666 | "--mixture-components", default=OPT_MIXTURE_COMPONENTS, show_default=True, type=int 667 | ) 668 | @click.option("--window-size", default=OPT_WINDOW_SIZE, show_default=True, type=int) 669 | @click.option( 670 | "--embedding-dimension", 671 | default=OPT_EMBEDDING_DIMENSION, 672 | show_default=True, 673 | type=int, 674 | ) 675 | @click.option( 676 | "--minibatch-size", default=OPT_MINIBATCH_SIZE, show_default=True, type=int 677 | ) 678 | @click.option( 679 | "--gaussian-noise", default=OPT_GAUSSIAN_NOISE, show_default=True, type=float 680 | ) 681 | @click.option("--weight-decay", default=OPT_WEIGHT_DECAY, show_default=True, type=float) 682 | @click.option("--seed", default=DEFAULT_SEED, show_default=True, type=int) 683 | @click.option( 684 | "--start-date", 685 | default=None, 686 | show_default=True, 687 | type=click.DateTime(formats=["%Y-%m-%d"]), 688 | help="Exclude training data (returns) before this date", 689 | ) 690 | @click.option( 691 | "--end-date", 692 | show_default=True, 693 | type=click.DateTime(formats=["%Y-%m-%d"]), 694 | help="Exclude training data on or after this date", 695 | ) 696 | @click.option( 697 | "--use-dev-models", 698 | is_flag=True, 699 | show_default=True, 700 | help="Use development version of models.", 701 | ) 702 | @click.option( 703 | "--extra-mixing-layers", 704 | type=int, 705 | default=DEFAULT_MIXING_LAYERS, 706 | show_default=True, 707 | help="Number of additional layers to blend exogenous and time series latents.", 708 | ) 709 | def main_cli( 710 | use_hsmd, 711 | model, 712 | existing_model, 713 | symbol, 714 | refresh, 715 | mean_strategy, 716 | only_embeddings, 717 | use_mixture, 718 | early_termination, 719 | learning_rate, 720 | dropout, 721 | use_batch_norm, 722 | feature_dimension, 723 | mixture_components, 724 | window_size, 725 | embedding_dimension, 726 | minibatch_size, 727 | gaussian_noise, 728 | weight_decay, 729 | seed, 730 | start_date, 731 | end_date, 732 | use_dev_models, 733 | extra_mixing_layers, 734 | ): 735 | 736 | if start_date: 737 | start_date = start_date.date() 738 | 739 | if end_date: 740 | end_date = end_date.date() 741 | 742 | run( 743 | use_hsmd, 744 | model_file=model, 745 | existing_model=existing_model, 746 | symbols=symbol, 747 | refresh=refresh, 748 | mean_strategy=MEAN_STRATEGIES[mean_strategy], 749 | use_mixture=use_mixture, 750 | only_embeddings=only_embeddings, 751 | early_termination=early_termination, 752 | learning_rate=learning_rate, 753 | dropout=dropout, 754 | use_batch_norm=use_batch_norm, 755 | feature_dimension=feature_dimension, 756 | mixture_components=mixture_components, 757 | window_size=window_size, 758 | embedding_dimension=embedding_dimension, 759 | minibatch_size=minibatch_size, 760 | gaussian_noise=gaussian_noise, 761 | weight_decay=weight_decay, 762 | seed=seed, 763 | start_date=start_date, 764 | end_date=end_date, 765 | use_dev_models=use_dev_models, 766 | extra_mixing_layers=extra_mixing_layers, 767 | ) 768 | 769 | 770 | if __name__ == "__main__": 771 | main_cli() 772 | -------------------------------------------------------------------------------- /src/deep_volatility_models/training.py: -------------------------------------------------------------------------------- 1 | """This is a generic PyTorch training loop that can be 
adapted for different problems.""" 2 | 3 | # Standard Python packages 4 | from copy import deepcopy 5 | from itertools import count 6 | import logging 7 | from typing import Callable, Union 8 | 9 | 10 | # Third party packages 11 | import numpy as np 12 | import torch 13 | import torch.utils.data.dataloader 14 | 15 | 16 | def default_batch_callback( 17 | epoch: int, 18 | batch: int, 19 | output: torch.Tensor, 20 | target: torch.Tensor, 21 | loss: float, 22 | ) -> None: 23 | return None 24 | 25 | 26 | def default_epoch_callback( 27 | epoch: int, 28 | train_loss: float, 29 | validation_loss: float, 30 | ) -> None: 31 | return None 32 | 33 | 34 | def default_loss_improvement_callback( 35 | epoch: int, 36 | loss: float, 37 | ) -> None: 38 | return None 39 | 40 | 41 | def _do_batches( 42 | epoch: int, 43 | model: torch.nn.Module, 44 | data_loader: torch.utils.data.dataloader.DataLoader, 45 | loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], 46 | optim: torch.optim.Optimizer, 47 | training: bool, 48 | callback: Callable[[int, int, torch.Tensor, torch.Tensor, float], None], 49 | ): 50 | model.train(training) 51 | batch_losses = [] 52 | 53 | for batch, (predictors, target) in enumerate(data_loader): 54 | model_output = model(predictors) 55 | batch_loss = loss_function(model_output, target) 56 | batch_losses.append(float(batch_loss)) 57 | 58 | if training: 59 | optim.zero_grad() 60 | batch_loss.backward() 61 | optim.step() 62 | 63 | callback(epoch, batch, model_output, target, float(batch_loss)) 64 | 65 | epoch_loss = float(np.mean(batch_losses)) 66 | return epoch_loss 67 | 68 | 69 | def train( 70 | model: torch.nn.Module, 71 | loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], 72 | optim: torch.optim.Optimizer, 73 | train_loader: torch.utils.data.dataloader.DataLoader, 74 | validation_loader: torch.utils.data.dataloader.DataLoader, 75 | max_epochs: Union[int, None] = None, 76 | early_termination: Union[int, None] = None, 77 | train_batch_callback: Callable[ 78 | [int, int, torch.Tensor, torch.Tensor, float], None 79 | ] = default_batch_callback, 80 | validation_batch_callback: Callable[ 81 | [int, int, torch.Tensor, torch.Tensor, float], None 82 | ] = default_batch_callback, 83 | loss_improvement_callback: Callable[ 84 | [int, float], None 85 | ] = default_loss_improvement_callback, 86 | epoch_callback: Callable[[int, float, float], None] = default_epoch_callback, 87 | ): 88 | # Initialize state for early termination monitoring 89 | best_model = deepcopy(model) 90 | best_validation_loss = float("inf") 91 | best_epoch = -1 92 | 93 | # This is the main epoch loop 94 | if max_epochs is None and early_termination is None: 95 | raise ValueError( 96 | f"At least one of max_epochs ({max_epochs}) or early_termination ({early_termination}) must be specified" 97 | ) 98 | 99 | if max_epochs is not None: 100 | epoch_iterator = range(max_epochs) 101 | else: 102 | epoch_iterator = count() 103 | 104 | for epoch in epoch_iterator: 105 | 106 | epoch_train_loss = _do_batches( 107 | epoch, 108 | model, 109 | train_loader, 110 | loss_function, 111 | optim, 112 | training=True, 113 | callback=train_batch_callback, 114 | ) 115 | 116 | # Evalute the loss on the test set 117 | # Don't compute gradients 118 | with torch.no_grad(): 119 | epoch_validation_loss = _do_batches( 120 | epoch, 121 | model, 122 | validation_loader, 123 | loss_function, 124 | optim, 125 | training=False, 126 | callback=validation_batch_callback, 127 | ) 128 | 129 | epoch_callback(epoch, float(epoch_train_loss), 
float(epoch_validation_loss))
130 |
131 | logging.info(f" Epoch {epoch}: loss (train): {epoch_train_loss:.4f}")
132 |
133 | if epoch_validation_loss < best_validation_loss:
134 | best_validation_loss = epoch_validation_loss
135 | best_epoch = epoch
136 | best_model = deepcopy(model)
137 | flag = "**"
138 |
139 | loss_improvement_callback(epoch, epoch_validation_loss)
140 | else:
141 | flag = " "
142 |
143 | logging.info(
144 | f" {flag} Epoch {epoch}: loss (test): {epoch_validation_loss:.4f} best epoch: {best_epoch} best loss: {best_validation_loss:.4f} {flag}"
145 | )
146 | if early_termination is not None and epoch >= best_epoch + early_termination:
147 | logging.info(
148 | f"No improvement in {early_termination} epochs. Terminating early."
149 | )
150 | break # Terminate early
151 |
152 | return best_epoch, best_validation_loss, best_model
153 |
--------------------------------------------------------------------------------
/src/deep_volatility_models/util.py:
--------------------------------------------------------------------------------
1 | from typing import List, Iterable, Union
2 |
3 |
4 | def to_symbol_list(symbols: Union[Iterable[str], str]) -> List[str]:
5 | """
6 | This function converts its `symbols` argument to a list of strings.
7 | It's used as a convenience function so that a caller can provide a
8 | single symbol to a function rather than an Iterable.
9 | We also normalize symbol lists by converting the symbols to upper case.
10 |
11 | Arguments:
12 | symbols: Union[Iterable[str], str]: The collection of symbols to convert or a single symbol
13 | Returns:
14 | List[str]: An instantiated list of symbols, which may be the same list passed in
15 | """
16 |
17 | if isinstance(symbols, str):
18 | symbols = (symbols,)
19 |
20 | return list(map(str.upper, symbols))
21 |
22 |
23 | def is_sorted(l: Iterable) -> bool:
24 | l = tuple(l)
25 | return all([x <= y for x, y in zip(l, l[1:])])
26 |
27 |
28 | def rename_column(c: str):
29 | """
30 | Standardize column naming. No spaces and no caps.
31 | """ 32 | return c.lower().replace(" ", "_") 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mawicks/deep-volatility-models/8a97672c9d0f4ee1237b04747dea81dd54d07360/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_architecture.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import torch 4 | 5 | from deep_volatility_models import architecture 6 | from deep_volatility_models import mixture_model_stats 7 | 8 | 9 | logsoftmax = torch.nn.LogSoftmax(dim=1) 10 | 11 | BATCH_SIZE = 13 12 | FEATURE_DIMENSION = 11 13 | WINDOW_SIZE = 16 14 | NOISE_DIM = 77 15 | EMBEDDING_SYMBOLS = 9 16 | EXTRA_MIXING_LAYERS = 0 17 | EPS = 1e-6 18 | 19 | EST = architecture.MeanStrategy.ESTIMATE 20 | RN = architecture.MeanStrategy.RISK_NEUTRAL 21 | ZERO = architecture.MeanStrategy.ZERO 22 | 23 | UV = architecture.ModelType.UNIVARIATE 24 | MV = architecture.ModelType.MULTIVARIATE 25 | 26 | 27 | def test_min_max_clamping(): 28 | clamper = architecture.MinMaxClamping() 29 | x1 = torch.tensor([[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]]) 30 | x2 = torch.tensor([[4.0, 7.0], [5.0, 8.0], [6.0, 9.0]]) 31 | # Column 1 ranges from 1 to 6 32 | # Column 2 ranges from 4 to 9 33 | # "Train" on x1 and x2 34 | clamper.train() 35 | assert clamper(x1) is x1 36 | assert clamper(x2) is x2 37 | 38 | # Evaluate on x_test 39 | clamper.eval() 40 | x_test = torch.tensor([[0.0, 3.0], [7.0, 10.0], [5.0, 5.0]]) 41 | y = clamper(x_test) 42 | max_y = torch.max(y, dim=0)[0] 43 | min_y = torch.min(y, dim=0)[0] 44 | assert float(max_y[0]) <= 6.0 45 | assert float(max_y[1]) <= 9.0 46 | assert float(min_y[0]) >= 1.0 47 | assert float(min_y[1]) <= 4.0 48 | 49 | 50 | def test_untrained_mixmax_clamping_passes_all(): 51 | MAGNITUDE = 1e6 52 | filter = architecture.MinMaxClamping() 53 | filter.train(False) 54 | x = MAGNITUDE * torch.randn(3, 2) 55 | y = filter(x) 56 | assert (y == x).all() 57 | 58 | 59 | def test_gaussian_noise(): 60 | SIGMA = 0.1 61 | noise = architecture.GaussianNoise(SIGMA) 62 | x = torch.tensor([[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]]) 63 | y = noise(x) 64 | error = x - y 65 | squared_error = error * error 66 | rmse = torch.sqrt(torch.mean(squared_error)) 67 | assert rmse > 0.1 * SIGMA 68 | assert rmse < 10.0 * SIGMA 69 | 70 | 71 | def is_lower_triangular(m): 72 | mb, mixtures, oc, ic = m.shape 73 | col_offset = ic - oc 74 | assert ic >= oc 75 | 76 | for mb_i in range(mb): 77 | for mx_i in range(mixtures): 78 | for i in range(oc): 79 | for j in range(oc): 80 | if j > i: 81 | assert m[mb_i, mx_i, i, col_offset + j] == 0.0 82 | else: 83 | assert m[mb_i, mx_i, i, col_offset + j] != 0.0 84 | 85 | 86 | @pytest.mark.parametrize( 87 | "batch_size, input_symbols, window_size, feature_dim," 88 | "exogenous_dim, extra_mixing_layers," 89 | "use_batch_norm, expect_value_error", 90 | [ 91 | (13, 1, 0, 7, 3, 2, True, False), # Window size of zero 92 | (13, 1, 4, 7, 3, 2, True, False), # Change window size to 4 93 | (13, 1, 16, 7, 3, 2, True, False), # Chnage window size to 16 94 | (13, 1, 64, 7, 3, 2, True, False), # Change window size to 64 95 | (13, 1, 256, 7, 3, 2, True, False), # Change window size to 256 96 | (13, 1, 64, 7, 0, 2, True, False), # Without an exogenous input 97 | (13, 1, 64, 7, 3, 0, True, False), # Without extra mixing layers 98 | (13, 13, 64, 7, 
3, 2, True, False), # Symbol dimension other than 1 99 | (13, 13, 64, 7, 3, 2, True, False), # Speciying output symbol dim 100 | (13, 13, 64, 7, 3, 2, True, False), # Differing input/output symbol dim 101 | (13, 13, 64, 7, 3, 2, False, False), # Without batch norm 102 | (13, 13, 60, 7, 3, 2, True, True), # Window size is not valid 103 | (13, 1, 0, 7, 0, 2, True, True), # No Window AND no exogenous input 104 | (13, 1, 64, 0, 3, 2, True, True), # Feature dimension of zero 105 | ], 106 | ) 107 | def test_time_series_features( 108 | batch_size, 109 | window_size, 110 | input_symbols, 111 | feature_dim, 112 | exogenous_dim, 113 | extra_mixing_layers, 114 | use_batch_norm, 115 | expect_value_error, 116 | ): 117 | """Test that a time series network can be created and evaluated 118 | with different dimensions. This is only a sanity check 119 | that all of the dimensions conform and the network can produce output. 120 | These are untrained networks so that's all we expect. There is more 121 | extensive validatation for unit tests of the individual head classes. Here 122 | we also check that the network executes properly with the training flag on 123 | and off. 124 | 125 | """ 126 | if expect_value_error: 127 | with pytest.raises(ValueError): 128 | time_series_model = architecture.TimeSeriesFeatures( 129 | input_symbols, 130 | window_size=window_size, 131 | exogenous_dimension=exogenous_dim, 132 | feature_dimension=feature_dim, 133 | use_batch_norm=use_batch_norm, 134 | extra_mixing_layers=extra_mixing_layers, 135 | ) 136 | else: 137 | # This is the base mixture model we're testing. 138 | time_series_model = architecture.TimeSeriesFeatures( 139 | input_symbols, 140 | window_size=window_size, 141 | exogenous_dimension=exogenous_dim, 142 | feature_dimension=feature_dim, 143 | use_batch_norm=use_batch_norm, 144 | extra_mixing_layers=extra_mixing_layers, 145 | ) 146 | 147 | # Create some test inputs. 148 | 149 | # 1) time series data: 150 | ts_data = torch.randn((batch_size, input_symbols, window_size)) 151 | 152 | # 2) exogenous data (in this package that's an embedding, but that's not 153 | # necessarily the case).) 154 | exogenous_data = ( 155 | torch.randn(batch_size, exogenous_dim) if exogenous_dim > 0 else None 156 | ) 157 | 158 | # Below we call the forward() methods of time_series_model 159 | # and make sure it returns a tensor with the correct dimensions. 
160 | 161 | for train in (True, False): 162 | time_series_model.train(train) 163 | 164 | latents = time_series_model.forward(ts_data, exogenous_data) 165 | assert latents.shape == (batch_size, feature_dim) 166 | 167 | # Confirm that the window_size property returns the correct size: 168 | assert time_series_model.window_size == window_size 169 | 170 | 171 | @pytest.mark.parametrize( 172 | "model_type, mean_strategy, input_symbols, output_symbols," 173 | "is_mixture, mixture_components, exogenous_dim," 174 | "use_batch_norm, expect_value_error", 175 | [ 176 | # Non mixture models 177 | # Univariate 178 | (UV, EST, 1, 1, False, 0, 0, True, False), # No exogenous input 179 | (UV, EST, 1, 1, False, 0, 7, True, False), # With exogenous 180 | (UV, ZERO, 1, 1, False, 0, 7, True, False), # Zero head 181 | (UV, RN, 1, 1, False, 0, 7, True, False), # Risk-neutral head 182 | (UV, EST, 1, 1, False, 0, 7, False, False), # No batch norm 183 | # Multivariate 184 | (MV, EST, 9, None, False, 0, 7, True, False), # Estimate mu 185 | (MV, ZERO, 9, None, False, 0, 7, True, False), # Zero head 186 | (MV, RN, 9, None, False, 0, 7, True, True), # Risk-neutral head (ValueError) 187 | # Mixture models 188 | # Univariate 189 | (UV, EST, 1, None, True, 5, 0, True, False), # No exogenous input 190 | (UV, ZERO, 1, None, True, 5, 0, True, False), # No exogenous input - FAILS 191 | (UV, RN, 1, None, True, 5, 0, True, False), # No exogenous input - FAILS 192 | # Multivariate 193 | (MV, EST, 9, None, True, 5, 7, True, False), 194 | (MV, EST, 9, 9, True, 5, 7, True, False), # Specifying output symbol dim 195 | (MV, EST, 9, 8, True, 5, 7, True, False), # Output < input symbols 196 | (MV, ZERO, 8, 8, True, 5, 7, True, False), # Zero mean with output < input 197 | (MV, RN, 9, 8, True, 5, 7, True, True), # Risk-neutral (ValueError) 198 | (MV, EST, 9, None, True, 5, 7, False, False), # No batch norm 199 | ], 200 | ) 201 | def test_deep_volatility_model( 202 | model_type, 203 | mean_strategy, 204 | input_symbols, 205 | output_symbols, 206 | is_mixture, 207 | mixture_components, 208 | exogenous_dim, 209 | use_batch_norm, 210 | expect_value_error, 211 | ): 212 | """Test that a deep volatility model network can be created and evaluated 213 | with different internal feature dimensions. This is only a sanity check 214 | that all of the dimensions conform and the network can produce output. 215 | These are untrained networks so that's all we expect. There is more 216 | extensive validatation for unit tests of the individual head classes. Here 217 | we also check that the network executes properly with the training flag on 218 | and off. 219 | 220 | This code actually tests three things: 221 | 1) Does the forward() method of the mixture network provide sane outputs 222 | 2) Does the forward_unpacked() method of the mixture netowrk provide sane 223 | outputs 224 | 3) Does the forward() method of the ModelAndEmbedding work after combining 225 | a mixture model with an embedding. 
226 | 227 | """ 228 | 229 | if expect_value_error: 230 | with pytest.raises(ValueError): 231 | volatility_model = architecture.DeepVolatilityModel( 232 | window_size=WINDOW_SIZE, 233 | mean_strategy=mean_strategy, 234 | model_type=model_type, 235 | input_symbols=input_symbols, 236 | output_symbols=output_symbols, 237 | feature_dimension=FEATURE_DIMENSION, 238 | exogenous_dimension=exogenous_dim, 239 | is_mixture=is_mixture, 240 | mixture_components=mixture_components, 241 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 242 | use_batch_norm=use_batch_norm, 243 | ) 244 | else: 245 | # This is the base mixture model we're testing. 246 | volatility_model = architecture.DeepVolatilityModel( 247 | window_size=WINDOW_SIZE, 248 | mean_strategy=mean_strategy, 249 | model_type=model_type, 250 | input_symbols=input_symbols, 251 | output_symbols=output_symbols, 252 | feature_dimension=FEATURE_DIMENSION, 253 | exogenous_dimension=exogenous_dim, 254 | is_mixture=is_mixture, 255 | mixture_components=mixture_components, 256 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 257 | use_batch_norm=use_batch_norm, 258 | ) 259 | # Also create an embedding to test that ModelWithEmbedding returns sane results 260 | embedding = torch.nn.Embedding(EMBEDDING_SYMBOLS, exogenous_dim) 261 | 262 | # Combing volatility_model with embedding in embedding_model 263 | embedding_model = architecture.ModelWithEmbedding(volatility_model, embedding) 264 | 265 | # Create some test inputs. 266 | 267 | # 1) time series data: 268 | ts_data = torch.randn((BATCH_SIZE, input_symbols, WINDOW_SIZE)) 269 | 270 | # 2) exogenous data (in this package that's an embedding, but that's not 271 | # necessarily the case).) 272 | exogenous_data = ( 273 | torch.randn(BATCH_SIZE, exogenous_dim) if exogenous_dim > 0 else None 274 | ) 275 | 276 | # 3) an encoding vector to test with embedding_model 277 | encoding = torch.randint(0, EMBEDDING_SYMBOLS, (BATCH_SIZE,)) 278 | 279 | # Below we call the forward() methods of volatility_model and 280 | # embedding_model and also the forward_unpacked() method of 281 | # volatility_model and make sure they return tensors with the correct dimensions. 
282 | 283 | for train in (True, False): 284 | volatility_model.train(train) 285 | embedding_model.train(train) 286 | 287 | if output_symbols is None: 288 | output_symbols = input_symbols 289 | 290 | # Call forward_unpacked() 291 | output_u, latents_u = volatility_model.forward_unpacked( 292 | ts_data, 293 | exogenous_data, 294 | ) 295 | 296 | # Call volatility_model.forward() with different variations 297 | if exogenous_data is None: 298 | output = volatility_model(ts_data) 299 | else: 300 | output = volatility_model( 301 | (ts_data, exogenous_data), 302 | ) 303 | 304 | # Call embedding_model.forward() 305 | output_e = embedding_model((ts_data, encoding)) 306 | 307 | if is_mixture: 308 | log_p_u, mu_u, sigma_inv_u = output_u 309 | log_p, mu, sigma_inv, latents = output 310 | log_p_e, mu_e, sigma_inv_e, latents_e = output_e 311 | assert sigma_inv.shape == ( 312 | BATCH_SIZE, 313 | mixture_components, 314 | output_symbols, 315 | input_symbols, 316 | ) 317 | assert mu.shape == sigma_inv.shape[:3] 318 | assert log_p.shape == sigma_inv.shape[:2] 319 | 320 | assert log_p.shape == log_p_u.shape 321 | assert log_p.shape == log_p_e.shape 322 | else: 323 | mu_u, sigma_inv_u = output_u 324 | mu, sigma_inv, latents = output 325 | mu_e, sigma_inv_e, latents_e = output 326 | log_p_u = log_p = log_p_e = None 327 | assert sigma_inv.shape == (BATCH_SIZE, output_symbols, input_symbols) 328 | assert mu.shape == sigma_inv.shape[:2] 329 | 330 | assert latents_u.shape == (BATCH_SIZE, FEATURE_DIMENSION) 331 | 332 | assert mu.shape == mu_u.shape 333 | assert sigma_inv.shape == sigma_inv_u.shape 334 | assert latents.shape == latents_u.shape 335 | 336 | assert mu.shape == mu_e.shape 337 | assert sigma_inv.shape == sigma_inv_e.shape 338 | assert latents.shape == latents_e.shape 339 | 340 | # Confirm that the window_size property returns the correct size: 341 | assert volatility_model.window_size == WINDOW_SIZE 342 | 343 | # For mixture models do additional testing on log_p. 344 | if is_mixture: 345 | assert log_p_u.shape == (BATCH_SIZE, mixture_components) 346 | assert log_p.shape == log_p_u.shape 347 | assert log_p.shape == log_p_e.shape 348 | 349 | # Make sure the probabilities for a mixture sum to approximately 1. 350 | summed_p = torch.sum(torch.exp(log_p), dim=1) 351 | assert all(torch.abs(summed_p - 1.0) < EPS) 352 | 353 | if mean_strategy is ZERO and not is_mixture: 354 | assert torch.norm(mu) == 0.0 355 | 356 | if mean_strategy is ZERO and is_mixture: 357 | mu_c = mixture_model_stats.multivariate_combine_metrics( 358 | torch.exp(log_p), mu, sigma_inv 359 | )[0] 360 | assert torch.norm(mu_c) < EPS * torch.norm(mu) 361 | 362 | 363 | @pytest.mark.parametrize( 364 | "batch_size, input_symbols, output_symbols, feature_dim," 365 | "mixture_components, exogenous_dim," 366 | "use_batch_norm, expect_value_error", 367 | [ 368 | (13, 1, None, 3, 5, 0, True, False), # Without an exogenous input 369 | (13, 13, None, 3, 5, 7, True, False), # Symbol dimension other than 1 370 | (13, 13, 13, 3, 5, 7, True, False), # Speciying output symbol dim 371 | (13, 13, 11, 3, 5, 7, True, False), # Differing input/output symbol dim 372 | (13, 1, None, 3, 5, 7, False, False), # Without batch norm 373 | ], 374 | ) 375 | def test_mixture_model( 376 | batch_size, 377 | input_symbols, 378 | output_symbols, 379 | feature_dim, 380 | mixture_components, 381 | exogenous_dim, 382 | use_batch_norm, 383 | expect_value_error, 384 | ): 385 | """Test that a mixture network can be created and evaluated 386 | with different internal feature dimensions. 
This is only a sanity check 387 | that all of the dimensions conform and the network can produce output. 388 | These are untrained networks so that's all we expect. There is more 389 | extensive validatation for unit tests of the individual head classes. Here 390 | we also check that the network executes properly with the training flag on 391 | and off. 392 | 393 | This code actually tests three things: 394 | 1) Does the forward() method of the mixture network provide sane outputs 395 | 2) Does the forward_unpacked() method of the mixture netowrk provide sane 396 | outputs 397 | 3) Does the forward() method of the ModelAndEmbedding work after combining 398 | a mixture model with an embedding. 399 | 400 | """ 401 | if expect_value_error: 402 | with pytest.raises(ValueError): 403 | mixture_model = architecture.MixtureModel( 404 | WINDOW_SIZE, 405 | input_symbols, 406 | output_symbols, 407 | exogenous_dimension=exogenous_dim, 408 | output_head_factory=architecture.MultivariateMixtureHead, 409 | feature_dimension=feature_dim, 410 | mixture_components=mixture_components, 411 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 412 | use_batch_norm=use_batch_norm, 413 | mean_strategy=EST, 414 | ) 415 | else: 416 | # This is the base mixture model we're testing. 417 | mixture_model = architecture.MixtureModel( 418 | WINDOW_SIZE, 419 | input_symbols, 420 | output_symbols, 421 | exogenous_dimension=exogenous_dim, 422 | output_head_factory=architecture.MultivariateMixtureHead, 423 | feature_dimension=feature_dim, 424 | mixture_components=mixture_components, 425 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 426 | use_batch_norm=use_batch_norm, 427 | mean_strategy=EST, 428 | ) 429 | # Also create an embedding to test that ModelWithEmbedding returns sane results 430 | embedding = torch.nn.Embedding(EMBEDDING_SYMBOLS, exogenous_dim) 431 | 432 | # Combing mixture_model with embedding in embedding_model 433 | embedding_model = architecture.ModelWithEmbedding(mixture_model, embedding) 434 | 435 | # Create some test inputs. 436 | # 1) time series data: 437 | ts_data = torch.randn((batch_size, input_symbols, WINDOW_SIZE)) 438 | # 2) exogenous data (in this package that's an embedding, but that's not 439 | # necessarily the case).) 440 | exogenous_data = ( 441 | torch.randn(batch_size, exogenous_dim) if exogenous_dim > 0 else None 442 | ) 443 | # 3) an encoding vector to test with embedding_model 444 | encoding = torch.randint(0, EMBEDDING_SYMBOLS, (batch_size,)) 445 | 446 | # Below we call the forward() methods of mixture_model and 447 | # embedding_model and also the forward_unpacked() method of 448 | # mixture_model and make sure they return tensors with the correct dimensions. 
449 | 450 | for train in (True, False): 451 | mixture_model.train(train) 452 | embedding_model.train(train) 453 | 454 | if output_symbols is None: 455 | output_symbols = input_symbols 456 | 457 | # Call forward_unpacked() 458 | log_p_u, mu_u, sigma_inv_u, latents_u = mixture_model.forward_unpacked( 459 | ts_data, 460 | exogenous_data, 461 | ) 462 | 463 | # Call mixture_model.forward() with different variations 464 | if exogenous_data is None: 465 | log_p, mu, sigma_inv, latents = mixture_model(ts_data) 466 | else: 467 | log_p, mu, sigma_inv, latents = mixture_model( 468 | (ts_data, exogenous_data), 469 | ) 470 | 471 | # Call embedding_model.forward() 472 | log_p_e, mu_e, sigma_inv_e, latents_e = embedding_model((ts_data, encoding)) 473 | 474 | assert sigma_inv.shape == ( 475 | batch_size, 476 | mixture_components, 477 | output_symbols, 478 | input_symbols, 479 | ) 480 | assert mu.shape == sigma_inv.shape[:3] 481 | assert log_p.shape == sigma_inv.shape[:2] 482 | 483 | assert latents_u.shape == (batch_size, feature_dim) 484 | 485 | assert log_p.shape == log_p_u.shape 486 | assert mu.shape == mu_u.shape 487 | assert sigma_inv.shape == sigma_inv_u.shape 488 | assert latents.shape == latents_u.shape 489 | 490 | assert log_p.shape == log_p_e.shape 491 | assert mu.shape == mu_e.shape 492 | assert sigma_inv.shape == sigma_inv_e.shape 493 | assert latents.shape == latents_e.shape 494 | 495 | # Make sure the probabilities for a mixture sum to approximately 1. 496 | summed_p = torch.sum(torch.exp(log_p), dim=1) 497 | assert all(torch.abs(summed_p - 1.0) < EPS) 498 | 499 | # Confirm that the window_size property returns the correct size: 500 | assert mixture_model.window_size == WINDOW_SIZE 501 | 502 | 503 | @pytest.mark.parametrize( 504 | "batch_size, feature_dim," "exogenous_dim," "use_batch_norm, expect_value_error", 505 | [ 506 | (13, 3, 0, True, False), # Without an exogenous input 507 | (13, 3, 7, True, False), # Without extra mixing layers 508 | (13, 3, 7, False, False), # Without batch norm 509 | ], 510 | ) 511 | def test_basic_model( # basic model referes to a non-mixture model 512 | batch_size, 513 | feature_dim, 514 | exogenous_dim, 515 | use_batch_norm, 516 | expect_value_error, 517 | ): 518 | """Test that a mmixture network can be created and evaluated 519 | with different internal feature dimensions. This is only a sanity check 520 | that all of the dimensions conform and the network can produce output. 521 | These are untrained networks so that's all we expect. There is more 522 | extensive validatation for unit tests of the individual head classes. Here 523 | we also check that the network executes properly with the training flag on 524 | and off. 525 | 526 | This code actually tests three things: 527 | 1) Does for the forward() method of the mixture network provide sane outputs 528 | 2) Does the forward_unpacked() method of the mixture netowrk provide sane 529 | outputs 530 | 3) Does the forward() method of the ModelAndEmbedding work after combining 531 | a mixture model with an embedding. 532 | 533 | """ 534 | if expect_value_error: 535 | with pytest.raises(ValueError): 536 | model = architecture.UnivariateModel( 537 | WINDOW_SIZE, 538 | exogenous_dimension=exogenous_dim, 539 | feature_dimension=feature_dim, 540 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 541 | use_batch_norm=use_batch_norm, 542 | mean_strategy=EST, 543 | ) 544 | else: 545 | # This is the base model we're testing. 
546 | model = architecture.UnivariateModel( 547 | WINDOW_SIZE, 548 | exogenous_dimension=exogenous_dim, 549 | feature_dimension=feature_dim, 550 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 551 | use_batch_norm=use_batch_norm, 552 | mean_strategy=EST, 553 | ) 554 | # Also create an embedding to test that ModelWithEmbedding returns sane results 555 | embedding = torch.nn.Embedding(EMBEDDING_SYMBOLS, exogenous_dim) 556 | 557 | # Combing model with embedding in embedding_model 558 | embedding_model = architecture.ModelWithEmbedding(model, embedding) 559 | 560 | # Create some test inputs. 561 | # 1) time series data: 562 | ts_data = torch.randn((batch_size, 1, WINDOW_SIZE)) 563 | # 2) exogenous data (in this package that's an embedding, but that's not 564 | # necessarily the case).) 565 | exogenous_data = ( 566 | torch.randn(batch_size, exogenous_dim) if exogenous_dim > 0 else None 567 | ) 568 | # 3) an encoding vector to test with embedding_model 569 | encoding = torch.randint(0, EMBEDDING_SYMBOLS, (batch_size,)) 570 | 571 | # Below we call the forward() methods of model and 572 | # embedding_model and also the forward_unpacked() method of 573 | # model and make sure they return tensors with the correct dimensions. 574 | 575 | for train in (True, False): 576 | model.train(train) 577 | embedding_model.train(train) 578 | 579 | # Call forward_unpacked() 580 | mu_u, sigma_inv_u, latents_u = model.forward_unpacked( 581 | ts_data, 582 | exogenous_data, 583 | ) 584 | 585 | # Call model.forward() with different variations 586 | if exogenous_data is None: 587 | mu, sigma_inv, latents = model(ts_data) 588 | else: 589 | mu, sigma_inv, latents = model( 590 | (ts_data, exogenous_data), 591 | ) 592 | 593 | # Call embedding_model.forward() 594 | mu_e, sigma_inv_e, latents_e = embedding_model((ts_data, encoding)) 595 | 596 | assert sigma_inv.shape == ( 597 | batch_size, 598 | 1, 599 | 1, 600 | ) 601 | assert mu.shape == sigma_inv.shape[:2] 602 | assert latents.shape == (batch_size, feature_dim) 603 | 604 | assert mu.shape == mu_u.shape 605 | assert sigma_inv.shape == sigma_inv_u.shape 606 | assert latents.shape == latents_u.shape 607 | 608 | assert mu.shape == mu_e.shape 609 | assert sigma_inv.shape == sigma_inv_e.shape 610 | assert latents.shape == latents_e.shape 611 | 612 | # Confirm that the window_size property returns the correct size: 613 | assert model.window_size == WINDOW_SIZE 614 | 615 | 616 | @pytest.mark.parametrize( 617 | "head_class, batch_size, input_symbols, output_symbols, feature_dim," 618 | "mixture_components, expect_value_error", 619 | [ 620 | (architecture.MultivariateMixtureHead, 13, 3, None, 5, 7, False), 621 | (architecture.MultivariateMixtureHead, 13, 3, 3, 5, 7, False), 622 | (architecture.MultivariateMixtureHead, 13, 3, 2, 5, 7, False), 623 | (architecture.UnivariateMixtureHead, 13, 1, 1, 5, 7, False), 624 | (architecture.UnivariateMixtureHead, 13, 3, None, 5, 7, True), 625 | (architecture.UnivariateMixtureHead, 13, 3, 3, 5, 7, True), 626 | ], 627 | ) 628 | def test_head_classes( 629 | head_class, 630 | batch_size, 631 | input_symbols, 632 | output_symbols, 633 | feature_dim, 634 | mixture_components, 635 | expect_value_error, 636 | ): 637 | """Test that a head network can be created and evaluated 638 | with different internal feature dimensions. Also do some sanity checks on 639 | the output where the head should constrain it, such as having probabilities 640 | that add up to one and having an inverse sqrt of covariance matrix that is 641 | triangular. 
These are untrained networks so that's all we expect. We also 642 | check that the network executes properly with the training flag on and off. 643 | """ 644 | if expect_value_error: 645 | with pytest.raises(ValueError): 646 | head = head_class( 647 | input_symbols, 648 | output_symbols, 649 | feature_dimension=feature_dim, 650 | mixture_components=mixture_components, 651 | ) 652 | else: 653 | head = head_class( 654 | input_symbols, 655 | output_symbols, 656 | feature_dimension=feature_dim, 657 | mixture_components=mixture_components, 658 | ) 659 | for train in (True, False): 660 | head.train(train) 661 | log_p, mu, sigma_inv = head(torch.randn(batch_size, feature_dim)) 662 | 663 | assert log_p.shape == (batch_size, mixture_components) 664 | 665 | # Make sure all probabilities add up to one 666 | # (logs add up to zero) 667 | assert torch.abs(torch.sum(torch.logsumexp(log_p, dim=1))) < 1e-5 668 | 669 | if output_symbols is None: 670 | output_symbols = input_symbols 671 | 672 | assert mu.shape == (batch_size, mixture_components, output_symbols) 673 | assert sigma_inv.shape == ( 674 | batch_size, 675 | mixture_components, 676 | output_symbols, 677 | input_symbols, 678 | ) 679 | 680 | is_lower_triangular(sigma_inv) 681 | -------------------------------------------------------------------------------- /tests/test_loss_functions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import math 4 | 5 | import torch 6 | 7 | import deep_volatility_models.loss_functions as loss_functions 8 | 9 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 10 | # TODO: Why can't this be smaller? 11 | EPS = 1e-7 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "x,mu,sigma_inv,expected", 16 | [ 17 | # Case 0 18 | ( 19 | torch.ones((1, 1)), 20 | torch.ones((1, 1)), 21 | torch.ones((1, 1, 1)), 22 | -LOG_SQRT_TWO_PI, 23 | ), 24 | # Case 1 25 | ( 26 | torch.ones((1, 1)), 27 | torch.ones((1, 1)), 28 | 2.0 * torch.ones((1, 1, 1)), 29 | math.log(2.0) - LOG_SQRT_TWO_PI, 30 | ), 31 | # Case 2 32 | ( 33 | torch.ones((1, 1)), 34 | 0 * torch.ones((1, 1)), 35 | 2.0 * torch.ones((1, 1, 1)), 36 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 37 | ), 38 | # Case 3 39 | ( 40 | 0 * torch.ones((1, 1)), 41 | torch.ones((1, 1)), 42 | 2.0 * torch.ones((1, 1, 1)), 43 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 44 | ), 45 | ], 46 | ) 47 | def test_likelihood_cases(x, mu, sigma_inv, expected): 48 | log_loss = loss_functions.univariate_log_likelihood(x, mu, sigma_inv) 49 | assert float(log_loss) == pytest.approx(expected, EPS) 50 | -------------------------------------------------------------------------------- /tests/test_mixture_model_stats.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | import math 4 | import numpy as np 5 | 6 | import deep_volatility_models.mixture_model_stats as mixture_model_stats 7 | 8 | BATCH_SIZE = 16 9 | 10 | softmax = torch.nn.Softmax(dim=1) 11 | logsoftmax = torch.nn.LogSoftmax(dim=1) 12 | 13 | 14 | def test_multivariate_likelihood(): 15 | """ 16 | This only checks that multivariate_mixture_log_likelihood can be called 17 | with arguments of consistent dimension within the correct range. It does 18 | not confirm the correctness of the values returned. 
19 | """ 20 | for batch_size in range(BATCH_SIZE, BATCH_SIZE + 2): 21 | for mixture in range(1, 3): 22 | for channels in range(1, 3): 23 | x = torch.randn(batch_size, channels) 24 | log_p = logsoftmax(torch.randn(batch_size, mixture)) 25 | mu = torch.randn(batch_size, mixture, channels) 26 | sigma_inv = torch.tril( 27 | torch.randn(batch_size, mixture, channels, channels) 28 | ) 29 | 30 | log_loss = mixture_model_stats.multivariate_log_likelihood( 31 | x, log_p, mu, sigma_inv 32 | ) 33 | 34 | assert log_loss.shape == (batch_size,) 35 | assert float(torch.sum(log_loss)) != 0.0 36 | 37 | 38 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 39 | # TODO: Why can't this be smaller? 40 | EPS = 1e-7 41 | 42 | 43 | @pytest.mark.parametrize( 44 | "x,log_p,mu,sigma_inv,expected", 45 | [ 46 | # Case 0 47 | ( 48 | torch.ones((1, 1)), 49 | 0 * torch.ones((1, 1)), 50 | torch.ones((1, 1, 1)), 51 | torch.ones((1, 1, 1, 1)), 52 | -LOG_SQRT_TWO_PI, 53 | ), 54 | # Case 1 55 | ( 56 | torch.ones((1, 1)), 57 | 0 * torch.ones((1, 1)), 58 | torch.ones((1, 1, 1)), 59 | 2.0 * torch.ones((1, 1, 1, 1)), 60 | math.log(2.0) - LOG_SQRT_TWO_PI, 61 | ), 62 | # Case 2 63 | ( 64 | torch.ones((1, 1)), 65 | 0 * torch.ones((1, 1)), 66 | 0 * torch.ones((1, 1, 1)), 67 | 2.0 * torch.ones((1, 1, 1, 1)), 68 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 69 | ), 70 | # Case 3 71 | ( 72 | 0 * torch.ones((1, 1)), 73 | 0 * torch.ones((1, 1)), 74 | torch.ones((1, 1, 1)), 75 | 2.0 * torch.ones((1, 1, 1, 1)), 76 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 77 | ), 78 | ], 79 | ) 80 | def test_multivariate_likelihood_cases(x, log_p, mu, sigma_inv, expected): 81 | log_loss = mixture_model_stats.multivariate_log_likelihood(x, log_p, mu, sigma_inv) 82 | assert float(log_loss) == pytest.approx(expected, EPS) 83 | 84 | log_loss = mixture_model_stats.univariate_log_likelihood(x, log_p, mu, sigma_inv) 85 | assert float(log_loss) == pytest.approx(expected, EPS) 86 | 87 | 88 | def test_univeriate_fails_on_multivariate_input(): 89 | mb_size, mixture_components, symbols = (5, 3, 2) 90 | 91 | x = torch.randn(mb_size, symbols) 92 | log_p = torch.randn(mb_size, mixture_components) 93 | mu = torch.randn(mb_size, mixture_components, symbols) 94 | sigma_inv = torch.randn(mb_size, mixture_components, symbols, symbols) 95 | 96 | # As a sanity check that the dimensions are correct except for being 97 | # multi-variate, a ccall to 98 | # mixture_model_state.multivariate_log_likelihood() should return 99 | # *something*: 100 | 101 | mixture_model_stats.multivariate_log_likelihood(x, log_p, mu, sigma_inv) 102 | 103 | with pytest.raises(ValueError): 104 | log_loss = mixture_model_stats.univariate_log_likelihood( 105 | x, log_p, mu, sigma_inv 106 | ) 107 | 108 | p = torch.exp(log_p) 109 | with pytest.raises(ValueError): 110 | mixture_model_stats.univariate_combine_metrics(p, mu, sigma_inv) 111 | 112 | 113 | def test_fail_on_inconsistent_dimensions(): 114 | 115 | mb_size, mixture_components, symbols = (5, 3, 1) 116 | 117 | x = torch.randn(mb_size, symbols) 118 | log_p = torch.randn(mb_size, mixture_components) 119 | # Introduce an incompatible dimension 120 | mu = torch.randn(mb_size, mixture_components + 1, symbols) 121 | sigma_inv = torch.randn(mb_size, mixture_components, symbols, symbols) 122 | 123 | with pytest.raises(ValueError): 124 | mixture_model_stats.univariate_log_likelihood(x, log_p, mu, sigma_inv) 125 | 126 | with pytest.raises(ValueError): 127 | mixture_model_stats.multivariate_log_likelihood(x, log_p, mu, sigma_inv) 128 | 129 | p = torch.exp(log_p) 130 | with 
pytest.raises(ValueError): 131 | mixture_model_stats.univariate_combine_metrics(p, mu, sigma_inv) 132 | 133 | 134 | @pytest.mark.parametrize( 135 | "p, mu, sigma_inv, expected_mean, expected_variance", 136 | [ 137 | ( 138 | [[1.0]], 139 | [[[2.0]]], 140 | [[[[0.25]]]], 141 | torch.tensor([2.0]), 142 | torch.tensor([16.0]), 143 | ), 144 | ( 145 | [[0.75, 0.25]], 146 | [[[4.0], [8.0]]], 147 | [[[[0.25]], [[0.125]]]], 148 | torch.tensor([5.0]), 149 | torch.tensor([31.0]), 150 | ), 151 | ], 152 | ) 153 | def test_univariate_combine_metrics(p, mu, sigma_inv, expected_mean, expected_variance): 154 | mean, variance = mixture_model_stats.univariate_combine_metrics(p, mu, sigma_inv) 155 | print(f"\nReturned mean:\n{mean}") 156 | print(f"Expected mean:\n{expected_mean}") 157 | print(f"\nReturned variance:\n{variance}") 158 | print(f"Expected variance:\n{expected_variance}") 159 | assert mean == expected_mean 160 | assert variance == expected_variance 161 | -------------------------------------------------------------------------------- /tests/test_stock_data.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Standard Python modules 4 | import os 5 | from unittest.mock import patch 6 | 7 | # Third party modules 8 | import pandas as pd 9 | 10 | # Local imports 11 | import deep_volatility_models.util as util 12 | import deep_volatility_models.stock_data as stock_data 13 | 14 | SAMPLE_DF = pd.DataFrame( 15 | { 16 | "date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]), 17 | "open": [1.0, 2.0, 3.0], 18 | "close": [0.5, 2.5, 3.1], 19 | } 20 | ).set_index("date") 21 | 22 | SAMPLE_PATH = "any_path" 23 | 24 | 25 | @pytest.fixture 26 | def data_source(): 27 | """ 28 | Create an instance of a data source for testing 29 | """ 30 | mock_data_source = lambda symbols: {s.upper(): SAMPLE_DF for s in symbols} 31 | return mock_data_source 32 | 33 | 34 | def test_symbol_history_reader_and_writer(tmp_path): 35 | filename = os.path.join(tmp_path, "foo.csv") 36 | 37 | # Use writer to write SAMPLE_df 38 | 39 | # But first, intentionally reverse the order of dates. 40 | sample_copy = SAMPLE_DF.reset_index().sort_values("date", ascending=False) 41 | assert not util.is_sorted(sample_copy.date) 42 | 43 | # Define a helper to simplify writing slightly different versions of SAMEPLE_DF 44 | reader = stock_data.SymbolHistoryReader() 45 | 46 | def check(writer): 47 | with open(filename, "wb") as f: 48 | writer(f) 49 | 50 | # Use reader to read it back and compare the results. 51 | with open(filename, "rb") as f: 52 | loaded_df = reader(f) 53 | 54 | assert loaded_df.index.name == "date" 55 | assert util.is_sorted(loaded_df.index) 56 | assert (loaded_df == SAMPLE_DF).all().all() 57 | 58 | for df in [sample_copy, sample_copy.set_index("date")]: 59 | check(stock_data.SymbolHistoryWriter(df)) 60 | 61 | 62 | def test_file_system_store(tmp_path): 63 | symbol = "FOO" 64 | store = stock_data.FileSystemStore(tmp_path) 65 | assert not store.exists(symbol) 66 | 67 | store.write("FOO", stock_data.SymbolHistoryWriter(SAMPLE_DF)) 68 | assert store.exists("FOO") 69 | 70 | loaded_df = store.read(symbol, stock_data.SymbolHistoryReader()) 71 | assert (loaded_df == SAMPLE_DF).all().all() 72 | 73 | 74 | def test_check_cache_exists_path(tmp_path): 75 | """ 76 | Check that the os.path.exists() gets called with the correct path 77 | and check that exists is not case sensitive. 
78 | """ 79 | tmp_path_store = stock_data.FileSystemStore(tmp_path) 80 | with patch("deep_volatility_models.stock_data.os.path.exists") as os_path_exists: 81 | tmp_path_store.exists("symbol1") 82 | os_path_exists.assert_called_with( 83 | os.path.join(tmp_path_store.cache_dir, "symbol1.csv") 84 | ) 85 | 86 | tmp_path_store.exists("SyMbOL2") 87 | os_path_exists.assert_called_with( 88 | os.path.join(tmp_path_store.cache_dir, "symbol2.csv") 89 | ) 90 | 91 | 92 | def test_history(data_source, tmp_path): 93 | partial_symbol_set = set(["ABC", "DEF"]) 94 | missing_symbol_set = set(["GHI", "JKL"]) 95 | full_symbol_set = partial_symbol_set.union(missing_symbol_set) 96 | 97 | tmp_path_store = stock_data.FileSystemStore(tmp_path) 98 | caching_download = stock_data.CachingDownloader( 99 | data_source, 100 | tmp_path_store, 101 | stock_data.SymbolHistoryWriter, 102 | overwrite_existing=False, 103 | ) 104 | 105 | response = caching_download(partial_symbol_set) 106 | assert len(response) == len(partial_symbol_set) 107 | for symbol in partial_symbol_set: 108 | assert tmp_path_store.exists(symbol) 109 | 110 | for symbol in missing_symbol_set: 111 | assert not tmp_path_store.exists(symbol) 112 | 113 | response = caching_download(full_symbol_set) 114 | # Check that only the missing symbols were downloaded 115 | # This is true if all missing symbols are in the response 116 | # and if the length of the response is equal to the number 117 | # of missing symbols 118 | assert len(response) == len(missing_symbol_set) 119 | for symbol in missing_symbol_set: 120 | assert symbol in response 121 | 122 | for symbol in full_symbol_set: 123 | assert tmp_path_store.exists(symbol) 124 | 125 | # Try downloading again, which should be a no-op 126 | response = caching_download(full_symbol_set) 127 | assert len(response) == 0 128 | 129 | # Try loading one of the downloaded files 130 | loader = stock_data.CachingSymbolHistoryLoader( 131 | data_source, tmp_path_store, overwrite_existing=False 132 | ) 133 | # load("pqr") 134 | combiner = stock_data.PriceHistoryConcatenator() 135 | combiner(loader("pqr")) 136 | -------------------------------------------------------------------------------- /tests/test_time_series_datasets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Third party libraries 4 | import torch 5 | 6 | # Local modules 7 | import deep_volatility_models.time_series_datasets as time_series_datasets 8 | 9 | 10 | # Constants used in tests. 11 | A_SYMBOL_ENCODING = 21 12 | 13 | 14 | """ 15 | The test cases for multivariate_stats() was generated as follows: 16 | 17 | Assume x = L*z + b has zero mean and covariance = I 18 | 19 | Now E[x] = b 20 | C_x = E[xx’] = E[L (zz’) L’] = LL’ 21 | Where L is a lower triangular matrix. 22 | 23 | We can use this to generate series of x with a specific L and b. 24 | For example, let b = [1, 2] 25 | let L = [1, 0], [-1; 2]] 26 | 27 | We need to choose values for z with zero mean and C = I 28 | 29 | One possibility is 30 | Z = [[1, 1], [1, -1], [-1, 1], [-1, -1]] 31 | 32 | Where each row is a (z1, z2) pair. 
33 | 34 | Because “time” is the row dimension of Z, we need to transpose the original 35 | equation to be: 36 | 37 | x = ZL’ + b’ 38 | 39 | where Z is as above, L=[[1, 0], [-1, 2]], 40 | and b’ = [[1, 2], [1, 2], [1, 2], [1, 2]] 41 | 42 | >>> l = np.array([[1, 0], [-1, 2]]) 43 | >>> l 44 | array([[ 1, 0], 45 | [-1, 2]]) 46 | >>> z=np.array([[1, 1], [1, -1], [-1, 1], [-1, -1]]) 47 | >>> z 48 | array([[ 1, 1], 49 | [ 1, -1], 50 | [-1, 1], 51 | [-1, -1]]) 52 | >>> b=np.array([[1, 2], [1, 2], [1, 2], [1, 2]]) 53 | >>> b 54 | array([[1, 2], 55 | [1, 2], 56 | [1, 2], 57 | [1, 2]]) 58 | 59 | >>> np.matmul(z, l.T) + b 60 | array([[ 2, 3], 61 | [ 2, -1], 62 | [ 0, 5], 63 | [ 0, 1]]) 64 | >>> 65 | 66 | """ 67 | 68 | 69 | @pytest.mark.parametrize( 70 | "series, mu_expected, l_expected", 71 | [ 72 | ( 73 | [[2.0, 3.0], [2.0, -1.0], [0.0, 5.0], [0.0, 1.0]], 74 | torch.tensor([1, 2], dtype=torch.float), 75 | torch.tensor([[1.0, 0.0], [-1.0, 2.0]]), 76 | ), 77 | ], 78 | ) 79 | def test_multivariate_stats(series, mu_expected, l_expected): 80 | mu, l = time_series_datasets.multivariate_stats(series) 81 | assert mu.shape == mu_expected.shape 82 | assert l.shape == l_expected.shape 83 | print(f"mu returned:\n{mu}") 84 | print(f"mu expected:\n{mu_expected}") 85 | print(f"\nl returned:\n{l}") 86 | print(f"l expected:\n{l_expected}") 87 | 88 | # Fortunately the test case is compute *exactly* so no approximate 89 | # comparisons are necessary. 90 | assert (mu == mu_expected).all() 91 | assert (l == l_expected).all() 92 | 93 | 94 | def test_rolling_window_arg_check(): 95 | with pytest.raises(ValueError): 96 | time_series_datasets.RollingWindow(range(10), 3, stride=0) 97 | 98 | with pytest.raises(ValueError): 99 | time_series_datasets.RollingWindow( 100 | [[1, 3], [3, 4], [5, 6]], 101 | 2, 102 | create_channel_dim=True, 103 | ) 104 | 105 | 106 | @pytest.mark.parametrize( 107 | "series,window_size,stride,create_channel_dim,expected", 108 | [ 109 | ( 110 | range(10), 111 | 3, 112 | 1, 113 | False, 114 | [ 115 | torch.tensor(range(0, 3)), 116 | torch.tensor(range(1, 4)), 117 | torch.tensor(range(2, 5)), 118 | torch.tensor(range(3, 6)), 119 | torch.tensor(range(4, 7)), 120 | torch.tensor(range(5, 8)), 121 | torch.tensor(range(6, 9)), 122 | torch.tensor(range(7, 10)), 123 | ], 124 | ), 125 | # Same case with a different stride 126 | ( 127 | range(10), 128 | 3, 129 | 2, 130 | False, 131 | [ 132 | torch.tensor(range(0, 3)), 133 | torch.tensor(range(2, 5)), 134 | torch.tensor(range(4, 7)), 135 | torch.tensor(range(6, 9)), 136 | ], 137 | ), 138 | # Same case with create_channel_dim=True 139 | ( 140 | range(10), 141 | 3, 142 | 1, 143 | True, 144 | [ 145 | torch.tensor([list(range(0, 3))]), 146 | torch.tensor([list(range(1, 4))]), 147 | torch.tensor([list(range(2, 5))]), 148 | torch.tensor([list(range(3, 6))]), 149 | torch.tensor([list(range(4, 7))]), 150 | torch.tensor([list(range(5, 8))]), 151 | torch.tensor([list(range(6, 9))]), 152 | torch.tensor([list(range(7, 10))]), 153 | ], 154 | ), 155 | # Check a sequence of vectors 156 | ( 157 | [ 158 | [1, 2, 3], 159 | [4, 5, 6], 160 | [7, 8, 9], 161 | [10, 11, 12], 162 | ], 163 | 2, 164 | 1, 165 | False, 166 | [ 167 | torch.tensor([[1, 4], [2, 5], [3, 6]]), 168 | torch.tensor([[4, 7], [5, 8], [6, 9]]), 169 | torch.tensor([[7, 10], [8, 11], [9, 12]]), 170 | ], 171 | ), 172 | ], 173 | ) 174 | def test_rolling_window_series( 175 | series, window_size, stride, create_channel_dim, expected 176 | ): 177 | d = time_series_datasets.RollingWindow( 178 | series, window_size, 
stride=stride, create_channel_dim=create_channel_dim 179 | ) 180 | assert len(d) == len(expected) 181 | 182 | # We use indexes here rather than iterators because we're specifically 183 | # testing the implementation of __getitem__() 184 | for i in range(len(expected)): 185 | print(f"\nwindow returned:\n{d[i]}") 186 | print(f"window expected:\n{expected[i]}") 187 | assert d[i].shape == expected[i].shape 188 | assert (d[i] == expected[i]).all() 189 | 190 | # Make sure negative indexes work 191 | for i in range(-len(expected), 0): 192 | assert (d[i] == expected[i]).all() 193 | 194 | with pytest.raises(IndexError): 195 | d[len(expected)] 196 | with pytest.raises(IndexError): 197 | d[-len(expected) - 1] 198 | 199 | 200 | @pytest.mark.parametrize( 201 | "series,window_size,stride,expected_window,expected_target", 202 | [ 203 | ( 204 | range(10), 205 | 3, 206 | 2, 207 | [ 208 | torch.tensor([0, 1]), 209 | torch.tensor([2, 3]), 210 | torch.tensor([4, 5]), 211 | torch.tensor([6, 7]), 212 | ], 213 | [ 214 | torch.tensor(2), 215 | torch.tensor(4), 216 | torch.tensor(6), 217 | torch.tensor(8), 218 | ], 219 | ), 220 | # Check a sequence of vectors 221 | ( 222 | [ 223 | [1, 2, 3], 224 | [4, 5, 6], 225 | [7, 8, 9], 226 | [10, 11, 12], 227 | ], 228 | 2, 229 | 1, 230 | [ 231 | torch.tensor([[1], [2], [3]]), 232 | torch.tensor([[4], [5], [6]]), 233 | torch.tensor([[7], [8], [9]]), 234 | ], 235 | [ 236 | torch.tensor([[4], [5], [6]]), 237 | torch.tensor([[7], [8], [9]]), 238 | torch.tensor([[10], [11], [12]]), 239 | ], 240 | ), 241 | ], 242 | ) 243 | def test_target_selection( 244 | series, window_size, stride, expected_window, expected_target 245 | ): 246 | raw_windows = time_series_datasets.RollingWindow(series, window_size, stride=stride) 247 | window_and_target = time_series_datasets.ContextWindowAndTarget( 248 | raw_windows, target_dim=1 249 | ) 250 | encoding_window_and_target = time_series_datasets.ContextWindowEncodingAndTarget( 251 | A_SYMBOL_ENCODING, window_and_target 252 | ) 253 | 254 | assert len(window_and_target) == len(expected_target) 255 | assert len(encoding_window_and_target) == len(expected_target) 256 | 257 | # We use indexes here rather than iterators because we're specifically 258 | # testing the implementation of __getitem__() 259 | for i in range(len(expected_target)): 260 | window, target = window_and_target[i] 261 | print(f"window returned:\n\n{window}") 262 | print(f"window expected:\n{expected_window[i]}") 263 | assert window.shape == expected_window[i].shape 264 | assert (window == expected_window[i]).all() 265 | 266 | print(f"\ntarget returned:\n{target}") 267 | print(f"target expected:\n{expected_target[i]}") 268 | assert target.shape == expected_target[i].shape 269 | assert (target == expected_target[i]).all() 270 | 271 | (window, encoding), target = encoding_window_and_target[i] 272 | assert encoding == A_SYMBOL_ENCODING 273 | assert window.shape == expected_window[i].shape 274 | assert (window == expected_window[i]).all() 275 | assert target.shape == expected_target[i].shape 276 | assert (target == expected_target[i]).all() 277 | 278 | # Make sure negatives indexes work 279 | for i in range(-len(expected_target), 0): 280 | window, target = window_and_target[i] 281 | assert (window == expected_window[i]).all() 282 | assert (target == expected_target[i]).all() 283 | 284 | (window, encoding), target = encoding_window_and_target[i] 285 | assert encoding == A_SYMBOL_ENCODING 286 | assert (window == expected_window[i]).all() 287 | assert (target == expected_target[i]).all() 288 
| 289 | with pytest.raises(IndexError): 290 | window_and_target[len(expected_target)] 291 | 292 | with pytest.raises(IndexError): 293 | window_and_target[-len(expected_target) - 1] 294 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Local modules 4 | import deep_volatility_models.util as util 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "l,expected", 9 | [ 10 | ([], True), # Check empty list. 11 | (range(4), True), # Make sure iterables work 12 | (tuple(range(4)), True), # Make sure instantiated tuples work 13 | (list(range(4)), True), # Instantiated lists 14 | (reversed(range(4)), False), # Reversed lists should fail. 15 | ([1, 1, 2, 2], True), # Check that non-strict inequality is ok. 16 | ([1] * 5, True), 17 | ([1, 1, 1, 0], False), # Edge cases? 18 | ], 19 | ) 20 | def test_is_sorted(l, expected): 21 | assert util.is_sorted(l) == expected 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "test_input, expected_output", 26 | [ 27 | ("foo", ["FOO"]), 28 | ([], []), 29 | (["foo"], ["FOO"]), 30 | (("a", "b"), ["A", "B"]), 31 | (iter(("x", "y", "z")), ["X", "Y", "Z"]), 32 | ], 33 | ) 34 | def test_to_symbol_list(test_input, expected_output): 35 | print(f"test input: {test_input}") 36 | print(f"expected output: {expected_output}") 37 | assert util.to_symbol_list(test_input) == expected_output 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "test_input, expected_output", 42 | [ 43 | ("foo", "foo"), 44 | ("Foo", "foo"), 45 | ("a b c", "a_b_c"), 46 | ("A b C", "a_b_c"), 47 | ], 48 | ) 49 | def test_rename_column(test_input, expected_output): 50 | print(f"test input: {test_input}") 51 | print(f"expected output: {expected_output}") 52 | assert util.rename_column(test_input) == expected_output 53 | --------------------------------------------------------------------------------