├── .gitignore ├── Drawings ├── deep_volatility_architecture.tex └── deep_volatility_architecture_horiz.tex ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml ├── scripts └── train_example.sh ├── setup.cfg ├── src └── deep_volatility_models │ ├── __init__.py │ ├── architecture.py │ ├── data_sources.py │ ├── embedding_models.py │ ├── evaluate_model.py │ ├── hyperopt_opt.py │ ├── hyperopt_opt2.py │ ├── hyperopt_opt_risk_neutral.py │ ├── hyperopt_risk_neutral_no_mixture.py │ ├── loss_functions.py │ ├── mixture_model_stats.py │ ├── model_wrappers.py │ ├── models.py │ ├── optuna_opt.py │ ├── sample.py │ ├── stock_data.py │ ├── time_series_datasets.py │ ├── train_univariate.py │ ├── training.py │ └── util.py └── tests ├── __init__.py ├── test_architecture.py ├── test_loss_functions.py ├── test_mixture_model_stats.py ├── test_stock_data.py ├── test_time_series_datasets.py └── test_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.pt 3 | *~ 4 | *.log 5 | *.output 6 | *.png 7 | *.aux 8 | *.dvi 9 | *.pdf 10 | *.ps 11 | __pycache__ 12 | .ipynb_checkpoints 13 | -------------------------------------------------------------------------------- /Drawings/deep_volatility_architecture.tex: -------------------------------------------------------------------------------- 1 | % \def\pgfsysdriver{pgfsys-dvipdfm.def} 2 | \documentclass[dvips,tikz,12pt,convert={true,density=1200}]{standalone} 3 | % \documentclass[dvipdfm,tikz]{standalone} 4 | \usepackage{tikz} 5 | \usetikzlibrary{arrows.meta} 6 | \usetikzlibrary{positioning} 7 | \usetikzlibrary{calc} 8 | \newdimen\cellsize\cellsize=6pt 9 | \tikzset{tensor/.style args={#1#2}{rectangle,draw=blue!50,fill=blue!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 10 | path picture={\draw[xstep=\the\cellsize,ystep=0cm,black, very thin] (path picture bounding box.south west) grid (path picture bounding box.north east);}% 11 | }} 12 | \tikzset{tensor2/.style args={#1#2}{rectangle,draw=blue!50,fill=blue!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 13 | path picture={\dimen0=#1\count0=\dimen0\divide\count0 by \cellsize 14 | \dimen1=#2\count1=\dimen1\divide\count1 by \cellsize 15 | \draw[black,ultra thin] 16 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(\the\dimen0, 0pt)} 17 | ; 18 | \draw[black,very thin] 19 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt, #2)} 20 | ; 21 | }}} 22 | \tikzset{tensor3/.style args={#1#2}{rectangle,draw=blue!50,fill=blue!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 23 | path picture={\dimen0=#1\divide\dimen0 by 2\advance\dimen0 by -4pt\count0=\dimen0\divide\count0 by \cellsize 24 | \dimen1=#2\count1=\dimen1\divide\count1 by \cellsize 25 | \dimen2=\cellsize\multiply\dimen2 by \count0 26 | \draw[black,ultra thin] 27 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(\the\dimen2, 0pt)} 28 | (path picture bounding box.south east) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(-\the\dimen2, 0pt)} 29 | ; 30 | \draw[black,very thin] 31 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt, #2)} 32 | (path picture bounding box.south east) \foreach \i in {1,...,\the\count0}{++(-\the\cellsize,0pt) -- +(0pt, #2)} 33 | ; 34 | \draw[black,very thin] 35 | (path picture bounding box) +(-2pt, 0pt) [fill] circle[radius=0.3pt] +(0,0) 
circle[radius=0.3pt] +(2pt,0pt) circle[fill,radius=0.3pt] ; 36 | }}} 37 | 38 | \begin{document} 39 | \begin{tikzpicture} 40 | [block/.style ={rectangle,draw=red!50,fill=red!20,minimum size=4mm}, 41 | node distance=0.30cm 42 | ] 43 | \node[tensor3={256pt}{\the\cellsize},rotate=90] (timeseriesinput) [label=left:$1\times256$] {}; 44 | \node[block] (firstconvolution) [below=of timeseriesinput] {1D Conv} 45 | edge [{Latex}-] (timeseriesinput) 46 | ; 47 | \node[tensor3={64pt}{18pt}] (firstlayeroutput) [below=of firstconvolution] [label=left:${\rm features}\times 64$]{} 48 | edge [{Latex}-] (firstconvolution) 49 | ; 50 | \node[block] (secondconvolution) [below=of firstlayeroutput] {1D Conv} 51 | edge [{Latex}-] (firstlayeroutput) 52 | ; 53 | \node[tensor2={\the\cellsize}{18pt}] (latent) [below=of secondconvolution] [label=left:${\rm features}\times 1$] {} 54 | edge [{Latex}-] (secondconvolution) 55 | ; 56 | \node[tensor2={\the\cellsize}{24pt}] (embedding) [below=of latent] [label=left:${\rm embedding\ dimension}\times 1$] {} 57 | ; 58 | \node[circle,fill,inner sep=1pt,outer sep=0pt] (aggregate) at ($(latent)!0.5!(embedding)$) [right=0.35cm] {} 59 | ; 60 | \node[block] (mixing) [right=of aggregate] {FC} 61 | ; 62 | 63 | \draw[-{Latex}] (latent) -| (aggregate) -- (mixing) ; 64 | \draw (embedding) -| (aggregate) ; 65 | 66 | \end{tikzpicture} 67 | \end{document} 68 | -------------------------------------------------------------------------------- /Drawings/deep_volatility_architecture_horiz.tex: -------------------------------------------------------------------------------- 1 | \def\pgfsysdriver{pgfsys-dvipdfm.def} 2 | \documentclass[dvipdfm,tikz,12pt]{standalone} 3 | % \documentclass[dvips,tikz,12pt,convert={true,density=1200}]{standalone} 4 | \usepackage{tikz} 5 | % 6 | % \usepackage{xcharter-otf} 7 | % \usepackage[scaled=.98,sups,osf]{XCharter}% lining figures in math, osf in text 8 | \usepackage[scaled=.98,sups]{XCharter}% lining figures in math, osf in text 9 | \usepackage[scaled=1.04,varqu,varl]{inconsolata}% inconsolata typewriter 10 | \usepackage[type1]{cabin}% sans serif 11 | \usepackage[uprightscript,charter,vvarbb,scaled=1.05]{newtxmath} 12 | \linespread{1.04} 13 | % 14 | \usetikzlibrary{arrows.meta} 15 | \usetikzlibrary{positioning} 16 | \usetikzlibrary{calc} 17 | \usetikzlibrary{quotes} 18 | \newdimen\cellsize\cellsize=6pt 19 | \tikzset{tensor/.style args={#1#2#3}{rectangle,draw=black!50,fill=#3!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 20 | path picture={\dimen0=#1\count0=\dimen0\divide\count0 by \cellsize 21 | \dimen1=#2\count1=\dimen1\divide\count1 by \cellsize 22 | \draw[black!50,ultra thin] 23 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt, #2)} 24 | ; 25 | \draw[black!50,thin] 26 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(\the\dimen0, 0pt)} 27 | ; 28 | }}} 29 | \tikzset{bigtensor/.style args={#1#2#3}{rectangle,draw=black!50,fill=#3!20,minimum width=#1,minimum height=#2,inner sep=0pt,% 30 | path picture={\dimen0=#1\count0=\dimen1\divide\count0 by \cellsize 31 | \dimen1=#2\divide\dimen1 by 2\advance\dimen1 by -4pt\count1=\dimen1\divide\count1 by \cellsize 32 | \dimen2=\cellsize\multiply\dimen2 by \count1 33 | \draw[black!50,thin] 34 | (path picture bounding box.south west) \foreach \i in {1,...,\the\count1}{++(0pt,\the\cellsize) -- +(#1,0pt)} 35 | (path picture bounding box.north west) \foreach \i in {1,...,\the\count1}{++(0pt,-\the\cellsize) -- 
+(#1,0pt)} 36 | ; 37 | \draw[black!50,ultra thin] 38 | (path picture bounding box.south west) \foreach \j in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt,\the\dimen2)} 39 | (path picture bounding box.north west) \foreach \j in {1,...,\the\count0}{++(\the\cellsize,0pt) -- +(0pt,-\the\dimen2)} 40 | ; 41 | \draw[black!75,very thin] 42 | (path picture bounding box) +(0pt,-2pt) [fill] circle[radius=0.4pt] +(0,0) circle[radius=0.4pt] +(0pt,2pt) circle[fill,radius=0.4pt] ; 43 | }}} 44 | 45 | \begin{document} 46 | \begin{tikzpicture} 47 | [block/.style={rectangle,draw=red!50,fill=red!20,minimum size=4mm,align=center}, 48 | connection/.style={circle,fill,inner sep=1pt,outer sep=0pt}, 49 | node distance=1.70cm and 0.95cm, 50 | every edge quotes/.style={font=\tiny,auto=right}, 51 | every label/.style={font=\tiny,text width=1.75cm,align=center} 52 | ] 53 | \node[bigtensor={\the\cellsize}{124pt}{green},fill=green!20] (timeseriesinput) [label=below:$256\times1$\\ ($x_{n-256} \hbox{ \it to } x_{n-1}$)] 54 | [label={[font=\small]above:{\it time series}}] {}; 55 | ; 56 | \node[bigtensor={18pt}{72pt}{blue}] (layer1) [right=of timeseriesinput] [label=below:$64 \times n_f$]{} 57 | edge ["conv1",{Latex}-] (timeseriesinput) 58 | ; 59 | \node[bigtensor={18pt}{46pt}{blue}] (layer2) [right=of layer1] [label=below:$16 \times n_f$]{} 60 | edge ["conv2",{Latex}-] (layer1) 61 | ; 62 | \node[tensor={18pt}{24pt}{blue}] (layer3) [right=of layer2] [label=below:$4 \times n_f$] {} 63 | edge ["conv3",{Latex}-] (layer2) 64 | ; 65 | \node[tensor={18pt}{\the\cellsize}{blue}] (latent) [right=of layer3] [label=below:$1 \times n_f$] {} 66 | edge ["conv4",{Latex}-] (layer3) 67 | ; 68 | \node[tensor={\the\cellsize}{18pt}{orange}] (flat latent) [right=of latent] [label=below:$n_f \times 1$] [label={[font=\small]above:{\it ts latent}}] {} 69 | edge ["transpose",{Latex}-] (latent) 70 | ; 71 | \node[tensor={\the\cellsize}{24pt}{green}] (embedding) [below=of flat latent] 72 | [label=below:$n_e \times 1$] [label={[font=\small,text depth=0pt]above:\parbox[b]{1.75cm}{\centering \it stock\\[-0.8ex]embedding}}] {} 73 | ; 74 | 75 | \node (hidden1) at ($(flat latent)!0.5!(embedding)$) {} 76 | ; 77 | \node[connection] (aggregate) [right=0.85cm of hidden1] {} 78 | ; 79 | \draw (flat latent) -| (aggregate) ; 80 | \draw (embedding) -| (aggregate) ; 81 | 82 | 83 | \node[tensor={\the\cellsize}{18pt}{red}] (latent2) [right=0.6cm of aggregate] [label=below:$n_f \times 1$] [label={[font=\small]above:{\it latent}}] {} 84 | edge ["fc1",{Latex}-] (aggregate) 85 | ; 86 | 87 | \node[connection] (split) [right=0.6cm of latent2] {}; 88 | 89 | \node (sigma) [right=3.4cm of flat latent] {$\sigma_n$}; 90 | \node (mu) [right=3.4cm of embedding] {$\mu_n$}; 91 | 92 | \draw[-{Latex}] (latent2) -- (split) |- node[near end,auto=left] {\tiny $\sigma$-head} (sigma) ; 93 | \draw[-{Latex}] (split) |- node[near end,auto=left] {\tiny $\mu$-head} (mu) ; 94 | 95 | 96 | \end{tikzpicture} 97 | \end{document} 98 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Deep Volatility Models 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Volatility Models (for stock prices) 2 | 3 | This package uses convolutional neural networks (implemented in PyTorch) to train mixture models to model 4 | the volatility of stock prices. 5 | 6 | A single model is trained on a number of different stock symbols. Internally, 7 | an embedding is learned for each symbol. In other words, a convolutional neural 8 | network learns general features of time series of daily returns that predict the 9 | volatility along with an embedding that tunes the result for different symbols. 10 | 11 | ## Motivation 12 | 13 | The volatility of stock returns changes daily. The models produced by this 14 | package predict the *distribution* of the log returns for the next trading date. 15 | The actual return is virtually impossible to predict, but predicting the 16 | distribution of returns has several uses: 17 | 18 | 1. *The distribution can be sampled to generate simulated sequences of returns 19 | that can be used as synthetic data to test various trading algorithms.* 20 | Datasets with historic daily returns are very small, so testing algorithms using 21 | historic data is very prone to overfitting. 22 | 23 | 24 | 2. *Knowing the distribution of the daily returns (especially the volatility) 25 | can be used to determine fair prices for stock options.* The famous 26 | Black-Scholes formula predicts fair option prices. However, it assumes the 27 | daily returns to be stationary and normally distributed. In practice, observed 28 | daily returns are not stationary (the variance varies with time) and the 29 | returns are not normally distributed. They tend to have "long tails" 30 | compared to a normal distribution (i.e., kurtosis) and they are not always symmetric 31 | (i.e., skew). It's possible to estimate the variances by computing the 32 | variance of a trailing sample. However, during periods of increasing 33 | volatility this would underestimate the volatility since the volatility today 34 | can be significantly greater than the volatility of the past N days. 35 | Likewise, during periods of 36 | decreasing volatility this would overestimate the volatility. The goal is to 37 | determine the *instantaneous* volatility to provide estimates of the distribution of 38 | daily returns during the next trading day (or the next few trading days). 39 | 40 | ### Installation 41 | 42 | This package can be installed by running `pip install .` in the top-level directory of a `git clone` checkout: 43 | 44 | pip install .
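Alternatively, since the project is managed with Poetry (note the `pyproject.toml` and `poetry.lock` files), the package and its dependencies can be installed into a Poetry-managed virtual environment. A minimal sketch, assuming Poetry is already installed:

    poetry install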
45 | 46 | ### Train a new model on a set of symbols: 47 | 48 | Ideally you would train models on a larger set of symbols. Here we use a small 49 | set for demo purposes: 50 | 51 | python -m deep_volatility_models.train_univariate --symbol SPY --symbol QQQ --symbol BND 52 | 53 | 54 | ### Evaluate the model on some of the symbols 55 | This script will produce a table and a plot with volatility and mean predictions 56 | for the past and for the next trading day. 57 | 58 | python -m deep_volatility_models.evaluate_model --symbol SPY --symbol BND 59 | 60 | ## Future extensions 61 | 62 | Models generated as described above do not model correlations between symbols. 63 | It's possible to generate multivariate models that represent the correlations 64 | between symbols. 65 | 66 | The inference code described above infers the parameters of a mixture 67 | model representing the distribution of daily returns. No code has 68 | been provided here to sample these distributions to generate synthetic 69 | data. 70 | 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deep_volatility_models" 3 | version = "0.0.1" 4 | description = "Volatility models for stock prices using deep learning and mixture models." 5 | authors = ["Mark A Wicks "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.8,<3.11" 9 | pandas = "^1.2.3" 10 | torch = "^1.12.0" 11 | yfinance = "^0.1.59" 12 | matplotlib = "^3.4.0" 13 | sklearn = "^0.0" 14 | hyperopt = "^0.2" 15 | optuna = "^2.10" 16 | 17 | [tool.poetry.dev-dependencies] 18 | pytest = "^5.2" 19 | black = "^22.3.0" 20 | 21 | [build-system] 22 | requires = ["poetry-core>=1.0.0a6", "setuptools>=46.0.0"] 23 | build-backend = "poetry.core.masonry.api" 24 | -------------------------------------------------------------------------------- /scripts/train_example.sh: -------------------------------------------------------------------------------- 1 | SYMBOLS="bnd edv tyd gld vnq vti spy qqq qld xmvm vbk xlv fxg rxl fxl ibb vgt iyf xly uge jnk" 2 | 3 | args="" 4 | for symbol in $SYMBOLS 5 | do 6 | args="$args --symbol $symbol" 7 | done 8 | python -m deep_volatility_models.train_univariate $* $args 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = deep-volatility-models-mawicks 3 | version = 0.0.1 4 | author = Mark A Wicks 5 | author_email = Mark A Wicks 6 | description = Volatility models for stock prices using deep learning and mixture models. 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/mawicks/deep-volatility-models 10 | project_urls = 11 | Bug Tracker = https://github.com/mawicks/deep-volatility-models/issues 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: OSI Approved :: MIT License 15 | Operating System :: OS Independent 16 | 17 | [options] 18 | package_dir = 19 | = src 20 | packages = find: 21 | python_requires = >=3.8 22 | 23 | [options.packages.find] 24 | where = src 25 | -------------------------------------------------------------------------------- /src/deep_volatility_models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import architecture 2 | from . import data_sources 3 | from . import embedding_models 4 | 5 | # from .
import evaluate_model 6 | from . import mixture_model_stats 7 | from . import model_wrappers 8 | from . import models 9 | from . import sample 10 | from . import time_series_datasets 11 | 12 | # from . import train_univariate 13 | from . import training 14 | from . import util 15 | -------------------------------------------------------------------------------- /src/deep_volatility_models/data_sources.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Callable, Dict, Iterable, Union 4 | import zipfile 5 | 6 | # Third party modules 7 | import numpy as np 8 | import pandas as pd 9 | import yfinance as yf 10 | 11 | # Local modules 12 | from deep_volatility_models import util 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | 16 | DataSource = Callable[[Union[str, Iterable[str]]], Dict[str, pd.DataFrame]] 17 | 18 | 19 | def YFinanceSource() -> DataSource: 20 | """ 21 | Sample usage: 22 | >>> from deep_volatility_models import data_sources 23 | >>> symbols = ["SPY", "QQQ"] 24 | >>> ds = data_sources.YFinanceSource() 25 | >>> response = ds(symbols) 26 | >>> response["SPY"][:4][['open', 'close']] # doctest: +NORMALIZE_WHITESPACE 27 | open close 28 | date 29 | 1993-02-01 43.96875 44.25000 30 | 1993-02-02 44.21875 44.34375 31 | 1993-02-03 44.40625 44.81250 32 | 1993-02-04 44.96875 45.00000 33 | >>> 34 | """ 35 | 36 | def _add_columns(df): 37 | new_df = df.dropna().reset_index() 38 | rename_dict = {c: util.rename_column(c) for c in new_df.columns} 39 | log_return = np.log(new_df["Adj Close"] / new_df["Adj Close"].shift(1)) 40 | new_df = new_df.assign(log_return=log_return) 41 | new_df.rename(columns=rename_dict, inplace=True) 42 | new_df.set_index("date", inplace=True) 43 | return new_df 44 | 45 | def price_history(symbol_set: Union[Iterable[str], str]) -> Dict[str, pd.DataFrame]: 46 | 47 | # Convert symbol_set to a list 48 | symbols = util.to_symbol_list(symbol_set) 49 | 50 | # Do the download 51 | df = yf.download( 52 | symbols, period="max", group_by="ticker", actions=True, progress=False 53 | ) 54 | response = {} 55 | 56 | for symbol in symbols: 57 | # The `group_by` option for yf.download() behaves differently when there's only one symbol. 58 | # Always return a dictionary of dataframes, even for one symbol. 
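            # Assumption about yfinance behavior, added for clarity: with several
            # tickers, yf.download() returns a frame whose columns are a MultiIndex
            # keyed by ticker, so each symbol's data is selected with df[symbol];
            # with a single ticker the columns are typically flat, so the frame is
            # used directly.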
59 | if len(symbols) > 1: 60 | symbol_df = df[symbol] 61 | else: 62 | symbol_df = df 63 | 64 | response[symbol] = ( 65 | _add_columns(symbol_df).dropna().applymap(lambda x: round(x, 6)) 66 | ) 67 | 68 | return response 69 | 70 | return price_history 71 | 72 | 73 | def HugeStockMarketDatasetSource(zip_filename) -> DataSource: 74 | """ 75 | Sample usage 76 | >>> from deep_volatility_models import data_sources 77 | >>> symbols = ["SPY", "QQQ"] 78 | >>> ds = data_sources.HugeStockMarketDatasetSource('archive.zip') 79 | >>> response = ds(symbols) 80 | """ 81 | 82 | def _add_columns(df): 83 | new_df = df.dropna().reset_index() 84 | rename_dict = {c: util.rename_column(c) for c in new_df.columns} 85 | new_df.rename(columns=rename_dict, inplace=True) 86 | 87 | log_return = np.log(new_df["close"] / new_df["close"].shift(1)) 88 | new_df = new_df.assign(log_return=log_return) 89 | 90 | new_df.set_index("date", inplace=True) 91 | return new_df 92 | 93 | def price_history(symbol_set: Union[Iterable[str], str]) -> Dict[str, pd.DataFrame]: 94 | 95 | # Convert symbol_set to a list 96 | symbols = util.to_symbol_list(symbol_set) 97 | response = {} 98 | 99 | with zipfile.ZipFile(zip_filename, "r") as open_zipfile: 100 | for symbol in symbols: 101 | found = False 102 | for prefix in ["Data/Stocks", "Data/ETFs"]: 103 | try: 104 | name = f"{prefix}/{symbol.lower()}.us.txt" 105 | symbol_df = pd.read_csv(open_zipfile.open(name)) 106 | response[symbol] = ( 107 | _add_columns(symbol_df) 108 | .dropna() 109 | .applymap(lambda x: round(x, 6)) 110 | ) 111 | found = True 112 | except KeyError: 113 | pass 114 | if not found: 115 | raise ValueError( 116 | f"Symbol {symbol} not found in Huge Stock Market Dataset" 117 | ) 118 | 119 | return response 120 | 121 | return price_history 122 | 123 | 124 | if __name__ == "__main__": # pragma: no cover 125 | symbols = ["spy", "qqq"] 126 | ds = YFinanceSource() 127 | response = ds(symbols) 128 | 129 | for k, v in response.items(): 130 | print(f"{k}:\n{v.head(3)}") 131 | -------------------------------------------------------------------------------- /src/deep_volatility_models/embedding_models.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict 2 | 3 | # Third party packages 4 | import torch 5 | 6 | # Local packages 7 | from deep_volatility_models import model_wrappers 8 | from deep_volatility_models import sample 9 | 10 | 11 | class SingleSymbolModelFromEmbedding(torch.nn.Module): 12 | def __init__(self, network: torch.nn.Module, single_embedding: torch.Tensor): 13 | super().__init__() 14 | self.network = network 15 | self.single_embedding = single_embedding 16 | 17 | # Client code reads the window_size attribute :( 18 | self.window_size = network.window_size 19 | 20 | @property 21 | def is_mixture(self): 22 | return self.network.is_mixture 23 | 24 | def make_predictors(self, window: torch.Tensor) -> torch.Tensor: 25 | """ 26 | Combine the `window` and the `embedding` to make `predictors` input for 27 | use with the underlying network. 
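        The stored embedding for this symbol is expanded along the minibatch
        dimension of `window` and returned together with the window as a
        `(window, embedding)` tuple, which is the input format the underlying
        network expects.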
28 | """ 29 | 30 | minibatch_dim = window.shape[0] 31 | embedding_dim = len(self.single_embedding) 32 | embedding = self.single_embedding.unsqueeze(0).expand( 33 | minibatch_dim, embedding_dim 34 | ) 35 | predictors = (window, embedding) 36 | return predictors 37 | 38 | def simulate_one( 39 | self, 40 | window: torch.Tensor, 41 | time_samples: int, 42 | ): 43 | return sample.simulate_one( 44 | self.network, 45 | self.make_predictors(window), 46 | time_samples, 47 | ) 48 | 49 | def forward(self, window: torch.Tensor) -> torch.Tensor: 50 | return self.network.forward(self.make_predictors(window)) 51 | 52 | 53 | def SingleSymbolModelFactory( 54 | encoding: Dict[str, int], wrapped_model: model_wrappers.StockModel 55 | ) -> Callable[[str], model_wrappers.StockModel]: 56 | if isinstance(wrapped_model.network.model, torch.nn.Module): 57 | model = wrapped_model.network.model 58 | else: 59 | raise ValueError( 60 | "wrapped_model must have `network` field with `model` of type `Module`" 61 | ) 62 | 63 | if isinstance(wrapped_model.network.embedding, torch.nn.Module): 64 | embeddings = wrapped_model.network.embedding 65 | else: 66 | raise ValueError( 67 | "wrapped_model must have `network` field with `embeddings` of type `Module`" 68 | ) 69 | 70 | def single_symbol_model(symbol: str) -> model_wrappers.StockModel: 71 | single_embedding = embeddings(torch.tensor(encoding[symbol])).detach() 72 | return model_wrappers.StockModel( 73 | symbols=(symbol.upper(),), 74 | network=SingleSymbolModelFromEmbedding(model, single_embedding), 75 | date=wrapped_model.date, 76 | epochs=wrapped_model.epochs, 77 | loss=wrapped_model.loss, 78 | training_data_start_date=None, 79 | training_data_end_date=None, 80 | ) 81 | 82 | return single_symbol_model 83 | -------------------------------------------------------------------------------- /src/deep_volatility_models/evaluate_model.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | 3 | import datetime as dt 4 | import logging 5 | import os 6 | import pickle 7 | import sys 8 | import traceback 9 | 10 | # Common packages 11 | import click 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import torch 16 | 17 | plt.style.use("ggplot") 18 | 19 | # import cufflinks as cf 20 | # from IPython.display import display,HTML 21 | 22 | # Local imports 23 | from deep_volatility_models import data_sources 24 | from deep_volatility_models import embedding_models 25 | from deep_volatility_models import sample 26 | from deep_volatility_models import loss_functions 27 | from deep_volatility_models import mixture_model_stats 28 | from deep_volatility_models import stock_data 29 | from deep_volatility_models import time_series_datasets 30 | 31 | 32 | pd.set_option("display.width", None) 33 | pd.set_option("display.max_columns", None) 34 | pd.set_option("display.min_rows", None) 35 | pd.set_option("display.max_rows", 10) 36 | 37 | # Configure external packages and run() 38 | logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO, force=True) 39 | 40 | # Torch configuration 41 | torch.set_printoptions( 42 | precision=4, threshold=20, edgeitems=3, linewidth=None, profile="short" 43 | ) 44 | 45 | ANNUAL_TRADING_DAYS = 252.0 46 | # ROOT_PATH = os.path.dirname(os.path.realpath(__file__)) 47 | ROOT_PATH = "." 
48 | 49 | TIME_SAMPLES = 98 50 | 51 | 52 | def simulate(model, symbol, window, current_price, simulations): 53 | """ 54 | Arguments: 55 | model: torch.nn.Module 56 | symbol: str 57 | window: single input row as a torch.Tensor of shape (symbols, window_size) 58 | """ 59 | # Create a batch dimension (we'll doing a single row, so the batch dimension is one): 60 | window = window.unsqueeze(0) 61 | 62 | logging.info(f"{symbol} window: {window.shape}") 63 | logging.info(f"{symbol} window]: {window}") 64 | 65 | simulated_returns = model.simulate_one(window, TIME_SAMPLES) 66 | simulated_returns_many = sample.simulate_many( 67 | model, window, TIME_SAMPLES, simulations 68 | ) 69 | 70 | historic_returns = np.exp(np.cumsum(window.squeeze(1).squeeze(0).numpy())) 71 | simulated_returns_many = simulated_returns_many.squeeze(1).squeeze(0).numpy() 72 | logging.info(f"mean simulated return: {np.mean(simulated_returns_many)}") 73 | sample_index = list( 74 | range( 75 | len(historic_returns) - 1, 76 | len(historic_returns) + len(simulated_returns_many) - 1, 77 | ) 78 | ) 79 | plt.plot( 80 | current_price * historic_returns / historic_returns[-1], 81 | color="k", 82 | alpha=0.5, 83 | label=f"Time Series Input ({symbol})", 84 | ) 85 | colors = ["c", "m"] 86 | for _ in range(2): 87 | plt.plot( 88 | sample_index, 89 | current_price * simulated_returns_many[:, _], 90 | f"{colors[_]}", 91 | alpha=0.5, 92 | label=f"Sampled Prediction #{_+1}", 93 | ) 94 | plt.xlabel("Time (days)") 95 | plt.ylabel("Price ($)") 96 | 97 | max_return = np.percentile(simulated_returns_many, 95.0, axis=1) 98 | min_return = np.percentile(simulated_returns_many, 5.0, axis=1) 99 | 100 | plt.plot( 101 | sample_index, 102 | current_price * max_return, 103 | "b-", 104 | alpha=0.3, 105 | label="95th Percentile Price (Est)", 106 | ) 107 | plt.plot( 108 | sample_index, 109 | current_price * min_return, 110 | "r-", 111 | alpha=0.3, 112 | label="5th Percentile Price (Est)", 113 | ) 114 | plt.legend(loc="lower left") 115 | ax = plt.gca() 116 | xlim = ax.get_xlim() 117 | ylim = ax.get_ylim() 118 | current_aspect = (xlim[1] - xlim[0]) / (ylim[1] - ylim[0]) 119 | ax.set_aspect(0.5 * current_aspect) 120 | plt.savefig(f"model_evaluation_{symbol}@2x.png", dpi=200) 121 | plt.show() 122 | 123 | 124 | def do_one_symbol(symbol, model, refresh, simulations, start_date, end_date): 125 | logging.info(f"symbol: {symbol.upper()}") 126 | # logging.info(f"model: {model}") - Is having this useful? 127 | logging.info(f"refresh: {refresh}") 128 | logging.info(f"simulations: {simulations}") 129 | logging.info(f"start date: {start_date}") 130 | logging.info(f"end date: {end_date}") 131 | 132 | window_size = model.network.window_size 133 | 134 | # FIXME when we're certain the model file was saved in eval mode. 135 | model.network.eval() 136 | 137 | logging.info(f"model epochs:\t{model.epochs}") 138 | logging.info(f"model loss:\t{model.loss:.4f}") 139 | logging.info(f"model symbols:\t{model.symbols}") 140 | 141 | # Refresh historical data 142 | logging.info("Reading historical data") 143 | 144 | data_store = stock_data.FileSystemStore(os.path.join(ROOT_PATH, "current_data")) 145 | data_source = data_sources.YFinanceSource() 146 | history_loader = stock_data.CachingSymbolHistoryLoader( 147 | data_source, data_store, overwrite_existing=True 148 | ) 149 | # The Cachingloader returns a sequence of (symbol, data). 150 | # Since we pass just one symbol rather than a list, use 151 | # next to grab the first (symbol, dataframe) pair, then [1] to grab the data. 
152 | symbol_history = next(history_loader(symbol))[1] 153 | 154 | # Start date represents the date of the first prediction. In other 155 | # words, all points in the window are before that date. Grab 156 | # `window_size` points prior to that date which will be used for 157 | # the prediction. 158 | 159 | # Note: start_date and end_date represent the first and last dates 160 | # where we have both a prediction and a return value for 161 | # validating the prediction. The first prediction will be for 162 | # start_date but will be based on a full window of history prior 163 | # to and not including start_date. When we make predictions using 164 | # all of this data the first prediction will be for start_date. 165 | # The last prediction will be for the first business day after 166 | # end_date. This will require data up to and including 167 | # end_date. This is one more prediction than we need for 168 | # validation. This prediction will automatically be dropped by a 169 | # merge below because there is no in-window historical data to 170 | # compare it to. We will print this prediction for reference 171 | # before the merge. 172 | 173 | if start_date: 174 | start_position = symbol_history.index.get_loc(start_date) - window_size 175 | else: 176 | start_position = 0 177 | if end_date: 178 | end_position = symbol_history.index.get_loc(end_date) + 1 179 | else: 180 | end_position = None 181 | symbol_history = symbol_history.iloc[start_position:end_position] 182 | print(symbol_history) 183 | 184 | logging.info(f"symbol history:\n{symbol_history}") 185 | 186 | current_price = symbol_history.close[-1] 187 | windowed_returns = time_series_datasets.RollingWindow( 188 | symbol_history.log_return, 189 | window_size, 190 | create_channel_dim=True, 191 | ) 192 | logging.debug(f"{symbol} windowed_returns[0]: {windowed_returns[0].shape}") 193 | logging.debug(f"{symbol} windowed_returns[0]: {windowed_returns[0]}") 194 | 195 | simulate(model.network, symbol, windowed_returns[-1], current_price, simulations) 196 | 197 | with torch.no_grad(): 198 | # Discard the last windowed_return because it would make a 199 | # prediction beyond end_date. We're only interested in 200 | # predictions that we can compare to actual returns. 201 | windows = torch.stack(tuple(windowed_returns)[:-1], dim=0) 202 | logging.debug(f"{symbol} windows: {windows.shape}") 203 | 204 | # First prediction date is first date following the first window. 205 | # Last prediction date is the date of the last data point. 206 | # These are the dates for which we make predictions. 
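    # Illustrative example (not from the original source): with window_size=3 and
    # history indexed d0..d5, predictions are made for d3, d4, and d5; the
    # prediction for d3 is based on the window (d0, d1, d2).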
207 | prediction_dates = symbol_history.index[window_size:] 208 | ar = symbol_history.loc[prediction_dates].log_return 209 | actual_returns = torch.tensor(ar, dtype=torch.float).unsqueeze(1) 210 | 211 | print("actual_returns on prediction dates:\n", actual_returns) 212 | 213 | if model.network.is_mixture: 214 | log_p, mu, sigma_inv = model.network(windows)[:3] 215 | p = torch.exp(log_p) 216 | ll = mixture_model_stats.univariate_log_likelihood( 217 | actual_returns, log_p, mu, sigma_inv 218 | ) 219 | 220 | logging.debug(f"p: {p}") 221 | logging.debug(f"mu: {mu}") 222 | logging.debug(f"sigma_inv: {sigma_inv}") 223 | 224 | mean, variance = mixture_model_stats.univariate_combine_metrics( 225 | p, mu, sigma_inv 226 | ) 227 | else: 228 | mu, sigma_inv = model.network(windows)[:2] 229 | ll = loss_functions.univariate_log_likelihood(actual_returns, mu, sigma_inv) 230 | 231 | logging.debug(f"mu: {mu}") 232 | logging.debug(f"sigma_inv: {sigma_inv}") 233 | 234 | mean = mu.squeeze(1) 235 | sigma = torch.inverse(sigma_inv) 236 | variance = (sigma.squeeze(2).squeeze(1)) ** 2 237 | p = torch.ones((mean.shape[0],)) 238 | 239 | annual_return = ANNUAL_TRADING_DAYS * mean 240 | daily_std_dev = np.sqrt(variance) 241 | volatility = np.sqrt(ANNUAL_TRADING_DAYS) * daily_std_dev 242 | 243 | logging.debug(f"daily mean: {mean}") 244 | logging.debug(f"daily std_dev: {daily_std_dev}") 245 | 246 | logging.debug(f"annual return: {annual_return}") 247 | logging.debug(f"annual volatility: {volatility}") 248 | 249 | logging.info( 250 | f"*** Validation range: {prediction_dates[0].date()} to {prediction_dates[-1].date()} ***" 251 | ) 252 | logging.info(f"*** mean log likelihood: {round(float(torch.mean(ll)),4)} ***") 253 | 254 | df = pd.DataFrame( 255 | { 256 | "pred_volatility": volatility, 257 | "pred_return": map( 258 | lambda x: x.numpy(), mean 259 | ), # Hack so it will print but won't plot 260 | "pred_sigma": daily_std_dev, 261 | "p": map(lambda x: x.numpy(), p), 262 | "mu": map(lambda x: x.numpy(), mu), 263 | "sigma_inv": map(lambda x: x.numpy(), sigma_inv), 264 | }, 265 | index=prediction_dates, 266 | ) 267 | 268 | df = df.merge( 269 | symbol_history, 270 | left_index=True, 271 | right_index=True, 272 | ) 273 | 274 | df = df[ 275 | [ 276 | "pred_volatility", 277 | "log_return", 278 | "close", 279 | "pred_return", 280 | "pred_sigma", 281 | "p", 282 | "mu", 283 | "sigma_inv", 284 | ] 285 | ] 286 | 287 | return_df = df[ 288 | ["log_return", "pred_return", "pred_volatility", "p", "mu", "sigma_inv"] 289 | ] 290 | return return_df 291 | 292 | 293 | def run(model, symbol, simulations, start_date=None, end_date=None): 294 | wrapped_model = torch.load(model) 295 | single_symbol_model_factory = embedding_models.SingleSymbolModelFactory( 296 | wrapped_model.encoding, wrapped_model 297 | ) 298 | 299 | # symbols_to_process = list(set(symbol).difference(exclude_symbols)) 300 | symbols_to_process = sorted(list(set(symbol))) 301 | logging.info(f"symbols_to_process: {symbols_to_process}") 302 | 303 | dataframes = {} 304 | for s in symbols_to_process: 305 | df = do_one_symbol( 306 | s, 307 | single_symbol_model_factory(s.upper()), 308 | True, 309 | simulations, 310 | start_date, 311 | end_date, 312 | ) 313 | dataframes[s] = df 314 | 315 | combined_df = pd.concat( 316 | dataframes.values(), keys=dataframes.keys(), axis=1 317 | ).dropna() 318 | 319 | return combined_df 320 | 321 | 322 | @click.command() 323 | @click.option( 324 | "--model", 325 | show_default=True, 326 | help="Model file to use.", 327 | ) 328 | @click.option( 329 | 
"--symbol", 330 | multiple=True, 331 | show_default=True, 332 | help="Load model for this symbol.", 333 | ) 334 | @click.option( 335 | "--start-date", 336 | type=click.DateTime(formats=["%Y-%m-%d"]), 337 | show_default=True, 338 | help="Date of first return prediction (must be a business day)", 339 | ) 340 | @click.option( 341 | "--end-date", 342 | type=click.DateTime(formats=["%Y-%m-%d"]), 343 | default=None, 344 | show_default=True, 345 | help="Date of last return prediction (must be a business day)", 346 | ) 347 | @click.option( 348 | "--simulations", 349 | type=int, 350 | show_default=True, 351 | default=10, 352 | help="Number of simulations to run", 353 | ) 354 | def run_cli( 355 | model, 356 | symbol, 357 | start_date, 358 | end_date, 359 | simulations, 360 | ): 361 | logging.info(f"model: {model}") 362 | logging.info(f"symbol: {symbol}") 363 | logging.info(f"start_date: {start_date}") 364 | logging.info(f"simulations: {simulations}") 365 | 366 | df = run(model, symbol, simulations, start_date, end_date) 367 | 368 | logging.info(df) 369 | df.plot(subplots=True) 370 | plt.savefig("volatility_over_time.png") 371 | plt.show() 372 | 373 | 374 | if __name__ == "__main__": 375 | # Run everything 376 | run_cli() 377 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_opt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | ] 33 | 34 | search_space = { 35 | "mixture_components": 3 + hp.randint("mixture_components", 4), 36 | "feature_dimension": 40 + hp.randint("feature_dimension", 51), 37 | "embedding_dimension": 3 + hp.randint("embedding_dimension", 13), 38 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-2)), 39 | "dropout": hp.uniform("dropout", 0, 0.25), 40 | "learning_rate": hp.loguniform("learning_rate", np.log(1e-4), np.log(1e-2)), 41 | "weight_decay": hp.loguniform("weight_decay", np.log(1e-8), np.log(1e-6)), 42 | "window_size": hp.choice("window_size", [64, 256]), 43 | "minibatch_size": 64 + hp.randint("minibatch_size", 193), 44 | } 45 | 46 | 47 | def objective(parameters): 48 | # Be a good citizen and make a copy since we're going to modify the dictionary 49 | parameters = parameters.copy() 50 | 51 | # `minibatch_size` has to be a Python int, not a numpy int. 
52 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 53 | 54 | logging.info("************************") 55 | for key, value in parameters.items(): 56 | logging.info(f"{key}: {value}") 57 | 58 | loss = train_univariate.run( 59 | existing_model=None, 60 | symbols=SYMBOLS, 61 | refresh=False, 62 | only_embeddings=False, 63 | max_epochs=400, 64 | early_termination=20, 65 | **parameters, 66 | ) 67 | 68 | logging.info(f"loss: {loss}") 69 | logging.info("************************") 70 | 71 | return loss 72 | 73 | 74 | if __name__ == "__main__": 75 | trials = Trials() 76 | 77 | best = fmin( 78 | objective, 79 | space=search_space, 80 | algo=tpe.suggest, 81 | max_evals=200, 82 | trials=trials, 83 | ) 84 | print(trials.trials) 85 | 86 | print("\n***** Best parameters *****") 87 | 88 | print(best) 89 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_opt2.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | "aal", 33 | "amd", 34 | "amzn", 35 | "bac", 36 | "cmcsa", 37 | "cmg", 38 | "dis", 39 | "f", 40 | "fb", 41 | "ge", 42 | "gld", 43 | "gme", 44 | "goog", 45 | "iyr", 46 | "jnk", 47 | "mro", 48 | "nflx", 49 | "qqq", 50 | "sbux", 51 | "spy", 52 | "t", 53 | "trip", 54 | "twtr", 55 | "v", 56 | "wfc", 57 | "vti", 58 | "ba", 59 | "c", 60 | "gm", 61 | "intc", 62 | "jpm", 63 | "hpe", 64 | "ko", 65 | "kr", 66 | "mgm", 67 | "msft", 68 | "mvis", 69 | "oxy", 70 | "pins", 71 | "uber", 72 | "x", 73 | "xom", 74 | "gps", 75 | "jnj", 76 | "nke", 77 | "pypl", 78 | "wmt", 79 | "ups", 80 | "baba", 81 | "sq", 82 | "fdx", 83 | "snap", 84 | "amc", 85 | "pfe", 86 | "rkt", 87 | "aapl", 88 | "pton", 89 | "csco", 90 | "roku", 91 | "sq", 92 | "snow", 93 | "bnd", 94 | "vbk", 95 | "xmvm", 96 | "nvda", 97 | "vz", 98 | ] 99 | 100 | # Dedup 101 | SYMBOLS = list(set(SYMBOLS)) 102 | 103 | search_space = { 104 | "mixture_components": 3 + hp.randint("mixture_components", 3), 105 | "feature_dimension": 50 + hp.randint("feature_dimension", 41), 106 | "embedding_dimension": 3 + hp.randint("embedding_dimension", 8), 107 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-3)), 108 | "dropout": hp.uniform("dropout", 0.075, 0.125), 109 | "learning_rate": hp.loguniform("learning_rate", np.log(1e-4), np.log(1e-3)), 110 | "weight_decay": hp.loguniform("weight_decay", np.log(1e-7), np.log(1e-6)), 111 | "window_size": hp.choice("window_size", [64, 256]), 112 | "minibatch_size": 128 + hp.randint("minibatch_size", 129), 113 | } 114 | 115 | 116 | def objective(parameters): 117 | # Be a good citizen and make a copy since we're going to modify the dictionary 118 | parameters = parameters.copy() 119 | 120 | # `minibatch_size` has to be a Python int, not a numpy int. 
121 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 122 | 123 | logging.info("************************") 124 | for key, value in parameters.items(): 125 | logging.info(f"{key}: {value}") 126 | 127 | loss = train_univariate.run( 128 | existing_model=None, 129 | symbols=SYMBOLS, 130 | refresh=False, 131 | only_embeddings=False, 132 | max_epochs=400, 133 | early_termination=20, 134 | **parameters, 135 | ) 136 | 137 | logging.info(f"loss: {loss}") 138 | logging.info("************************") 139 | 140 | return loss 141 | 142 | 143 | if __name__ == "__main__": 144 | trials = Trials() 145 | 146 | best = fmin( 147 | objective, 148 | space=search_space, 149 | algo=tpe.suggest, 150 | max_evals=200, 151 | trials=trials, 152 | ) 153 | print(trials.trials) 154 | 155 | print("\n***** Best parameters *****") 156 | 157 | print(best) 158 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_opt_risk_neutral.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | "aal", 33 | "amzn", 34 | "bac", 35 | "cmcsa", 36 | "cmg", 37 | "dis", 38 | "f", 39 | "fb", 40 | "gld", 41 | "gme", 42 | "iyr", 43 | "jnk", 44 | "mro", 45 | "nflx", 46 | "qqq", 47 | "sbux", 48 | "spy", 49 | "t", 50 | "trip", 51 | "twtr", 52 | "v", 53 | "wfc", 54 | "vti", 55 | "ba", 56 | "c", 57 | "gm", 58 | "intc", 59 | "jpm", 60 | "hpe", 61 | "ko", 62 | "kr", 63 | "mgm", 64 | "msft", 65 | "mvis", 66 | "oxy", 67 | "pins", 68 | "uber", 69 | "x", 70 | "xom", 71 | "gps", 72 | "jnj", 73 | "nke", 74 | "pypl", 75 | "wmt", 76 | "ups", 77 | "baba", 78 | "sq", 79 | "fdx", 80 | "snap", 81 | "amc", 82 | "pfe", 83 | "rkt", 84 | "aapl", 85 | "pton", 86 | "csco", 87 | "roku", 88 | "sq", 89 | "snow", 90 | "nvda", 91 | "vz", 92 | ] 93 | 94 | search_space = { 95 | "mixture_components": 1 + hp.randint("mixture_components", 6), 96 | "feature_dimension": 40 + hp.randint("feature_dimension", 51), 97 | "embedding_dimension": 3 + hp.randint("embedding_dimension", 6), 98 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-2)), 99 | "dropout": hp.uniform("dropout", 0, 0.01), 100 | "learning_rate": hp.loguniform("learning_rate", np.log(4e-4), np.log(2e-3)), 101 | "weight_decay": hp.loguniform("weight_decay", np.log(5e-7), np.log(2e-6)), 102 | "window_size": hp.choice("window_size", [64, 256]), 103 | "minibatch_size": 64 + hp.randint("minibatch_size", 193), 104 | } 105 | 106 | 107 | def objective(parameters): 108 | # Be a good citizen and make a copy since we're going to modify the dictionary 109 | parameters = parameters.copy() 110 | 111 | # `minibatch_size` has to be a Python int, not a numpy int. 
112 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 113 | 114 | logging.info("************************") 115 | for key, value in parameters.items(): 116 | logging.info(f"{key}: {value}") 117 | 118 | loss = train_univariate.run( 119 | use_hsmd=False, 120 | model_file="hyperopt_risk_neutral.pt", 121 | existing_model=None, 122 | symbols=SYMBOLS, 123 | refresh=False, 124 | risk_neutral=True, 125 | only_embeddings=False, 126 | max_epochs=400, 127 | early_termination=20, 128 | use_batch_norm=False, 129 | **parameters, 130 | )[1] 131 | 132 | logging.info(f"loss: {loss}") 133 | logging.info("************************") 134 | 135 | return loss 136 | 137 | 138 | if __name__ == "__main__": 139 | trials = Trials() 140 | 141 | best = fmin( 142 | objective, 143 | space=search_space, 144 | algo=tpe.suggest, 145 | max_evals=200, 146 | trials=trials, 147 | ) 148 | print(trials.trials) 149 | 150 | print("\n***** Best parameters *****") 151 | 152 | print(best) 153 | -------------------------------------------------------------------------------- /src/deep_volatility_models/hyperopt_risk_neutral_no_mixture.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from hyperopt import hp, tpe, fmin, Trials 4 | import numpy as np 5 | 6 | import deep_volatility_models.train_univariate as train_univariate 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | SYMBOLS = [ 11 | "bnd", 12 | "edv", 13 | "tyd", 14 | "gld", 15 | "vnq", 16 | "vti", 17 | "spy", 18 | "qqq", 19 | "qld", 20 | "xmvm", 21 | "vbk", 22 | "xlv", 23 | "fxg", 24 | "rxl", 25 | "fxl", 26 | "ibb", 27 | "vgt", 28 | "iyf", 29 | "xly", 30 | "uge", 31 | "jnk", 32 | "aal", 33 | "amzn", 34 | "bac", 35 | "cmcsa", 36 | "cmg", 37 | "dis", 38 | "f", 39 | "fb", 40 | "gld", 41 | "gme", 42 | "iyr", 43 | "jnk", 44 | "mro", 45 | "nflx", 46 | "qqq", 47 | "sbux", 48 | "spy", 49 | "t", 50 | "trip", 51 | "twtr", 52 | "v", 53 | "wfc", 54 | "vti", 55 | "ba", 56 | "c", 57 | "gm", 58 | "intc", 59 | "jpm", 60 | "hpe", 61 | "ko", 62 | "kr", 63 | "mgm", 64 | "msft", 65 | "mvis", 66 | "oxy", 67 | "pins", 68 | "uber", 69 | "x", 70 | "xom", 71 | "gps", 72 | "jnj", 73 | "nke", 74 | "pypl", 75 | "wmt", 76 | "ups", 77 | "baba", 78 | "sq", 79 | "fdx", 80 | "snap", 81 | "amc", 82 | "pfe", 83 | "rkt", 84 | "aapl", 85 | "pton", 86 | "csco", 87 | "roku", 88 | "sq", 89 | "snow", 90 | "nvda", 91 | "vz", 92 | ] 93 | 94 | search_space = { 95 | "feature_dimension": 30 + hp.randint("feature_dimension", 61), 96 | "embedding_dimension": 2 + hp.randint("embedding_dimension", 6), 97 | "gaussian_noise": hp.loguniform("gaussian_noise", np.log(1e-4), np.log(1e-2)), 98 | "dropout": hp.uniform("dropout", 0, 0.01), 99 | "learning_rate": hp.loguniform("learning_rate", np.log(4e-4), np.log(2e-3)), 100 | "weight_decay": hp.loguniform("weight_decay", np.log(5e-7), np.log(2e-6)), 101 | "window_size": hp.choice("window_size", [64, 256]), 102 | "use_batch_norm": hp.choice("use_batch_norm", [False]), 103 | "use_mixture": hp.choice("use_mixture", [False]), 104 | "minibatch_size": 64 + hp.randint("minibatch_size", 193), 105 | } 106 | 107 | 108 | def objective(parameters): 109 | # Be a good citizen and make a copy since we're going to modify the dictionary 110 | parameters = parameters.copy() 111 | 112 | # `minibatch_size` has to be a Python int, not a numpy int. 
113 | parameters["minibatch_size"] = int(parameters["minibatch_size"]) 114 | 115 | logging.info("************************") 116 | for key, value in parameters.items(): 117 | logging.info(f"{key}: {value}") 118 | 119 | loss = train_univariate.run( 120 | use_hsmd=False, 121 | model_file="hyperopt_risk_neutral_no_mixture.pt", 122 | existing_model=None, 123 | symbols=SYMBOLS, 124 | refresh=False, 125 | risk_neutral=True, 126 | mixture_components=1, 127 | only_embeddings=False, 128 | max_epochs=400, 129 | early_termination=20, 130 | **parameters, 131 | )[1] 132 | 133 | logging.info(f"loss: {loss}") 134 | logging.info("************************") 135 | 136 | return loss 137 | 138 | 139 | if __name__ == "__main__": 140 | trials = Trials() 141 | 142 | best = fmin( 143 | objective, 144 | space=search_space, 145 | algo=tpe.suggest, 146 | max_evals=200, 147 | trials=trials, 148 | ) 149 | print(trials.trials) 150 | 151 | print("\n***** Best parameters *****") 152 | 153 | print(best) 154 | -------------------------------------------------------------------------------- /src/deep_volatility_models/loss_functions.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | import math 3 | 4 | # Common packages 5 | import torch 6 | 7 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 8 | EPS_FOR_LOG = 1e-8 9 | 10 | # TODO: Write a test for this 11 | 12 | 13 | def univariate_log_likelihood( 14 | x: torch.Tensor, mu: torch.Tensor, sigma_inv: torch.Tensor 15 | ): 16 | """Inputs: 17 | x: tensor of shape tensor(mb_size, symbols=1) containing the observed values 18 | 19 | mu: tensor of shape (mb_size, symbols=1) containing the mu 20 | estimate for each component 21 | 22 | sigma_inv: tensor of shape (mb_size, input_symbols=1, 23 | output_symbols=1) containing the estimate of the reciprocal of 24 | the sqrt of the variance for each component 25 | 26 | Output: 27 | tensor of shape (mb_size,) containing the log likelihood for each sample 28 | in the batch 29 | 30 | Note: 31 | The symbol dimension may seem superfluous, but the 32 | dimensions of the input tensors have been chosen for 33 | compatability with a multivarate version of this function, 34 | which requires the number of symbols. The dimensions 35 | associated with the number of symbols are required to be 1. 36 | 37 | """ 38 | if not isinstance(x, torch.Tensor): 39 | x = torch.tensor(x, dtype=torch.float) 40 | if not isinstance(mu, torch.Tensor): 41 | mu = torch.tensor(mu, dtype=torch.float) 42 | if not isinstance(sigma_inv, torch.Tensor): 43 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 44 | 45 | mb_size, symbols = sigma_inv.shape[:2] 46 | if ( 47 | x.shape != (mb_size, symbols) 48 | or mu.shape != (mb_size, symbols) 49 | or sigma_inv.shape != (mb_size, symbols, symbols) 50 | ): 51 | raise ValueError( 52 | f"Dimensions of x {x.shape}, mu {mu.shape}, and sigma_inv {sigma_inv.shape} are inconsistent" 53 | ) 54 | 55 | if symbols != 1: 56 | raise ValueError( 57 | f"This function requires the number of symbols to be 1 and not {symbols}" 58 | ) 59 | 60 | # Drop the dimensions that were just confirmed to be one. 61 | x = x.squeeze(1) 62 | mu = mu.squeeze(1) 63 | sigma_inv = sigma_inv.squeeze(2).squeeze(1) 64 | 65 | z_squared = (sigma_inv * (x - mu)) ** 2 66 | 67 | # Inclusion of EPS is to ensure argument remains bounded away from zero. 
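    # The quantity assembled below is the log-density of a univariate normal,
    #   log N(x; mu, sigma) = -0.5 * z**2 + log(sigma_inv) - 0.5 * log(2*pi),
    # where z = sigma_inv * (x - mu); log(sigma_inv) is computed next and the
    # three terms are summed.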
68 | log_sigma_inv = torch.log( 69 | torch.maximum(torch.tensor(EPS_FOR_LOG), torch.abs(sigma_inv)) 70 | ) 71 | 72 | # log_p, z_squared, and log_sigma_inv have the same shape: (mb_size, mixture_components) 73 | 74 | ll = -0.5 * z_squared + log_sigma_inv - LOG_SQRT_TWO_PI 75 | 76 | return ll 77 | -------------------------------------------------------------------------------- /src/deep_volatility_models/mixture_model_stats.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | import math 3 | 4 | # Common packages 5 | import torch 6 | 7 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 8 | EPS_FOR_LOG = 1e-8 9 | EPS_FOR_SINGULARITY = 1e-4 10 | 11 | # TODO: Write a test for this 12 | 13 | 14 | def univariate_log_likelihood( 15 | x: torch.Tensor, log_p: torch.Tensor, mu: torch.Tensor, sigma_inv: torch.Tensor 16 | ): 17 | """ 18 | Inputs: 19 | x: tensor of shape tensor(mb_size,1) containing the observed values 20 | 21 | log_p: tensor of shape (mb_size, mixture_componente) containing the log 22 | probability of each component. 23 | 24 | mu: tensor of shape (mb_size, mixture_components, 1) containing the mu 25 | estimate for each component 26 | 27 | sigma_inv: tensor of shape (mb_size, mixture_components, 1, 1) containing the 28 | estimate of the reciprocal of the sqrt of the variance for each component 29 | 30 | Output: 31 | tensor of shape (mb_size,) containing the log likelihood for each sample 32 | in the batch 33 | 34 | Note: The dimensions of the input tensors have been chosen for compatability 35 | with a multivarate version of this function. The dimensions associated with 36 | the number of symbols are required to be 1. 37 | 38 | """ 39 | if not isinstance(x, torch.Tensor): 40 | x = torch.tensor(x, dtype=torch.float) 41 | if not isinstance(log_p, torch.Tensor): 42 | log_p = torch.tensor(log_p, dtype=torch.float) 43 | if not isinstance(mu, torch.Tensor): 44 | mu = torch.tensor(mu, dtype=torch.float) 45 | if not isinstance(sigma_inv, torch.Tensor): 46 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 47 | 48 | mb_size, mixture_components, symbols = sigma_inv.shape[:3] 49 | if ( 50 | x.shape != (mb_size, symbols) 51 | or log_p.shape != (mb_size, mixture_components) 52 | or mu.shape != (mb_size, mixture_components, symbols) 53 | or sigma_inv.shape != (mb_size, mixture_components, symbols, symbols) 54 | ): 55 | raise ValueError( 56 | f"Dimensions of x ({x.shape}), log_p ({log_p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 57 | ) 58 | 59 | if symbols != 1: 60 | raise ValueError( 61 | f"Symbol dim is {symbols}. This function requires the number of symbols to be 1" 62 | ) 63 | 64 | # Drop the dimensions that were just confirmed to be one. 65 | x = x.squeeze(1) 66 | mu = mu.squeeze(2) 67 | sigma_inv = sigma_inv.squeeze(3).squeeze(2) 68 | 69 | # Subtract mu from x in each component. 70 | # Be explicit rather than relying on broadcasting 71 | e = x.unsqueeze(1).expand(mu.shape) - mu 72 | 73 | z_squared = (sigma_inv * e) ** 2 74 | 75 | # Inclusion of EPS is to ensure argument remains bounded away from zero. 
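    # Each mixture component contributes
    #   log p_i - 0.5 * z_i**2 + log(sigma_inv_i) - 0.5 * log(2*pi),
    # i.e. its log weight plus its normal log-density; the per-component terms
    # are combined below with logsumexp for numerical stability.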
76 | log_sigma_inv = torch.log(EPS_FOR_LOG + torch.abs(sigma_inv)) 77 | 78 | # log_p, z_squared, and log_sigma_inv have the same shape: (mb_size, mixture_components) 79 | 80 | ll_components = log_p - 0.5 * z_squared + log_sigma_inv - LOG_SQRT_TWO_PI 81 | 82 | # Now sum over the mixture components with logsumexp to get the liklihoods 83 | # for each batch sample 84 | ll = torch.logsumexp(ll_components, dim=1) 85 | return ll 86 | 87 | 88 | def multivariate_log_likelihood( 89 | x: torch.Tensor, log_p: torch.Tensor, mu: torch.Tensor, sigma_inv: torch.Tensor 90 | ): 91 | """Inputs: 92 | x (tensor(mb_size, channels)): values 93 | log_p (tensor(mb_size, mixture_componente)): 94 | log probability of each component (this code assumes 95 | these have been normalized with logsumexp!!) 96 | mu (tensor(mb_size, mixture_components, channels): mu for each component 97 | sigma_inv (tensor(mb_size, mixture_components, channels, channels)): 98 | - sqrt of inverse of covariance matrix 99 | (More specifically, the inverse of the lower triangular 100 | Cholesky factor of the channel covariances so that 101 | C^{-1} = L^T L) 102 | 103 | 104 | Output: 105 | tensor(mb_size): log likelihood for each sample in batch 106 | 107 | """ 108 | mb_size, mixture_components, channels = sigma_inv.shape[:3] 109 | if ( 110 | x.shape != (mb_size, channels) 111 | or log_p.shape != (mb_size, mixture_components) 112 | or mu.shape != (mb_size, mixture_components, channels) 113 | or sigma_inv.shape != (mb_size, mixture_components, channels, channels) 114 | ): 115 | raise ValueError( 116 | f"Dimensions of x ({x.shape}), log_p ({log_p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 117 | ) 118 | 119 | # Ensure the sigma_inv matrix is lower triangular 120 | # Values in the upper triangle part get ignored 121 | sigma_inv = torch.tril(sigma_inv) 122 | 123 | e = x.unsqueeze(1).expand(mu.shape) - mu 124 | # e is (mb_size, mixture_components, channels) 125 | 126 | e = e.unsqueeze(3) 127 | # e is now (mb_size, mixture_components, channels, 1) 128 | 129 | z = torch.matmul(sigma_inv, e) 130 | # z is (mb_size, mixture_components, channels, 1) 131 | 132 | z_squared = torch.sum((z**2).squeeze(3), dim=2) 133 | # z_squared is (mb_size, mixture_components) 134 | 135 | # print('x: ', x) 136 | # print('mu: ', mu) 137 | # print('e: ', e) 138 | # print('z_squared: ', z_squared) 139 | 140 | # Compute the log of the diagonal entries of the inverse covariance matrix 141 | # Inclusion of EPS is to ensure argument stays well above zero. 142 | log_diag_sigma_inv = torch.log( 143 | EPS_FOR_LOG + torch.abs(torch.diagonal(sigma_inv, 0, -2, -1)) 144 | ) 145 | # log_diag_sigma_inv is (mb_size, mixture_components, channels) 146 | 147 | # Compute the log of the determinant of the inverse covariance 148 | # matrix by summing the above 149 | log_det_sigma_inv = torch.sum(log_diag_sigma_inv, dim=2) 150 | # print('log_det_sigma_inv', log_det_sigma_inv) 151 | # log_det_sigma_inv is (mb_size, mixture_components) 152 | 153 | ll_components = ( 154 | log_p - 0.5 * z_squared + log_det_sigma_inv - channels * LOG_SQRT_TWO_PI 155 | ) 156 | 157 | # Now sum over the components with logsumexp to get the liklihoods 158 | # for each batch sample 159 | ll = torch.logsumexp(ll_components, dim=1) 160 | return ll 161 | 162 | 163 | # TODO: Generalize the following function for the multivariate case. 
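# The combine_metrics functions below collapse a mixture to a single mean and
# variance via the law of total variance:
#   mu = sum_i p_i * mu_i,   Var[x] = sum_i p_i * (sigma_i**2 + (mu_i - mu)**2).
# The same derivation appears as a comment inside each function.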
164 | 165 | 166 | def new_univariate_combine_metrics(p, mu, sigma_inv): 167 | """ 168 | Given a mixture model of normal distributions charaterized by probabilities 169 | (p), components-wise mean (mu) and component-wise inverse standard deviation 170 | (sigma_inv), compute the overall mean and inverse standard deviation for the 171 | mixture. 172 | 173 | Note: This assumes a univariate mu and sigma_inv. It's simpler than the multivariate version. 174 | 175 | Inputs: 176 | p: tensor of shape (mb_size, mixture_componente): probability of each component 177 | mu: tensor of shape (mb_size, mixture_components): mu for each 178 | component. 179 | sigma_inv: tensor of shape (mb_size, mixture_components) containing 180 | the inverse of the standard deviation of each component. 181 | 182 | Outputs: 183 | mu: tensor of shape (mb_size,) containing the expected mean 184 | variance: tensor of shape (mb_size,) containing the 185 | variance of the mixture. 186 | 187 | Note that the return value is the variance (i.e., the standard deviation squared) and *not* the inverse 188 | of the standard deviation that's often used elsewhere in this code. 189 | 190 | """ 191 | if not isinstance(p, torch.Tensor): 192 | p = torch.tensor(p, dtype=torch.float) 193 | if not isinstance(mu, torch.Tensor): 194 | mu = torch.tensor(mu, dtype=torch.float) 195 | if not isinstance(sigma_inv, torch.Tensor): 196 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 197 | 198 | if p.shape != mu.shape or p.shape != sigma_inv.shape: 199 | raise ValueError( 200 | f"Dimensions of p ({p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 201 | ) 202 | 203 | variance = (1.0 / sigma_inv) ** 2 204 | composite_mean = torch.sum(p * mu, dim=1) 205 | 206 | # Composite variance comes from the shifted component means and 207 | # shifted component covariances. Here's a derivation: 208 | 209 | # E[(x-mu)**2] = sum p_i E[(x_i-mu)**2] 210 | # E[(x_i-mu)**2] = E[((x_i-mu_i) + (mu_i-mu))**2] 211 | # = sigma_i**2 + (mu_i-mu)**2 212 | 213 | shifted_component_means = mu - composite_mean.unsqueeze(1).expand(mu.shape) 214 | shifted_component_variances = variance + shifted_component_means**2 215 | composite_variance = torch.sum(p * shifted_component_variances, dim=1) 216 | return composite_mean, composite_variance 217 | 218 | 219 | def univariate_combine_metrics(p, mu, sigma_inv): 220 | """ 221 | Given a mixture model of normal distributions charaterized by probabilities 222 | (p), components-wise mean (mu) and component-wise inverse standard deviation 223 | (sigma_inv), compute the overall mean and inverse standard deviation for the 224 | mixture. 225 | 226 | Note: This assumes a univariate mu and sigma_inv. It's simpler than the multivariate version. 227 | 228 | Inputs: 229 | p: tensor of shape (mb_size, mixture_componente): probability of each component 230 | mu: tensor of shape (mb_size, mixture_components, 1): mu for each 231 | component. 232 | sigma_inv: tensor of shape (mb_size, mixture_components, 1, 1) containing 233 | the inverse of the standard deviation of each component. 234 | 235 | Outputs: 236 | mu: tensor of shape (mb_size,) containing the expected mean 237 | variance: tensor of shape (mb_size,) containing the 238 | variance of the mixture. 239 | 240 | Note that the return value is the variance (i.e., the standard deviation squared) and *not* the inverse 241 | of the standard deviation that's often used elsewhere in this code. 
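    Example (an illustrative sketch):

        p = torch.tensor([[0.5, 0.5]])
        mu = torch.tensor([[[1.0], [-1.0]]])
        sigma_inv = torch.ones(1, 2, 1, 1)
        mean, variance = univariate_combine_metrics(p, mu, sigma_inv)
        # mean == tensor([0.]) and variance == tensor([2.]): each component
        # contributes its unit variance plus its squared offset from the
        # composite mean, weighted by its probability.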
242 | 243 | """ 244 | if not isinstance(p, torch.Tensor): 245 | p = torch.tensor(p, dtype=torch.float) 246 | if not isinstance(mu, torch.Tensor): 247 | mu = torch.tensor(mu, dtype=torch.float) 248 | if not isinstance(sigma_inv, torch.Tensor): 249 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 250 | 251 | mb_size, mixture_components, symbols = sigma_inv.shape[:3] 252 | if ( 253 | p.shape != (mb_size, mixture_components) 254 | or mu.shape != (mb_size, mixture_components, symbols) 255 | or sigma_inv.shape != (mb_size, mixture_components, symbols, symbols) 256 | ): 257 | raise ValueError( 258 | f"Dimensions of p ({p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 259 | ) 260 | 261 | if symbols != 1: 262 | raise ValueError( 263 | f"Symbol dim is {symbols}. This code requires the number of symbols to be 1" 264 | ) 265 | 266 | # Drop the symbol dimension on mu and sigma_inv which is known to be 1 267 | # for this special case. 268 | 269 | sigma_inv = sigma_inv.squeeze(3).squeeze(2) 270 | mu = mu.squeeze(2) 271 | 272 | variance = (1.0 / sigma_inv) ** 2 273 | composite_mean = torch.sum(p * mu, dim=1) 274 | 275 | # Composite variance comes from the shifted component means and 276 | # shifted component covariances. Here's a derivation: 277 | 278 | # E[(x-mu)**2] = sum p_i E[(x_i-mu)**2] 279 | # E[(x_i-mu)**2] = E[((x_i-mu_i) + (mu_i-mu))**2] 280 | # = sigma_i**2 + (mu_i-mu)**2 281 | 282 | shifted_component_means = mu - composite_mean.unsqueeze(1).expand(mu.shape) 283 | shifted_component_variances = variance + shifted_component_means**2 284 | composite_variance = torch.sum(p * shifted_component_variances, dim=1) 285 | return composite_mean, composite_variance 286 | 287 | 288 | def multivariate_combine_metrics(p, mu, sigma_inv): 289 | """Given a mixture model of normal distributions charaterized by 290 | probabilities (p), components-wise mean (mu) and component-wise 291 | inverse standard deviation (sigma_inv), compute the overall mean 292 | and inverse standard deviation for the mixture. 293 | 294 | Note: This is the multivariate version of univariate_combine_metrics. 295 | 296 | Inputs: 297 | p: tensor of shape (mb_size, mixture_componente) - probability of each component 298 | mu: tensor of shape (mb_size, mixture_components, symbols) - mu for each 299 | component. 300 | sigma_inv: tensor of shape (mb_size, mixture_components, symbols, symbols) - 301 | the inverse of the standard deviation of each component. 302 | 303 | Outputs: 304 | mu: tensor of shape (mb_size, symbols) - the mean of the mixture. 305 | covariance: tensor of shape (mb_size, symbols, symbols) - the covariance of the mixture. 306 | 307 | Note that the return value is the covariance matrix. This is 308 | different from elsewhere in the code where we often use the 309 | Cholesky factor of the inverse of the covariance matrix to 310 | represent the variance. 
311 | 312 | """ 313 | if not isinstance(p, torch.Tensor): 314 | p = torch.tensor(p, dtype=torch.float) 315 | if not isinstance(mu, torch.Tensor): 316 | mu = torch.tensor(mu, dtype=torch.float) 317 | if not isinstance(sigma_inv, torch.Tensor): 318 | sigma_inv = torch.tensor(sigma_inv, dtype=torch.float) 319 | 320 | if ( 321 | p.shape != sigma_inv.shape[:2] 322 | or mu.shape != sigma_inv.shape[:3] 323 | or sigma_inv.shape[2] > sigma_inv.shape[3] 324 | ): 325 | raise ValueError( 326 | f"Dimensions of p ({p.shape}), mu ({mu.shape}), and sigma_inv ({sigma_inv.shape}) are inconsistent" 327 | ) 328 | 329 | # Note that sigma_inv may not be square but the number of rows 330 | # should be no more than the number of columns 331 | 332 | inverse_covariance = torch.matmul(sigma_inv, torch.transpose(sigma_inv, 2, 3)) 333 | covariance = torch.inverse(inverse_covariance) 334 | composite_mean = torch.sum(p.unsqueeze(2).expand(mu.shape) * mu, dim=1) 335 | 336 | # Composite covariance comes from the shifted component means and 337 | # shifted component covariances. Here's a derivation: 338 | 339 | # E[(x-mu)(x-mu)'] = sum p_i E[(x_i-mu)(x_i-mu)'] 340 | # But E[(x_i-mu)(x_i-mu)'] = E[((x_i-mu_i)(x_i-mu_i)' + (mu_i-mu)(mu_i-mu)'] 341 | # = cov_i+ (mu_i-mu)(mu_i-mu)' 342 | 343 | shifted_means = (mu - composite_mean.unsqueeze(1).expand(mu.shape)).unsqueeze(3) 344 | shifted_component_variances = covariance + torch.matmul( 345 | shifted_means, torch.transpose(shifted_means, 2, 3) 346 | ) 347 | composite_covariance = torch.sum( 348 | p.unsqueeze(2).unsqueeze(3).expand(shifted_component_variances.shape) 349 | * shifted_component_variances, 350 | dim=1, 351 | ) 352 | return composite_mean, composite_covariance 353 | -------------------------------------------------------------------------------- /src/deep_volatility_models/model_wrappers.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | from dataclasses import dataclass, field 3 | from typing import Dict, Tuple, Union 4 | 5 | import torch 6 | 7 | 8 | @dataclass 9 | class StockModel: 10 | symbols: Tuple[str] 11 | network: torch.nn.Module 12 | date: dt.datetime 13 | epochs: int 14 | loss: float 15 | encoding: Dict[str, int] = field(default_factory=dict) 16 | training_data_start_date: Union[dt.datetime, None] = None 17 | training_data_end_date: Union[dt.datetime, None] = None 18 | -------------------------------------------------------------------------------- /src/deep_volatility_models/models.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | StockModel = namedtuple('StockModel', 'symbols network date epochs loss') 4 | StockModelV2 = namedtuple('StockModelV2', 'symbols network date epochs null_model_loss loss') 5 | -------------------------------------------------------------------------------- /src/deep_volatility_models/optuna_opt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import optuna 4 | 5 | import deep_volatility_models.train_univariate as train_univariate 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | SYMBOLS = [ 10 | "bnd", 11 | "edv", 12 | "tyd", 13 | "gld", 14 | "vnq", 15 | "vti", 16 | "spy", 17 | "qqq", 18 | "qld", 19 | "xmvm", 20 | "vbk", 21 | "xlv", 22 | "fxg", 23 | "rxl", 24 | "fxl", 25 | "ibb", 26 | "vgt", 27 | "iyf", 28 | "xly", 29 | "uge", 30 | "jnk", 31 | ] 32 | 33 | 34 | def objective(trial): 35 | mixture_components = 
trial.suggest_int("mixture_components", 1, 5) 36 | feature_dimension = trial.suggest_int("feature_dimension", 5, 50) 37 | embedding_dimension = trial.suggest_int("embedding_dimension", 3, 15) 38 | gaussian_noise = trial.suggest_float("gaussian_noise", 1e-5, 1e-2, log=True) 39 | dropout = trial.suggest_uniform("dropout", 0.0, 0.75) 40 | learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True) 41 | weight_decay = trial.suggest_float("weight_decay", 1e-9, 1e-5, log=True) 42 | window_size = trial.suggest_categorical("window_size", [16, 64, 256]) 43 | minibatch_size = trial.suggest_int("minibatch_size", 32, 256) 44 | 45 | logging.info("************************") 46 | logging.info(f"mixture_components: {mixture_components}") 47 | logging.info(f"feature_dimension: {feature_dimension}") 48 | logging.info(f"embedding_dimension: {embedding_dimension}") 49 | logging.info(f"gaussian_noise: {gaussian_noise}") 50 | logging.info(f"dropout: {dropout}") 51 | logging.info(f"learning_rate: {learning_rate}") 52 | logging.info(f"weight_decay: {weight_decay}") 53 | logging.info(f"window_size: {window_size}") 54 | logging.info(f"minibatch_size: {minibatch_size}") 55 | 56 | loss = train_univariate.run( 57 | existing_model=None, 58 | symbols=SYMBOLS, 59 | refresh=False, 60 | only_embeddings=False, 61 | window_size=window_size, 62 | mixture_components=mixture_components, 63 | feature_dimension=feature_dimension, 64 | gaussian_noise=gaussian_noise, 65 | embedding_dimension=embedding_dimension, 66 | minibatch_size=minibatch_size, 67 | use_batch_norm=False, 68 | dropout=dropout, 69 | learning_rate=learning_rate, 70 | weight_decay=weight_decay, 71 | ) 72 | 73 | logging.info(f"loss: {loss}") 74 | logging.info("************************") 75 | 76 | return loss 77 | 78 | 79 | if __name__ == "__main__": 80 | study = optuna.create_study() 81 | study.optimize(objective, n_trials=400) 82 | logging.info(f"{study.best_params}") 83 | -------------------------------------------------------------------------------- /src/deep_volatility_models/sample.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Tuple, Union 2 | 3 | # Common packages 4 | import torch 5 | 6 | 7 | def multivariate_mixture_sample( 8 | mixture_model: torch.nn.Module, 9 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 10 | sample_size: int, 11 | normalize: bool = False, 12 | n_sigma=1, 13 | ): 14 | """Draw samples from a mixture model 15 | Parameters: 16 | mixture_model: torch.nn.Module - The model to evaluate_model 17 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 18 | First element of predictors is a window: torch.Tensor of shape (batch_size, symbols, window_size) 19 | sample_size: int - The number of samples to draw 20 | normalize: bool - Draw samples that are a fixed number of standard 21 | deviations away (useful for generating multivariate contours of points that are 22 | n-sigma from the mean, but not useful for univariate distributions). 23 | n_sigma: int - The number of standard deviations away to generate 24 | samples (only used when `normalize` is True) 25 | 26 | Returns: 27 | torch.Tensor of shape (batch_size, symbols, sample_size) - Log returns 28 | sampled from the model's distribution. Note that a "sample" represents 29 | the distribution at a particular moment in time and does not generate a simulated 30 | time series. 
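    Example (an illustrative sketch; `mixture_model` is assumed to be a
    trained mixture network and `window` a (batch_size, symbols, window_size)
    tensor of log returns):

        samples = multivariate_mixture_sample(mixture_model, window, sample_size=1000)
        p05 = torch.quantile(samples, 0.05, dim=2)  # 5th percentile of next-period log return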
31 | 32 | Note: 33 | In the case that `predictors` is not a tuple, it is assumed to be 34 | the time_series portion. 35 | 36 | """ 37 | log_p, mu, sigma_inv = mixture_model(predictors)[:3] 38 | p = torch.exp(log_p) 39 | 40 | batch_size, _, symbols = mu.shape 41 | 42 | # Create an initial simulation day having returns of zero for day 43 | # 0. By day 0, we mean "right now" so the returns are zero compared 44 | # relative to the current stock price. It may seem unnecessary to 45 | # explicitly add these zeros, but it's really convenient to be able to index 46 | # into the simulation with day index==0 meaning the current stock price. 47 | # The simulation results are typically evaluated by cumsum, exponentiated, 48 | # and multiplied by the current stock price. Using this approach, the 0th 49 | # entry (the price on day 0) will be the current price because a log return of 50 | # zero has been applied. This avoids having to do some awkward indexing 51 | # elsewhere. 52 | 53 | samples = torch.Tensor([]) 54 | 55 | for _ in range(sample_size): 56 | selections = torch.multinomial(p, 1) 57 | mu_selector = selections.unsqueeze(2).expand(batch_size, 1, symbols) 58 | selected_mu = torch.gather(mu, 1, mu_selector).squeeze(1).unsqueeze(2) 59 | # selected_mu is (nb_size x channels x 1) 60 | assert selected_mu.shape == (batch_size, symbols, 1) 61 | 62 | sigma_selector = ( 63 | selections.unsqueeze(2).unsqueeze(3).expand(batch_size, 1, symbols, symbols) 64 | ) 65 | selected_sigma_inv = torch.gather(sigma_inv, 1, sigma_selector) 66 | selected_sigma = torch.inverse(selected_sigma_inv).squeeze(1) 67 | # selected_sigma is (nb_size x channels x channels) 68 | assert selected_sigma.shape == (batch_size, symbols, symbols) 69 | 70 | z = torch.randn(batch_size, symbols, 1) 71 | if normalize: 72 | norm_z = ( 73 | torch.norm(z, p=2, dim=1).unsqueeze(1).expand(batch_size, symbols, 1) 74 | ) 75 | z = n_sigma * z / norm_z 76 | assert z.shape == (batch_size, symbols, 1) 77 | 78 | next_values = selected_mu + torch.matmul(selected_sigma, z) 79 | # next_values is (mb_size, symbols, 1) 80 | assert next_values.shape == (batch_size, symbols, 1) 81 | 82 | samples = torch.cat((samples, next_values), dim=2) 83 | 84 | return samples.detach() 85 | 86 | 87 | def multivariate_sample( 88 | model: torch.nn.Module, 89 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 90 | sample_size: int, 91 | normalize: bool = False, 92 | n_sigma=1, 93 | ): 94 | """Draw samples from a mixture model 95 | Parameters: 96 | model: torch.nn.Module - The model to evaluate_model 97 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 98 | First element of predictors is a window: torch.Tensor of shape (batch_size, symbols, window_size) 99 | sample_size: int - The number of samples to draw 100 | normalize: bool - Draw samples that are a fixed number of standard 101 | deviations away (useful for generating multivariate contours of points that are 102 | n-sigma from the mean, but not useful for univariate distributions). 103 | n_sigma: int - The number of standard deviations away to generate 104 | samples (only used when `normalize` is True) 105 | 106 | Returns: 107 | torch.Tensor of shape (batch_size, symbols, sample_size) - Log returns 108 | sampled from the model's distribution Note that a "sample" represents 109 | the distribution at a particular moment in time and does not generate a simulated 110 | time series. 
111 | 112 | Note: 113 | In the case that `predictors` is not a tuple, it is assumed to be 114 | the time_series portion. 115 | """ 116 | mu, sigma_inv = model(predictors)[:2] 117 | sigma = torch.inverse( 118 | sigma_inv 119 | ) # Removed a .squeeze(1) from multivariate implementation 120 | 121 | batch_size, symbols = mu.shape 122 | 123 | # Create an initial simulation day having returns of zero for day 124 | # 0. By day 0, we mean "right now" so the returns are zero compared 125 | # relative to the current stock price. It may seem unnecessary to 126 | # explicitly add these zeros, but it's really convenient to be able to index 127 | # into the simulation with day index==0 meaning the current stock price. 128 | # The simulation results are typically evaluated by cumsum, exponentiated, 129 | # and multiplied by the current stock price. Using this approach, the 0th 130 | # entry (the price on day 0) will be the current price because a log return of 131 | # zero has been applied. This avoids having to do some awkward indexing 132 | # elsewhere. 133 | 134 | samples = torch.Tensor([]) 135 | for _ in range(sample_size): 136 | z = torch.randn(batch_size, symbols, 1) 137 | if normalize: 138 | norm_z = ( 139 | torch.norm(z, p=2, dim=1).unsqueeze(1).expand(batch_size, symbols, 1) 140 | ) 141 | z = n_sigma * z / norm_z 142 | assert z.shape == (batch_size, symbols, 1) 143 | 144 | next_values = mu + torch.matmul(sigma, z) 145 | # next_values is (batch_size, symbols, 1) 146 | assert next_values.shape == (batch_size, symbols, 1) 147 | 148 | samples = torch.cat((samples, next_values), dim=2) 149 | 150 | return samples.detach() 151 | 152 | 153 | def simulate_one( 154 | model: torch.nn.Module, 155 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 156 | time_samples: int, 157 | ): 158 | """ 159 | For each row of `predictors`, generate simulated log returns for `time_samples` intervals 160 | 161 | Parameters: 162 | model: torch.nn.Module - model to evaluate 163 | sampler: Callable[[torch.nn.Module, torch.Tensor, int, bool, int], torch.tensor] - samples the distribution returned by the model. 164 | The sampler must be compatible with the model (e.g., a mixture model sampler or a non-mixture model sampler depending on the model). 165 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 166 | First element of predictors is a window: torch.Tensor of shape (minibatch, symbols, window_size) 167 | time_samples: number of time intervals to simulate. 168 | 169 | Returns: 170 | torch.Tensor of shape (batch_size, symbols, time_samples+1) - For each batch row, a single time series 171 | containing the simulated returns. 172 | 173 | Notes: 174 | Last dimension of sample is sample_size+1 because the first 175 | position isn't actually sampled. An artificial zero sample 176 | is inserted in the first position. 
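    Example (an illustrative sketch; `model` is assumed to expose a compatible
    `.sampler`, `window` to be a (batch_size, symbols, window_size) tensor of
    log returns, and `current_price` a stand-in for the latest observed price):

        paths = simulate_one(model, window, time_samples=30)
        price_paths = current_price * paths  # paths[:, :, 0] == 1.0 by construction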
177 | """ 178 | 179 | if isinstance(predictors, tuple): 180 | window, exogenous = predictors 181 | make_predictors = lambda window, exogenous: (window, exogenous) 182 | else: 183 | window = predictors 184 | exogenous = None 185 | make_predictors = lambda window, exogenous: window 186 | 187 | batch_size, symbols = window.shape[:2] 188 | simulated_returns = torch.zeros(batch_size, symbols, 1) 189 | 190 | sampler = model.sampler 191 | for _ in range(time_samples): 192 | next_values = sampler(model, make_predictors(window, exogenous), 1) 193 | window = torch.cat([window[:, :, 1:], next_values], dim=2) 194 | simulated_returns = torch.cat((simulated_returns, next_values), dim=2) 195 | 196 | # Aggregate the 'forward_days' future returns into the cumulative return 197 | cumulative_returns = torch.exp(torch.cumsum(simulated_returns, dim=2)).detach() 198 | return cumulative_returns 199 | 200 | 201 | def simulate_many( 202 | model: torch.nn.Module, 203 | predictors: Union[torch.Tensor, Tuple[torch.Tensor, Union[torch.Tensor, None]]], 204 | time_samples: int, 205 | simulation_count: int, 206 | ): 207 | """ 208 | This is a wrapper that calls simulate_one `simulation_count` times. 209 | """ 210 | 211 | simulations = torch.stack( 212 | tuple( 213 | model.simulate_one(predictors, time_samples) 214 | for _ in range(simulation_count) 215 | ), 216 | dim=3, 217 | ) 218 | return simulations 219 | 220 | 221 | def multivariate_mixture_simulate_extremes( 222 | mixture_model: torch.nn.Module, 223 | window: torch.Tensor, 224 | time_samples: int, 225 | simulation_count: int, 226 | ): 227 | 228 | simulations = multivariate_mixture_simulate_many( 229 | mixture_model, window, time_samples, simulation_count 230 | ) 231 | 232 | max_outcomes = torch.max(simulations, dim=0)[0] 233 | min_outcomes = torch.min(simulations, dim=0)[0] 234 | median_outcomes = torch.median(simulations, dim=0)[0] 235 | return min_outcomes, median_outcomes, max_outcomes 236 | -------------------------------------------------------------------------------- /src/deep_volatility_models/stock_data.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import io 4 | import logging 5 | import os 6 | from typing import Any, Callable, Dict, Iterator, Iterable, Tuple, Union 7 | 8 | # Third party libraries 9 | import pandas as pd 10 | 11 | # Local imports 12 | import deep_volatility_models.data_sources as data_sources 13 | import deep_volatility_models.util as util 14 | 15 | # Initialization 16 | logging.basicConfig(level=logging.INFO) 17 | 18 | # This section defines the types that we will be using. 
19 | 20 | Reader = Callable[[io.BufferedReader], pd.DataFrame] 21 | Writer = Callable[[io.BufferedWriter], None] 22 | Concatenator = Callable[[Iterable[Tuple[str, pd.DataFrame]]], pd.DataFrame] 23 | ReaderFactory = Callable[[], Reader] 24 | WriterFactory = Callable[[Any], Writer] 25 | 26 | 27 | class DataStore(abc.ABC): 28 | @abc.abstractmethod 29 | def exists(self, key: str) -> bool: 30 | """Does data for `key` exist in the data store?""" 31 | 32 | @abc.abstractmethod 33 | def read(self, key: str, reader: Reader) -> Any: 34 | """Read data for `key` from the data store""" 35 | 36 | @abc.abstractmethod 37 | def write(self, key: str, writer: Writer) -> None: 38 | """Write the data to the data store under `key`""" 39 | 40 | 41 | # Here's an iplementation of a Reader 42 | 43 | 44 | def SymbolHistoryReader() -> Reader: 45 | """ 46 | Constructs a reader() function that will read symbol history from an open 47 | file-like object. 48 | 49 | Returns: 50 | Callable[BinaryIO, pd.DataFrame] - Reader that whenn called on an open 51 | file returns a history dataframe. 52 | """ 53 | 54 | def read_symbol_history(f: io.BufferedReader) -> pd.DataFrame: 55 | df = pd.read_csv( 56 | f, 57 | index_col="date", 58 | parse_dates=["date"], 59 | ) 60 | 61 | # Be 100% certain it's in ascending order, even though it should have 62 | # been stored that way. 63 | df.sort_index(inplace=True) 64 | return df 65 | 66 | return read_symbol_history 67 | 68 | 69 | # Here's an iplementation of a Writer 70 | 71 | 72 | def SymbolHistoryWriter(df: pd.DataFrame) -> Writer: 73 | def write_symbol_history(f: io.BufferedWriter) -> None: 74 | # Create an index on date and write to CSV in ascending order by date 75 | # with index=True 76 | indexed_df = df.copy() 77 | 78 | if indexed_df.index.name != "date": 79 | indexed_df.set_index("date", inplace=True) 80 | 81 | indexed_df.sort_index(inplace=True) 82 | indexed_df.to_csv(f, index=True) 83 | 84 | return write_symbol_history 85 | 86 | 87 | # Here's an iplementation of a DataStore 88 | 89 | 90 | class FileSystemStore(DataStore): 91 | """ 92 | This clsss implements an abstract interface for data storage. It 93 | implements three methods: 94 | exists() to determine whether an object has beenstored 95 | write() to store an object 96 | load() to load an object. 97 | This particular implementation is specific to writing and loading 98 | dataframes. It does some additional housekeeping and sanity checking on the dataframe. 99 | 100 | Abstracting this interface allows the file system to be replaced or 101 | mocked out more esily for testing. 102 | """ 103 | 104 | def __init__(self, cache_dir="."): 105 | self.cache_dir = cache_dir 106 | os.makedirs(cache_dir, exist_ok=True) 107 | 108 | def _path(self, symbol: str) -> str: 109 | """Construct a filesystem path to store and retrieve the data for the 110 | associated givwn key 111 | Arguments: 112 | symbol: str 113 | Returns: 114 | str - The filesystem path to be used for the key 115 | """ 116 | # TODO: Return a path object rather than a string to increase porability. 117 | symbol_path = os.path.join(self.cache_dir, f"{symbol.lower()}.csv") 118 | return symbol_path 119 | 120 | def exists(self, symbol: str) -> bool: 121 | """Return whether the symbol exists in the data store 122 | Arguments: 123 | symbol: str - the symbol or key to retrieve 124 | Returns: 125 | True if the key exists in the data store. 
126 | """ 127 | return os.path.exists(self._path(symbol)) 128 | 129 | def write(self, symbol: str, writer: Writer): 130 | """ 131 | Write a key and data (must be a dataframe) to the data store 132 | Arguments: 133 | symbol: str - The symbol or "key" for the data. 134 | df: pd.DataFrame - The dataframe to store for that symbol. 135 | Returns: 136 | None 137 | """ 138 | with open(self._path(symbol), "wb") as f: 139 | writer(f) 140 | 141 | def read(self, symbol: str, reader: Reader) -> Any: 142 | """ 143 | Read a dataframe given its symbol. 144 | Arguments: 145 | symbol: str 146 | Returns: 147 | pd.DataFrame - The associated dataframe. 148 | """ 149 | with open(self._path(symbol), "rb") as f: 150 | result = reader(f) 151 | return result 152 | 153 | 154 | def CachingDownloader( 155 | data_source: data_sources.DataSource, 156 | data_store: DataStore, 157 | writer_factory: WriterFactory, 158 | overwrite_existing: bool = False, 159 | ): 160 | """ 161 | Construct and return a download function that will download and write the 162 | results to the data store as necessary. 163 | Arguments: 164 | data_source: Callable[[Union[str, Iterable[str]]], Dict[str, 165 | pd.DataFrame]] - A datasource function which given a list of symbols 166 | returns a dictionary keyed by the symbol with values that are dataframe with history data for 167 | that symbol. 168 | 169 | data_store: FilesystemStore (or similar) - An implementation of a data_store class (see 170 | FileSystemStore above) 171 | 172 | """ 173 | 174 | def download( 175 | symbols: Union[Iterable[str], str], 176 | ) -> Dict[str, pd.DataFrame]: 177 | """ 178 | Arguments: 179 | symbols: Union[Iterable[str], str] - A symbol of list of symbols to populate 180 | in the cache. 181 | overwrite_existing: bool - Forces all symbols to be downloaded whether or not 182 | they already exist in the cache. 183 | """ 184 | # Handle the case where `symbol`is a single symbol 185 | symbols = util.to_symbol_list(symbols) 186 | 187 | if not overwrite_existing: 188 | # Determine what's missing 189 | missing = [] 190 | for symbol in symbols: 191 | if not data_store.exists(symbol): 192 | missing.append(symbol) 193 | 194 | # Replace full list with missing list 195 | symbols = missing 196 | 197 | if len(symbols) > 0: 198 | ds = data_source(symbols) 199 | 200 | # Write the results to the cache 201 | for symbol in symbols: 202 | writer = writer_factory(ds[symbol]) 203 | data_store.write(symbol, writer) 204 | else: 205 | ds = {} 206 | 207 | return ds 208 | 209 | return download 210 | 211 | 212 | def PriceHistoryConcatenator() -> Concatenator: 213 | def concatenator(sequence: Iterable[Tuple[str, pd.DataFrame]]) -> pd.DataFrame: 214 | """ 215 | Return a dataframe containing all historic values for the given set of symbosl. 216 | The dates are inner joined so there is one row for each date where all symbols 217 | have a value for that date. The row index for the returned dataframe is the 218 | date. The column is a muli-level index where the first position is the symbol 219 | and the second position is the value of interest (e.g., "close", "log_return", etc.) 220 | 221 | The expected use case is to get the log returns for a portfolio of stocks. 
For example, 222 | the following returns a datafram of log returns for a portfolio on the dates where every 223 | item in the portfolio has a return: 224 | 225 | df.loc[:, (symbol_list, 'log_return')] 226 | 227 | This is intended for a portfolio, but you can specify just one stock if that's all that's required: 228 | 229 | df.loc[:, (symbol, 'log_return')] 230 | 231 | Arguments: 232 | symbols: Union[Iterable[str], str] - a list of symbols of interest 233 | overwrite_existing: bool - whether to overwrite previously downloaded data (default False) 234 | 235 | Returns 236 | pd.DataFrame - The column is a muli-level index where the first position is the symbol 237 | and the second position is the value of interest (e.g., "close", "log_return", etc.) 238 | """ 239 | dataframes = [] 240 | symbols = [] 241 | for symbol, df in sequence: 242 | df["symbol"] = symbol 243 | symbols.append(symbol) 244 | dataframes.append(df) 245 | 246 | combined_df = pd.concat(dataframes, axis=1, join="inner", keys=symbols) 247 | return combined_df 248 | 249 | return concatenator 250 | 251 | 252 | def CachingLoader( 253 | data_source: data_sources.DataSource, 254 | data_store: DataStore, 255 | reader_factory: ReaderFactory, 256 | writer_factory: WriterFactory, 257 | overwrite_existing: bool, 258 | ): 259 | """ 260 | Construct a caching downloader frmo a data source, data store, reader 261 | factory and writer factory. The resulting caching downloader is called on a 262 | list of symbols and returns a generator that lazily returns a sequence of 263 | typles of (symbol, data). The data_source is invoked only for elements that 264 | do not already exist in `data_store`. A reader instance generated from 265 | reader_factory is used to read the data. The type of the reader output can 266 | be anything, but typically could be a pd.DataFrame. A writer is used to 267 | write the data to the data store (in a format that the reader can read it) 268 | after being downloaded from the data_source. 269 | 270 | See the sample code below for an example of these are glued together. 271 | """ 272 | caching_download = CachingDownloader( 273 | data_source, data_store, writer_factory, overwrite_existing=overwrite_existing 274 | ) 275 | 276 | def load(symbols: Union[Iterable[str], str]) -> Iterator[Tuple[str, Any]]: 277 | """ """ 278 | symbols = util.to_symbol_list(symbols) 279 | caching_download(symbols) 280 | 281 | reader = reader_factory() 282 | for symbol in symbols: 283 | data = data_store.read(symbol, reader) 284 | yield (symbol, data) 285 | 286 | return load 287 | 288 | 289 | def CachingSymbolHistoryLoader( 290 | data_source: data_sources.DataSource, 291 | data_store: DataStore, 292 | overwrite_existing: bool, 293 | ): 294 | """ 295 | This loader factory returns an instance of a loader that handles the typical 296 | use-case where we're interested in DataFrames containing history for a 297 | particular stock symbol. It's the special case of a CachingLoader that 298 | knows how to read and write symbol histories as DataFrames. 
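    Example (a minimal sketch mirroring the __main__ block at the bottom of
    this module):

        data_store = FileSystemStore("training_data")
        data_source = data_sources.YFinanceSource()
        loader = CachingSymbolHistoryLoader(data_source, data_store, overwrite_existing=False)
        for symbol, df in loader(("SPY", "QQQ")):
            print(symbol, len(df))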
299 | """ 300 | return CachingLoader( 301 | data_source, 302 | data_store, 303 | SymbolHistoryReader, 304 | SymbolHistoryWriter, 305 | overwrite_existing=overwrite_existing, 306 | ) 307 | 308 | 309 | if __name__ == "__main__": # pragma: no cover 310 | data_store = FileSystemStore("training_data") 311 | data_source = data_sources.YFinanceSource() 312 | loader = CachingSymbolHistoryLoader( 313 | data_source, data_store, overwrite_existing=False 314 | ) 315 | combiner = PriceHistoryConcatenator() 316 | symbols = ("QQQ", "SPY", "BND", "EDV") 317 | df = combiner(loader(symbols)) 318 | 319 | selection = df.loc[:, (symbols, "log_return")] # type: ignore 320 | print(selection) 321 | print(selection.values.shape) 322 | 323 | selection = df.loc[:, (symbols[0], "log_return")] # type: ignore 324 | print(selection) 325 | print(selection.values.shape) 326 | -------------------------------------------------------------------------------- /src/deep_volatility_models/time_series_datasets.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Iterable, Tuple 2 | 3 | import numpy as np 4 | import torch 5 | import torch.utils.data 6 | 7 | 8 | def multivariate_stats(x): 9 | """ 10 | Given a time series x, estimate the mean (mu) and the square root of 11 | the covariance (sigma) for that time series. 12 | Inputs: 13 | x: tensor of shape (tensor(mb_size, channels)) containing the sequence values 14 | Outputs: 15 | mu: tensor of shape: (channels,) containing the mean estimates 16 | sigma: tensor of shape: ((channels, channels) containing an estimate of 17 | the lower Cholesky factor of the covariance matrix. 18 | 19 | TODO: To improve numerical stability, Use an SVD to compute the Cholesky 20 | factor rather than the naive formula. 21 | """ 22 | # Create tensor version of x in case it isn't already 23 | if not isinstance(x, torch.Tensor): 24 | x = torch.tensor(x) 25 | 26 | mb_size, channels = x.shape 27 | mu = torch.mean(x, dim=0) 28 | error = x - mu.unsqueeze(0).expand((mb_size, channels)) 29 | # error is mb_size x channels 30 | error1 = error.unsqueeze(2) 31 | # error1 represents e (mb_size, channels, 1) 32 | error2 = error.unsqueeze(1) 33 | # error2 represents e^T (mb_size, 1, channels) 34 | cov = torch.mean(torch.matmul(error1, error2), dim=0) 35 | # cov is (channels, channels) 36 | 37 | # Return cholesky factor 38 | sigma = torch.linalg.cholesky(cov) 39 | return mu, sigma 40 | 41 | 42 | class RollingWindow(torch.utils.data.Dataset): 43 | """ 44 | Given a time series, construct a sequence of rolling windows on the series. 45 | The resuling windows are compatible with the pytorch dataloader: the kth 46 | window is obtained by indexing the kth element of the output series. Also, 47 | for compatibility with pytorch the output is represented by pytorch tensors 48 | and it follows pytorch conventions dimension order as explained below. 49 | 50 | Example usage: 51 | 52 | >>> import time_series_datasets 53 | 54 | This modules works with time seris of scalars or time series of vectors. The 55 | first example is a sequence of scalars: 56 | 57 | >>> series = list(range(5)) 58 | >>> series 59 | [0, 1, 2, 3, 4] 60 | 61 | Construct a rolling sequence of windows for the series with a window size of 62 | 3 and a default stride of 1. 
63 | 64 | >>> windowed_series = time_series_datasets.RollingWindow(series, 3) 65 | 66 | The first element (element 0) is a window with the first three values: 67 | 68 | >>> windowed_series[0] 69 | tensor([0., 1., 2.]) 70 | 71 | The second element (element 1) is a window with the next three values: 72 | 73 | >>> windowed_series[1] 74 | tensor([1., 2., 3.]) 75 | 76 | The third element (element 2) is a window with the next three values: 77 | 78 | >>> windowed_series[2] 79 | tensor([2., 3., 4.]) 80 | 81 | For use with convolutional neworks, it's often necessary to create a channel dimension: 82 | 83 | >>> windowed_series = time_series_datasets.RollingWindow(series, 3, create_channel_dim=True) 84 | >>> windowed_series[0] 85 | tensor([[0., 1., 2.]]) 86 | 87 | RollingWindowSeries also works for vector-valued time series as long as you 88 | understand some conventions about the ordering of dimensions. We assume 89 | that the first dimension of the input (dimension 0) represents time. In 90 | other words, we assume the input is a sequence of vectors. This is a 91 | natural convention for the input sequence. However, we follow the pytorch 92 | convention on the output. The pytorch convention is that the *last* 93 | dimension represents time. In effect, the vector dimension becomes the 94 | channel dimension, so the `create_channel_dim` option is meaningless in this 95 | case. 96 | 97 | An example will clarify these ideas. 98 | 99 | >>> vector_series = [[1, 2], [3, 4], [5, 6], [7, 8]] 100 | >>> windowed_vector_series = time_series_datasets.RollingWindow(vector_series, 3) 101 | >>> windowed_vector_series[0] 102 | tensor([[1., 3., 5.], 103 | [2., 4., 6.]]) 104 | 105 | The result may seem "transposed", but that's for consistency with pytorch 106 | conventions and necessary for use with a number of pytorch functions. Here's 107 | the rationale. For a sequence of vectors, the vector dimension should be 108 | thought of as the "depth" dimension (e.g., RGB for images). The pytorch 109 | convention is for the depth to be the first dimension dimension 0) of the 110 | tensor and for the "time" (or space) dimension to be dimension 1 for 1d or 111 | dimensions 1 and 2 for 2d. When these ecords get batched for machine 112 | learning, the index of the record is always dimension 0, so the depth 113 | becomes dimension 1, and "time" becomes dimension 2. The convention for 114 | batched records is typically as follows: 115 | 116 | dimension 0 - index of record within a batch dimension 1 - "depth" 117 | dimension dimension 2 - "time" dimension for 1d or "x" dimensions for 2d 118 | dimension 3 - "y" dimension for 2d 119 | 120 | Since we're looking at records before they have been batched, the convention 121 | is 122 | 123 | dimension 0 - "depth" dimension dimension 1 - "time" dimension for 1d or 124 | "x" dimensions for 2d dimension 2 - "y" dimension for 2d 125 | 126 | More generally, the pytorch convention for time series (or any 1d signal) is 127 | that time (or whatever the 1d dimension represents) should always be the 128 | *last* dimension. For images, "x" and "y" should be the last *two* 129 | dimensions. Continuing the exmaple, here's the next window: 130 | 131 | >>> windowed_vector_series[1] 132 | tensor([[3., 5., 7.], 133 | [4., 6., 8.]]) 134 | 135 | Note: This code currently works for sequences of scalars and sequences of 1d 136 | vectors. 
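    The stride argument controls how far the window advances between
    consecutive elements. For example (an illustrative addition using the
    same scalar series as above):

    >>> strided = time_series_datasets.RollingWindow(series, 3, stride=2)
    >>> len(strided)
    2
    >>> strided[1]
    tensor([2., 3., 4.])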
137 | 138 | TODO: Make this code work for sequences of tensors with two or more 139 | diemsions while following the above conventions that "time" should be the 140 | last dimension. 141 | """ 142 | 143 | def __init__( 144 | self, 145 | series: Iterable[Any], 146 | sequence_length: int, 147 | stride: int = 1, 148 | create_channel_dim: bool = False, 149 | dtype: torch.dtype = torch.float, 150 | ): 151 | if stride <= 0: 152 | raise ValueError("Stride cannot be negative") 153 | 154 | # Originally np.array() was simply suple(series) 155 | # pytorch issued a warning a recommended the use of np.array() 156 | self.__series = np.array(series) 157 | 158 | if ( 159 | len(self.__series) > 0 160 | and hasattr(self.__series[0], "__len__") 161 | and len(self.__series[0]) > 0 162 | and create_channel_dim 163 | ): 164 | raise ValueError("create_channel_dim should be False for this series shape") 165 | 166 | self.__sequence_length = sequence_length 167 | self.__stride = stride 168 | self.__length = (len(self.__series) - sequence_length) // stride + 1 169 | self.__create_channel_dim = create_channel_dim 170 | self.__dtype = dtype 171 | 172 | def __len__(self) -> int: 173 | return self.__length 174 | 175 | def __getitem__(self, index) -> torch.Tensor: 176 | if index < 0: 177 | index = self.__length + index 178 | 179 | if index >= 0 and index < self.__length: 180 | start = index * self.__stride 181 | result = torch.tensor( 182 | self.__series[start : start + self.__sequence_length], 183 | dtype=self.__dtype, 184 | ) 185 | if len(result.shape) == 1: 186 | if self.__create_channel_dim: 187 | result = result.unsqueeze(0) 188 | else: 189 | result = result.t() 190 | 191 | return result 192 | else: 193 | raise IndexError() 194 | 195 | 196 | class ContextWindowAndTarget(torch.utils.data.Dataset): 197 | """Split sequence of windows into a context window and a target""" 198 | 199 | def __init__(self, rolling_window_series: RollingWindow, target_dim: int = 1): 200 | """Typically, the stride used to construct rolling_window_series would be equal to 201 | target_dim 202 | 203 | """ 204 | self.__time_series_dataset = rolling_window_series 205 | self.__target_dim = target_dim 206 | 207 | def __len__(self) -> int: 208 | return len(self.__time_series_dataset) 209 | 210 | def __getitem__(self, index) -> Tuple[torch.Tensor, torch.Tensor]: 211 | t = self.__time_series_dataset[index] 212 | if len(t.shape) == 1: # Sequence of scalars 213 | context_window = t[: -self.__target_dim] 214 | target = t[-self.__target_dim :] 215 | # Drop the last dimension when it's one. 216 | if self.__target_dim == 1: 217 | target = target.squeeze(-1) 218 | else: # Sequence of vectors 219 | context_window = t[:, : -self.__target_dim] 220 | target = t[:, -self.__target_dim :] 221 | 222 | return context_window, target 223 | 224 | 225 | class ContextWindowEncodingAndTarget(torch.utils.data.Dataset): 226 | """This augments the data from an instance of WindowAndTarget by adding the encoding for 227 | its symbol. This would only be appropriate when building a Dataset for a 228 | set of different symbols, but a WindowAndTarget instance contains no symbol 229 | information. It represents the history for just a single symbol. This 230 | class adds a single encoding for that symbol to the Dataset. 
To build a 231 | dataset representing multiple symbols each with their own encodings, you 232 | first construct an EncodingWindowAndTarget instance for each symbol 233 | separately, then combine the various symbols using 234 | torch.utils.data.ConcatDataset()""" 235 | 236 | def __init__( 237 | self, 238 | symbol_encoding: int, 239 | symbol_history_dataset: ContextWindowAndTarget, 240 | device=None, 241 | ): 242 | self.__symbol_encoding = torch.tensor(symbol_encoding) 243 | if device is not None: 244 | self.__symbol_encoding = self.__symbol_encoding.to(device) 245 | self.__symbol_history_dataset = symbol_history_dataset 246 | self.__device = device 247 | 248 | def __len__(self) -> int: 249 | return len(self.__symbol_history_dataset) 250 | 251 | def __getitem__(self, i) -> Tuple[Tuple[torch.Tensor, int], torch.Tensor]: 252 | window, target = self.__symbol_history_dataset[i] 253 | if self.__device is not None: 254 | window = window.to(self.__device) 255 | target = target.to(self.__device) 256 | return (window, self.__symbol_encoding), target 257 | -------------------------------------------------------------------------------- /src/deep_volatility_models/train_univariate.py: -------------------------------------------------------------------------------- 1 | # Standard Python 2 | import datetime as dt 3 | import logging 4 | 5 | import os 6 | from typing import Callable, Dict, Iterable, Iterator, Union, Tuple 7 | 8 | # Common packages 9 | import click 10 | import numpy as np 11 | import pandas as pd 12 | 13 | import torch 14 | import torch.utils.data 15 | import torch.utils.data.dataloader 16 | 17 | # Local imports 18 | from deep_volatility_models import data_sources 19 | from deep_volatility_models import stock_data 20 | from deep_volatility_models import mixture_model_stats 21 | from deep_volatility_models import loss_functions 22 | from deep_volatility_models import time_series_datasets 23 | from deep_volatility_models import model_wrappers 24 | from deep_volatility_models import architecture 25 | from deep_volatility_models import training 26 | 27 | logging.basicConfig( 28 | level=logging.INFO, 29 | format="%(asctime)s %(levelname)s:%(message)s", 30 | force=True, 31 | ) 32 | 33 | ESTIMATE = "estimate" 34 | RISK_NEUTRAL = "risk-neutral" 35 | ZERO = "zero" 36 | 37 | MEAN_STRATEGIES = { 38 | ESTIMATE: architecture.MeanStrategy.ESTIMATE, 39 | RISK_NEUTRAL: architecture.MeanStrategy.RISK_NEUTRAL, 40 | ZERO: architecture.MeanStrategy.ZERO, 41 | } 42 | 43 | TRAIN_FRACTION = 0.80 44 | DEFAULT_SEED = 24 # Previously 42 45 | RANDOM_SPLIT_SEED = 1701 46 | 47 | EPOCHS = 1000 # 30000 48 | EARLY_TERMINATION = 100 # Was 1000 49 | 50 | USE_MIXTURE = False 51 | USE_DEV_MODELS = False 52 | DEFAULT_MIXING_LAYERS = 0 53 | 54 | RISK_NEUTRAL_PARAMETERS = True 55 | if RISK_NEUTRAL_PARAMETERS: # These are the values for the univariate non-mixture model 56 | OPT_LEARNING_RATE = 0.000712 # Previously 0.000535 57 | OPT_DROPOUT = 0.009291 # Previously 0.001675 58 | OPT_FEATURE_DIMENSION = 37 # Previously 41 59 | OPT_MIXTURE_COMPONENTS = 1 # Previously 4 60 | OPT_WINDOW_SIZE = 256 61 | OPT_EMBEDDING_DIMENSION = 6 # Previously 4 62 | OPT_MINIBATCH_SIZE = 230 # Previously 124 63 | OPT_GAUSSIAN_NOISE = 0.000657 # Previosly 0.002789 64 | OPT_WEIGHT_DECAY = 1.438462e-06 # Previously 1.407138e-06 65 | USE_BATCH_NORM = False # risk neutral version has trouble with batch normalization 66 | else: 67 | # Current values were optimized with hyperopt. Values shown in comment were used before optimization. 
68 | OPT_LEARNING_RATE = 0.000689 # Previously 0.000375 69 | OPT_DROPOUT = 0.130894 # Previously 0.50 70 | OPT_FEATURE_DIMENSION = 86 # Previously 40 71 | OPT_MIXTURE_COMPONENTS = 3 # Previously 4 72 | OPT_WINDOW_SIZE = 256 # Previously 64 73 | OPT_EMBEDDING_DIMENSION = 3 # Previously 10 74 | OPT_MINIBATCH_SIZE = 248 # Previously 75 75 | OPT_GAUSSIAN_NOISE = 0.000226 # Previously 0.0025 76 | OPT_WEIGHT_DECAY = 8.489603e-07 # Previously 5e-9 77 | # Value of USE_BATCH_NORM wasn't optimized with hyperopt but was set to True. 78 | USE_BATCH_NORM = True 79 | 80 | BETA1 = 0.95 81 | BETA2 = 0.999 82 | ADAM_EPSILON = 1e-8 83 | ACTIVATION = torch.nn.ReLU() 84 | MAX_GRADIENT_NORM = 1.0 85 | 86 | if torch.cuda.is_available(): 87 | dev = "cuda:0" 88 | # elif torch.has_mps: 89 | # dev = "mps" 90 | else: 91 | dev = "cpu" 92 | 93 | device = torch.device(dev) 94 | 95 | 96 | def create_new_model( 97 | embedding_size=None, 98 | window_size=OPT_WINDOW_SIZE, 99 | mixture_components=OPT_MIXTURE_COMPONENTS, 100 | feature_dimension=OPT_FEATURE_DIMENSION, 101 | embedding_dimension=OPT_EMBEDDING_DIMENSION, 102 | gaussian_noise=OPT_GAUSSIAN_NOISE, 103 | use_batch_norm=USE_BATCH_NORM, 104 | dropout=OPT_DROPOUT, 105 | mean_strategy=MEAN_STRATEGIES[RISK_NEUTRAL], 106 | use_mixture=USE_MIXTURE, 107 | use_dev_models=USE_DEV_MODELS, 108 | extra_mixing_layers=DEFAULT_MIXING_LAYERS, 109 | ): 110 | if use_dev_models: 111 | network = architecture.DeepVolatilityModel( 112 | window_size=window_size, 113 | mean_strategy=mean_strategy, 114 | model_type=architecture.ModelType.UNIVARIATE, 115 | input_symbols=1, 116 | feature_dimension=feature_dimension, 117 | exogenous_dimension=embedding_dimension, 118 | is_mixture=use_mixture, 119 | mixture_components=mixture_components, 120 | gaussian_noise=gaussian_noise, 121 | activation=ACTIVATION, 122 | dropout=dropout, 123 | use_batch_norm=use_batch_norm, 124 | extra_mixing_layers=extra_mixing_layers, 125 | ) 126 | elif use_mixture: 127 | network = architecture.MixtureModel( 128 | window_size, 129 | 1, 130 | feature_dimension=feature_dimension, 131 | mixture_components=mixture_components, 132 | exogenous_dimension=embedding_dimension, 133 | gaussian_noise=gaussian_noise, 134 | dropout=dropout, 135 | use_batch_norm=use_batch_norm, 136 | activation=ACTIVATION, 137 | mean_strategy=mean_strategy, 138 | ) 139 | else: 140 | network = architecture.UnivariateModel( 141 | window_size, 142 | feature_dimension=feature_dimension, 143 | mixture_components=mixture_components, 144 | exogenous_dimension=embedding_dimension, 145 | gaussian_noise=gaussian_noise, 146 | dropout=dropout, 147 | use_batch_norm=use_batch_norm, 148 | activation=ACTIVATION, 149 | mean_strategy=mean_strategy, 150 | ) 151 | 152 | embedding = torch.nn.Embedding(embedding_size, embedding_dimension) 153 | 154 | return network, embedding 155 | 156 | 157 | def load_existing_model(existing_model, symbols): 158 | """ 159 | This function loads an existing model and adjusts its embedding 160 | and encoding objects to accomodate any new symbols in `symbols` 161 | 162 | Arguments: 163 | existing_model: path - path to existing model 164 | symbols: List[str] - list of symbols to be trained. 165 | 166 | Returns: 167 | model_network: torch.Module 168 | embeddings: torch.Embedding 169 | encoding: Dict[str, i] - encoding 170 | 171 | Note the list of symbols is required so that the embedding can be extended 172 | (with values to be trained) to accomodate the new symbol list. 
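    Example (an illustrative sketch of the embedding extension performed
    below; the sizes are made up):

        emb = torch.nn.Embedding(2, 3)  # existing model: 2 symbols, dimension 3
        new_row = emb.weight.data.mean(dim=0, keepdim=True)
        emb.weight.data = torch.cat((emb.weight.data, new_row), dim=0)
        # emb.weight.shape is now torch.Size([3, 3])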
173 | 174 | """ 175 | 176 | model = torch.load(existing_model) 177 | # Dump the old wrapper and keep only the network and the embeddings 178 | # We'll create a new wrapper 179 | model_network = model.network.model 180 | embeddings = model.network.embedding 181 | encoding = model.encoding 182 | 183 | # If there are new symbols since the previous model was trained, 184 | # extend the encoding and initialize the new embeddings with the 185 | # mean of the old embedding. This initialization seems to work 186 | # better than a random initialization with using a pre-trained 187 | # model 188 | 189 | new_symbols = set(symbols).difference(set(encoding.keys())) 190 | 191 | if len(new_symbols) > 0: 192 | # Extend the encoding for any symbols unknown to the pre-loaded model 193 | for s in new_symbols: 194 | encoding[s] = len(encoding) 195 | 196 | # Extend the embedding for any symbols unknown to the pre-loaded model 197 | embedding_parameters = next(embeddings.parameters()) 198 | mean_embedding = embedding_parameters.mean(dim=0) 199 | # Extract and use old embedding dimension 200 | old_embedding_dimension = embedding_parameters.shape[1] 201 | 202 | new_embeddings = ( 203 | mean_embedding.unsqueeze(0) 204 | .expand(len(new_symbols), old_embedding_dimension) 205 | .clone() 206 | ) 207 | 208 | # Extend the mean to current number of symbols 209 | embedding_parameters.data = torch.concat( 210 | (embedding_parameters, new_embeddings), dim=0 211 | ) 212 | 213 | logging.info("Using existing model") 214 | return model_network, embeddings, encoding 215 | 216 | 217 | def prepare_data( 218 | history_loader: Callable[ 219 | [Union[str, Iterable[str]]], Iterator[Tuple[str, pd.DataFrame]] 220 | ], 221 | symbol_list: Iterable[str], 222 | encoding: Dict[str, int], 223 | window_size: int, 224 | minibatch_size: int = OPT_MINIBATCH_SIZE, 225 | start_date: Union[dt.date, None] = None, 226 | end_date: Union[dt.date, None] = None, 227 | ): 228 | generator = torch.Generator().manual_seed(RANDOM_SPLIT_SEED) 229 | 230 | # Refresh historical data 231 | logging.info("Reading historical data") 232 | splits_by_symbol = {} 233 | 234 | # For the purposes of hyperparameter optimization, make sure that 235 | # changing the window size doesn't change the number of rows. In 236 | # other words, we always consume the first 256 points of history, 237 | # even if we don't use them as context so that first target return 238 | # in the dataset is always the same, independent of the window 239 | # size. Also, this won't work if window_size exceeds 256, so we 240 | # trap that case: 241 | if window_size > 256: 242 | raise ValueError( 243 | f"Window size of {window_size} isn't allowed. Window size must be 256 or less" 244 | ) 245 | 246 | skip = 256 - window_size 247 | 248 | for s in sorted(symbol_list): 249 | logging.info(f"Reading {s}") 250 | i = encoding[s] 251 | 252 | # history_loader can load many symbols at once for multivariate 253 | # but here we just load the single symbol of interest. Since we expect 254 | # just one dataframe, grab it with next() instead of using a combiner() 255 | # (see stock-data.py).) 256 | symbol_history = next(history_loader(s))[1].loc[start_date:end_date] 257 | logging.info(f"symbol_history:\n {symbol_history}") 258 | 259 | # Symbol history is a combined history for all symbols. We process it 260 | # one symbols at a time, so get the log returns for the current symbol 261 | # of interest. 
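        # Each element of the dataset built below is a (1, window_size + 1)
        # rolling window of log returns; ContextWindowAndTarget then splits
        # off the final value as the one-step-ahead target, leaving a
        # (1, window_size) context window.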
262 | # log_returns = symbol_history.loc[:, (s, "log_return")] # type: ignore 263 | windowed_returns = time_series_datasets.RollingWindow( 264 | symbol_history.log_return[skip:], 265 | 1 + window_size, 266 | create_channel_dim=True, 267 | ) 268 | logging.debug(f"{s} windowed_returns[0]: {windowed_returns[0]}") 269 | 270 | symbol_dataset = time_series_datasets.ContextWindowAndTarget( 271 | windowed_returns, 1 272 | ) 273 | symbol_dataset_with_encoding = ( 274 | time_series_datasets.ContextWindowEncodingAndTarget( 275 | i, symbol_dataset, device=device 276 | ) 277 | ) 278 | 279 | train_size = int(TRAIN_FRACTION * len(symbol_dataset_with_encoding)) 280 | lengths = [train_size, len(symbol_dataset_with_encoding) - train_size] 281 | train, test = torch.utils.data.random_split( 282 | symbol_dataset_with_encoding, lengths, generator=generator 283 | ) 284 | splits_by_symbol[s] = {"train": train, "test": test} 285 | 286 | train_dataset = torch.utils.data.ConcatDataset( 287 | [splits_by_symbol[s]["train"] for s in symbol_list] 288 | ) 289 | validation_dataset = torch.utils.data.ConcatDataset( 290 | [splits_by_symbol[s]["test"] for s in symbol_list] 291 | ) 292 | 293 | train_dataloader = torch.utils.data.dataloader.DataLoader( 294 | train_dataset, batch_size=minibatch_size, drop_last=True, shuffle=True 295 | ) 296 | 297 | validation_dataloader = torch.utils.data.dataloader.DataLoader( 298 | validation_dataset, 299 | batch_size=len(validation_dataset), 300 | drop_last=True, 301 | shuffle=True, 302 | ) 303 | 304 | return train_dataloader, validation_dataloader 305 | 306 | 307 | def make_mixture_loss_function(): 308 | def loss_function(output, target): 309 | log_p, mu, inv_sigma = output[:3] 310 | 311 | loss = -torch.mean( 312 | mixture_model_stats.multivariate_log_likelihood( 313 | target.squeeze(2), log_p, mu, inv_sigma 314 | ) 315 | ) 316 | 317 | if np.isnan(float(loss)): 318 | logging.error("log_p: ", log_p) 319 | logging.error("mu: ", mu) 320 | logging.error("inv_sigma: ", inv_sigma) 321 | 322 | return loss 323 | 324 | return loss_function 325 | 326 | 327 | def make_loss_function(): 328 | def loss_function(output, target): 329 | mu, inv_sigma = output[:2] 330 | 331 | loss = -torch.mean( 332 | loss_functions.univariate_log_likelihood(target.squeeze(2), mu, inv_sigma) 333 | ) 334 | 335 | if np.isnan(float(loss)): 336 | logging.error("mu: ", mu) 337 | logging.error("inv_sigma: ", inv_sigma) 338 | 339 | return loss 340 | 341 | return loss_function 342 | 343 | 344 | def log_mixture_mean_error(epoch, output, target): 345 | log_p, mu = output[:2] 346 | mb_size, components, channels = mu.shape 347 | combined_mu = torch.sum( 348 | mu * torch.exp(log_p).unsqueeze(2).expand((mb_size, components, channels)), 349 | dim=1, 350 | ) 351 | mean_error = torch.mean(target.squeeze(2) - combined_mu, dim=0) 352 | logging.debug(f"epoch: {epoch} mean_error: {float(mean_error):.5f}") 353 | 354 | 355 | def make_mixture_validation_batch_logger(): 356 | def log_epoch(epoch, batch, output, target, loss): 357 | log_p, mu, inv_sigma = output[:3] 358 | logging.debug(f"last epoch p:\n{torch.exp(log_p)[:6].detach().cpu().numpy()}") 359 | logging.debug(f"last epoch mu:\n{mu[:6].detach().cpu().numpy()}") 360 | logging.debug(f"last epoch sigma:\n{inv_sigma[:6].detach().cpu().numpy()}") 361 | 362 | log_mixture_mean_error(epoch, output, target) 363 | 364 | return log_epoch 365 | 366 | 367 | def log_mean_error(epoch, output, target): 368 | mu = output[0] 369 | mean_error = torch.mean(target.squeeze(2) - mu, dim=0) 370 | 
logging.debug(f"epoch: {epoch} mean_error: {float(mean_error):.5f}") 371 | 372 | 373 | def make_validation_batch_logger(): 374 | def log_epoch(epoch, batch, output, target, loss): 375 | mu, inv_sigma = output[:2] 376 | logging.debug(f"last epoch mu:\n{mu[:6].detach().cpu().numpy()}") 377 | logging.debug(f"last epoch sigma:\n{inv_sigma[:6].detach().cpu().numpy()}") 378 | 379 | log_mean_error(epoch, output, target) 380 | 381 | return log_epoch 382 | 383 | 384 | def make_save_model( 385 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 386 | ): 387 | def save_model(epoch, epoch_loss, prefix=""): 388 | wrapped_model = model_wrappers.StockModel( 389 | symbols=symbols, 390 | encoding=encoding, 391 | network=model, 392 | epochs=epoch, 393 | date=dt.datetime.now(), 394 | loss=epoch_loss, 395 | training_data_start_date=start_date, 396 | training_data_end_date=end_date, 397 | ) 398 | 399 | torch.save(wrapped_model, f"{model_file}") 400 | 401 | return save_model 402 | 403 | 404 | def make_loss_improvement_callback( 405 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 406 | ): 407 | save_model = make_save_model( 408 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 409 | ) 410 | 411 | def model_improvement_callback(epoch, epoch_loss): 412 | save_model(epoch, epoch_loss) 413 | 414 | return model_improvement_callback 415 | 416 | 417 | def make_epoch_callback(model): 418 | def epoch_callback(epoch, train_loss, validation_loss): 419 | logging.debug(f"parameters: {(list(model.embedding.parameters()))}") 420 | 421 | return epoch_callback 422 | 423 | 424 | def run( 425 | use_hsmd, 426 | model_file, 427 | existing_model, 428 | symbols, 429 | refresh, 430 | mean_strategy, 431 | only_embeddings, 432 | use_mixture=USE_MIXTURE, 433 | max_epochs=EPOCHS, 434 | early_termination=EARLY_TERMINATION, 435 | window_size=OPT_WINDOW_SIZE, 436 | mixture_components=OPT_MIXTURE_COMPONENTS, 437 | feature_dimension=OPT_FEATURE_DIMENSION, 438 | embedding_dimension=OPT_EMBEDDING_DIMENSION, 439 | gaussian_noise=OPT_GAUSSIAN_NOISE, 440 | minibatch_size=OPT_MINIBATCH_SIZE, 441 | dropout=OPT_DROPOUT, 442 | learning_rate=OPT_LEARNING_RATE, 443 | weight_decay=OPT_WEIGHT_DECAY, 444 | use_batch_norm=USE_BATCH_NORM, 445 | beta1=BETA1, 446 | beta2=BETA2, 447 | seed=DEFAULT_SEED, 448 | start_date=None, 449 | end_date=None, 450 | use_dev_models=USE_DEV_MODELS, 451 | extra_mixing_layers=0, 452 | ): 453 | # Rewrite symbols with deduped, uppercase versions 454 | symbols = list(map(str.upper, set(symbols))) 455 | 456 | logging.info(f"model: {model_file}") 457 | logging.info(f"device: {device}") 458 | logging.info(f"existing_model: {existing_model}") 459 | logging.info(f"symbols: {symbols}") 460 | logging.info(f"refresh: {refresh}") 461 | logging.info(f"mean_strategy: {mean_strategy}") 462 | logging.info(f"only_embeddings: {only_embeddings}") 463 | logging.info(f"use_mixture: {use_mixture}") 464 | logging.info(f"window_size: {window_size}") 465 | logging.info(f"mixture_components: {mixture_components}") 466 | logging.info(f"feature_dimension: {feature_dimension}") 467 | logging.info(f"embedding_dimension: {embedding_dimension}") 468 | logging.info(f"gaussian_noise: {gaussian_noise}") 469 | logging.info(f"minibatch_size: {minibatch_size}") 470 | logging.info(f"dropout: {dropout}") 471 | logging.info(f"learning_rate: {learning_rate}") 472 | logging.info(f"weight_decay: {weight_decay}") 473 | logging.info(f"use_batch_norm: {use_batch_norm}") 474 | 
logging.info(f"ADAM beta1: {beta1}") 475 | logging.info(f"ADAM beta2: {beta2}") 476 | logging.info(f"Seed: {seed}") 477 | logging.info(f"Start date: {start_date}") 478 | logging.info(f"End date: {end_date}") 479 | logging.info(f"Use dev models: {use_dev_models}") 480 | logging.info(f"Extra mixing layers: {extra_mixing_layers}") 481 | 482 | model_network = embeddings = None 483 | if existing_model: 484 | model_network, embeddings, encoding = load_existing_model( 485 | existing_model, symbols 486 | ) 487 | logging.info(f"Loaded model from file: {existing_model}") 488 | else: 489 | encoding = {s: i for i, s in enumerate(symbols)} 490 | 491 | logging.info(f"Encoding: {encoding}") 492 | 493 | data_store = stock_data.FileSystemStore("training_data") 494 | if use_hsmd: 495 | data_source = data_sources.HugeStockMarketDatasetSource(use_hsmd) 496 | else: 497 | data_source = data_sources.YFinanceSource() 498 | 499 | history_loader = stock_data.CachingSymbolHistoryLoader( 500 | data_source, data_store, refresh 501 | ) 502 | 503 | torch.random.manual_seed(seed) 504 | 505 | # Do split before any random weight initialization so that any 506 | # subsequent random number generator calls won't affect the split. 507 | # We want the splits to be the same for different architecture 508 | # parameters to provide fair comparisons of different 509 | # architectures on the same split. 510 | 511 | train_loader, validation_loader = prepare_data( 512 | history_loader, 513 | symbols, 514 | encoding, 515 | window_size, 516 | minibatch_size=minibatch_size, 517 | start_date=start_date, 518 | end_date=end_date, 519 | ) 520 | 521 | if model_network is None or embeddings is None: 522 | model_network, embeddings = create_new_model( 523 | embedding_size=len(symbols), 524 | window_size=window_size, 525 | mixture_components=mixture_components, 526 | feature_dimension=feature_dimension, 527 | embedding_dimension=embedding_dimension, 528 | gaussian_noise=gaussian_noise, 529 | use_mixture=use_mixture, 530 | use_batch_norm=use_batch_norm, 531 | dropout=dropout, 532 | mean_strategy=mean_strategy, 533 | use_dev_models=use_dev_models, 534 | extra_mixing_layers=extra_mixing_layers, 535 | ) 536 | logging.info("Initialized new model") 537 | 538 | # Generate list of parameters we choose to train. 539 | # We always tune or train the embeddings: 540 | parameters = list(embeddings.parameters()) 541 | 542 | # Add rest of model parameters unless we're training only the embeddings. 
543 | if not only_embeddings: 544 | parameters.extend(model_network.parameters()) 545 | 546 | logging.debug(f"parameters: {parameters}") 547 | 548 | # Define model, optimizer, loss function, and callbacks before calling train() 549 | model = architecture.ModelWithEmbedding(model_network, embeddings) 550 | model.to(device) 551 | 552 | sgd_optim = torch.optim.SGD( 553 | parameters, 554 | lr=learning_rate, 555 | weight_decay=weight_decay, 556 | momentum=0.0, 557 | ) 558 | adam_optim = torch.optim.Adam( 559 | parameters, 560 | lr=learning_rate, 561 | betas=(beta1, beta2), 562 | weight_decay=weight_decay, 563 | eps=ADAM_EPSILON, 564 | ) 565 | optim = adam_optim 566 | 567 | if model.is_mixture: 568 | loss_function = make_mixture_loss_function() 569 | validation_batch_callback = make_mixture_validation_batch_logger() 570 | else: 571 | loss_function = make_loss_function() 572 | validation_batch_callback = make_validation_batch_logger() 573 | 574 | epoch_callback = make_epoch_callback(model) 575 | loss_improvement_callback = make_loss_improvement_callback( 576 | model_file, only_embeddings, model, encoding, symbols, start_date, end_date 577 | ) 578 | 579 | logging.info("Starting training loop.") 580 | best_epoch, best_validation_loss, best_model = training.train( 581 | model=model, 582 | loss_function=loss_function, 583 | optim=optim, 584 | train_loader=train_loader, 585 | validation_loader=validation_loader, 586 | max_epochs=max_epochs, 587 | early_termination=early_termination, 588 | validation_batch_callback=validation_batch_callback, 589 | epoch_callback=epoch_callback, 590 | loss_improvement_callback=loss_improvement_callback, 591 | ) 592 | logging.info( 593 | f"Training terminated. Best epoch: {best_epoch}; Best validation loss: {best_validation_loss}" 594 | ) 595 | return best_epoch, best_validation_loss, best_model 596 | 597 | 598 | @click.command() 599 | @click.option( 600 | "--use-hsmd", 601 | default=None, 602 | show_default=True, 603 | help="Use huge stock market dataset if specified zip file (else use yfinance)", 604 | ) 605 | @click.option( 606 | "--model", 607 | default="model.pt", 608 | show_default=True, 609 | help="Trained model output file.", 610 | ) 611 | @click.option( 612 | "--existing-model", 613 | default=None, 614 | show_default=True, 615 | help="Existing model to load (for tuning).", 616 | ) 617 | @click.option("--symbol", "-s", multiple=True, show_default=True) 618 | @click.option( 619 | "--refresh", 620 | is_flag=True, 621 | default=False, 622 | show_default=True, 623 | help="Refresh stock data", 624 | ) 625 | @click.option( 626 | "--mean-strategy", 627 | type=click.Choice([RISK_NEUTRAL, ZERO, ESTIMATE]), 628 | show_default=True, 629 | default=RISK_NEUTRAL, 630 | help="Method to use for mean output.", 631 | ) 632 | @click.option( 633 | "--only-embeddings", 634 | is_flag=True, 635 | default=False, 636 | show_default=True, 637 | help="Train only the embeddings", 638 | ) 639 | @click.option( 640 | "--use-mixture/--no-mixture", 641 | is_flag=True, 642 | default=USE_MIXTURE, 643 | show_default=True, 644 | help="Use a mixture model?", 645 | ) 646 | @click.option( 647 | "--early-termination", 648 | default=EARLY_TERMINATION, 649 | show_default=True, 650 | help="Terminate if no improvement in this number of iterations", 651 | ) 652 | @click.option( 653 | "--learning-rate", default=OPT_LEARNING_RATE, show_default=True, type=float 654 | ) 655 | @click.option("--dropout", default=OPT_DROPOUT, show_default=True, type=float) 656 | @click.option( 657 | 
"--use-batch-norm/--no-use-batch-norm", 658 | is_flag=True, 659 | default=USE_BATCH_NORM, 660 | show_default=True, 661 | ) 662 | @click.option( 663 | "--feature-dimension", default=OPT_FEATURE_DIMENSION, show_default=True, type=int 664 | ) 665 | @click.option( 666 | "--mixture-components", default=OPT_MIXTURE_COMPONENTS, show_default=True, type=int 667 | ) 668 | @click.option("--window-size", default=OPT_WINDOW_SIZE, show_default=True, type=int) 669 | @click.option( 670 | "--embedding-dimension", 671 | default=OPT_EMBEDDING_DIMENSION, 672 | show_default=True, 673 | type=int, 674 | ) 675 | @click.option( 676 | "--minibatch-size", default=OPT_MINIBATCH_SIZE, show_default=True, type=int 677 | ) 678 | @click.option( 679 | "--gaussian-noise", default=OPT_GAUSSIAN_NOISE, show_default=True, type=float 680 | ) 681 | @click.option("--weight-decay", default=OPT_WEIGHT_DECAY, show_default=True, type=float) 682 | @click.option("--seed", default=DEFAULT_SEED, show_default=True, type=int) 683 | @click.option( 684 | "--start-date", 685 | default=None, 686 | show_default=True, 687 | type=click.DateTime(formats=["%Y-%m-%d"]), 688 | help="Exclude training data (returns) before this date", 689 | ) 690 | @click.option( 691 | "--end-date", 692 | show_default=True, 693 | type=click.DateTime(formats=["%Y-%m-%d"]), 694 | help="Exclude training data on or after this date", 695 | ) 696 | @click.option( 697 | "--use-dev-models", 698 | is_flag=True, 699 | show_default=True, 700 | help="Use development version of models.", 701 | ) 702 | @click.option( 703 | "--extra-mixing-layers", 704 | type=int, 705 | default=DEFAULT_MIXING_LAYERS, 706 | show_default=True, 707 | help="Number of additional layers to blend exogenous and time series latents.", 708 | ) 709 | def main_cli( 710 | use_hsmd, 711 | model, 712 | existing_model, 713 | symbol, 714 | refresh, 715 | mean_strategy, 716 | only_embeddings, 717 | use_mixture, 718 | early_termination, 719 | learning_rate, 720 | dropout, 721 | use_batch_norm, 722 | feature_dimension, 723 | mixture_components, 724 | window_size, 725 | embedding_dimension, 726 | minibatch_size, 727 | gaussian_noise, 728 | weight_decay, 729 | seed, 730 | start_date, 731 | end_date, 732 | use_dev_models, 733 | extra_mixing_layers, 734 | ): 735 | 736 | if start_date: 737 | start_date = start_date.date() 738 | 739 | if end_date: 740 | end_date = end_date.date() 741 | 742 | run( 743 | use_hsmd, 744 | model_file=model, 745 | existing_model=existing_model, 746 | symbols=symbol, 747 | refresh=refresh, 748 | mean_strategy=MEAN_STRATEGIES[mean_strategy], 749 | use_mixture=use_mixture, 750 | only_embeddings=only_embeddings, 751 | early_termination=early_termination, 752 | learning_rate=learning_rate, 753 | dropout=dropout, 754 | use_batch_norm=use_batch_norm, 755 | feature_dimension=feature_dimension, 756 | mixture_components=mixture_components, 757 | window_size=window_size, 758 | embedding_dimension=embedding_dimension, 759 | minibatch_size=minibatch_size, 760 | gaussian_noise=gaussian_noise, 761 | weight_decay=weight_decay, 762 | seed=seed, 763 | start_date=start_date, 764 | end_date=end_date, 765 | use_dev_models=use_dev_models, 766 | extra_mixing_layers=extra_mixing_layers, 767 | ) 768 | 769 | 770 | if __name__ == "__main__": 771 | main_cli() 772 | -------------------------------------------------------------------------------- /src/deep_volatility_models/training.py: -------------------------------------------------------------------------------- 1 | """This is a generic PyTorch training loop that can be 
adapted for different problems.""" 2 | 3 | # Standard Python packages 4 | from copy import deepcopy 5 | from itertools import count 6 | import logging 7 | from typing import Callable, Union 8 | 9 | 10 | # Third party packages 11 | import numpy as np 12 | import torch 13 | import torch.utils.data.dataloader 14 | 15 | 16 | def default_batch_callback( 17 | epoch: int, 18 | batch: int, 19 | output: torch.Tensor, 20 | target: torch.Tensor, 21 | loss: float, 22 | ) -> None: 23 | return None 24 | 25 | 26 | def default_epoch_callback( 27 | epoch: int, 28 | train_loss: float, 29 | validation_loss: float, 30 | ) -> None: 31 | return None 32 | 33 | 34 | def default_loss_improvement_callback( 35 | epoch: int, 36 | loss: float, 37 | ) -> None: 38 | return None 39 | 40 | 41 | def _do_batches( 42 | epoch: int, 43 | model: torch.nn.Module, 44 | data_loader: torch.utils.data.dataloader.DataLoader, 45 | loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], 46 | optim: torch.optim.Optimizer, 47 | training: bool, 48 | callback: Callable[[int, int, torch.Tensor, torch.Tensor, float], None], 49 | ): 50 | model.train(training) 51 | batch_losses = [] 52 | 53 | for batch, (predictors, target) in enumerate(data_loader): 54 | model_output = model(predictors) 55 | batch_loss = loss_function(model_output, target) 56 | batch_losses.append(float(batch_loss)) 57 | 58 | if training: 59 | optim.zero_grad() 60 | batch_loss.backward() 61 | optim.step() 62 | 63 | callback(epoch, batch, model_output, target, float(batch_loss)) 64 | 65 | epoch_loss = float(np.mean(batch_losses)) 66 | return epoch_loss 67 | 68 | 69 | def train( 70 | model: torch.nn.Module, 71 | loss_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], 72 | optim: torch.optim.Optimizer, 73 | train_loader: torch.utils.data.dataloader.DataLoader, 74 | validation_loader: torch.utils.data.dataloader.DataLoader, 75 | max_epochs: Union[int, None] = None, 76 | early_termination: Union[int, None] = None, 77 | train_batch_callback: Callable[ 78 | [int, int, torch.Tensor, torch.Tensor, float], None 79 | ] = default_batch_callback, 80 | validation_batch_callback: Callable[ 81 | [int, int, torch.Tensor, torch.Tensor, float], None 82 | ] = default_batch_callback, 83 | loss_improvement_callback: Callable[ 84 | [int, float], None 85 | ] = default_loss_improvement_callback, 86 | epoch_callback: Callable[[int, float, float], None] = default_epoch_callback, 87 | ): 88 | # Initialize state for early termination monitoring 89 | best_model = deepcopy(model) 90 | best_validation_loss = float("inf") 91 | best_epoch = -1 92 | 93 | # This is the main epoch loop 94 | if max_epochs is None and early_termination is None: 95 | raise ValueError( 96 | f"At least one of max_epochs ({max_epochs}) or early_termination ({early_termination}) must be specified" 97 | ) 98 | 99 | if max_epochs is not None: 100 | epoch_iterator = range(max_epochs) 101 | else: 102 | epoch_iterator = count() 103 | 104 | for epoch in epoch_iterator: 105 | 106 | epoch_train_loss = _do_batches( 107 | epoch, 108 | model, 109 | train_loader, 110 | loss_function, 111 | optim, 112 | training=True, 113 | callback=train_batch_callback, 114 | ) 115 | 116 | # Evalute the loss on the test set 117 | # Don't compute gradients 118 | with torch.no_grad(): 119 | epoch_validation_loss = _do_batches( 120 | epoch, 121 | model, 122 | validation_loader, 123 | loss_function, 124 | optim, 125 | training=False, 126 | callback=validation_batch_callback, 127 | ) 128 | 129 | epoch_callback(epoch, float(epoch_train_loss), 
float(epoch_validation_loss))
130 |
131 | logging.info(f" Epoch {epoch}: loss (train): {epoch_train_loss:.4f}")
132 |
133 | if epoch_validation_loss < best_validation_loss:
134 | best_validation_loss = epoch_validation_loss
135 | best_epoch = epoch
136 | best_model = deepcopy(model)
137 | flag = "**"
138 |
139 | loss_improvement_callback(epoch, epoch_validation_loss)
140 | else:
141 | flag = " "
142 |
143 | logging.info(
144 | f" {flag} Epoch {epoch}: loss (test): {epoch_validation_loss:.4f} best epoch: {best_epoch} best loss: {best_validation_loss:.4f} {flag}"
145 | )
146 | if early_termination is not None and epoch >= best_epoch + early_termination:
147 | logging.info(
148 | f"No improvement in {early_termination} epochs. Terminating early."
149 | )
150 | break # Terminate early
151 |
152 | return best_epoch, best_validation_loss, best_model
153 |
--------------------------------------------------------------------------------
/src/deep_volatility_models/util.py:
--------------------------------------------------------------------------------
1 | from typing import List, Iterable, Union
2 |
3 |
4 | def to_symbol_list(symbols: Union[Iterable[str], str]) -> List[str]:
5 | """
6 | This function converts its `symbols` argument to a list of strings.
7 | It's used as a convenience function so that a caller can provide a
8 | single symbol to a function rather than an Iterable.
9 | We also normalize symbol lists by converting the symbols to upper case.
10 |
11 | Arguments:
12 | symbols: Union[Iterable[str], str]: The collection of symbols to convert or a single symbol
13 | Returns:
14 | List[str]: An instantiated list of symbols, which may be the same list passed in
15 | """
16 |
17 | if isinstance(symbols, str):
18 | symbols = (symbols,)
19 |
20 | return list(map(str.upper, symbols))
21 |
22 |
23 | def is_sorted(l: Iterable) -> bool:
24 | l = tuple(l)
25 | return all([x <= y for x, y in zip(l, l[1:])])
26 |
27 |
28 | def rename_column(c: str):
29 | """
30 | Standardize column naming. No spaces and no caps.
31 | """ 32 | return c.lower().replace(" ", "_") 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mawicks/deep-volatility-models/8a97672c9d0f4ee1237b04747dea81dd54d07360/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_architecture.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import torch 4 | 5 | from deep_volatility_models import architecture 6 | from deep_volatility_models import mixture_model_stats 7 | 8 | 9 | logsoftmax = torch.nn.LogSoftmax(dim=1) 10 | 11 | BATCH_SIZE = 13 12 | FEATURE_DIMENSION = 11 13 | WINDOW_SIZE = 16 14 | NOISE_DIM = 77 15 | EMBEDDING_SYMBOLS = 9 16 | EXTRA_MIXING_LAYERS = 0 17 | EPS = 1e-6 18 | 19 | EST = architecture.MeanStrategy.ESTIMATE 20 | RN = architecture.MeanStrategy.RISK_NEUTRAL 21 | ZERO = architecture.MeanStrategy.ZERO 22 | 23 | UV = architecture.ModelType.UNIVARIATE 24 | MV = architecture.ModelType.MULTIVARIATE 25 | 26 | 27 | def test_min_max_clamping(): 28 | clamper = architecture.MinMaxClamping() 29 | x1 = torch.tensor([[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]]) 30 | x2 = torch.tensor([[4.0, 7.0], [5.0, 8.0], [6.0, 9.0]]) 31 | # Column 1 ranges from 1 to 6 32 | # Column 2 ranges from 4 to 9 33 | # "Train" on x1 and x2 34 | clamper.train() 35 | assert clamper(x1) is x1 36 | assert clamper(x2) is x2 37 | 38 | # Evaluate on x_test 39 | clamper.eval() 40 | x_test = torch.tensor([[0.0, 3.0], [7.0, 10.0], [5.0, 5.0]]) 41 | y = clamper(x_test) 42 | max_y = torch.max(y, dim=0)[0] 43 | min_y = torch.min(y, dim=0)[0] 44 | assert float(max_y[0]) <= 6.0 45 | assert float(max_y[1]) <= 9.0 46 | assert float(min_y[0]) >= 1.0 47 | assert float(min_y[1]) <= 4.0 48 | 49 | 50 | def test_untrained_mixmax_clamping_passes_all(): 51 | MAGNITUDE = 1e6 52 | filter = architecture.MinMaxClamping() 53 | filter.train(False) 54 | x = MAGNITUDE * torch.randn(3, 2) 55 | y = filter(x) 56 | assert (y == x).all() 57 | 58 | 59 | def test_gaussian_noise(): 60 | SIGMA = 0.1 61 | noise = architecture.GaussianNoise(SIGMA) 62 | x = torch.tensor([[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]]) 63 | y = noise(x) 64 | error = x - y 65 | squared_error = error * error 66 | rmse = torch.sqrt(torch.mean(squared_error)) 67 | assert rmse > 0.1 * SIGMA 68 | assert rmse < 10.0 * SIGMA 69 | 70 | 71 | def is_lower_triangular(m): 72 | mb, mixtures, oc, ic = m.shape 73 | col_offset = ic - oc 74 | assert ic >= oc 75 | 76 | for mb_i in range(mb): 77 | for mx_i in range(mixtures): 78 | for i in range(oc): 79 | for j in range(oc): 80 | if j > i: 81 | assert m[mb_i, mx_i, i, col_offset + j] == 0.0 82 | else: 83 | assert m[mb_i, mx_i, i, col_offset + j] != 0.0 84 | 85 | 86 | @pytest.mark.parametrize( 87 | "batch_size, input_symbols, window_size, feature_dim," 88 | "exogenous_dim, extra_mixing_layers," 89 | "use_batch_norm, expect_value_error", 90 | [ 91 | (13, 1, 0, 7, 3, 2, True, False), # Window size of zero 92 | (13, 1, 4, 7, 3, 2, True, False), # Change window size to 4 93 | (13, 1, 16, 7, 3, 2, True, False), # Chnage window size to 16 94 | (13, 1, 64, 7, 3, 2, True, False), # Change window size to 64 95 | (13, 1, 256, 7, 3, 2, True, False), # Change window size to 256 96 | (13, 1, 64, 7, 0, 2, True, False), # Without an exogenous input 97 | (13, 1, 64, 7, 3, 0, True, False), # Without extra mixing layers 98 | (13, 13, 64, 7, 
3, 2, True, False), # Symbol dimension other than 1 99 | (13, 13, 64, 7, 3, 2, True, False), # Speciying output symbol dim 100 | (13, 13, 64, 7, 3, 2, True, False), # Differing input/output symbol dim 101 | (13, 13, 64, 7, 3, 2, False, False), # Without batch norm 102 | (13, 13, 60, 7, 3, 2, True, True), # Window size is not valid 103 | (13, 1, 0, 7, 0, 2, True, True), # No Window AND no exogenous input 104 | (13, 1, 64, 0, 3, 2, True, True), # Feature dimension of zero 105 | ], 106 | ) 107 | def test_time_series_features( 108 | batch_size, 109 | window_size, 110 | input_symbols, 111 | feature_dim, 112 | exogenous_dim, 113 | extra_mixing_layers, 114 | use_batch_norm, 115 | expect_value_error, 116 | ): 117 | """Test that a time series network can be created and evaluated 118 | with different dimensions. This is only a sanity check 119 | that all of the dimensions conform and the network can produce output. 120 | These are untrained networks so that's all we expect. There is more 121 | extensive validatation for unit tests of the individual head classes. Here 122 | we also check that the network executes properly with the training flag on 123 | and off. 124 | 125 | """ 126 | if expect_value_error: 127 | with pytest.raises(ValueError): 128 | time_series_model = architecture.TimeSeriesFeatures( 129 | input_symbols, 130 | window_size=window_size, 131 | exogenous_dimension=exogenous_dim, 132 | feature_dimension=feature_dim, 133 | use_batch_norm=use_batch_norm, 134 | extra_mixing_layers=extra_mixing_layers, 135 | ) 136 | else: 137 | # This is the base mixture model we're testing. 138 | time_series_model = architecture.TimeSeriesFeatures( 139 | input_symbols, 140 | window_size=window_size, 141 | exogenous_dimension=exogenous_dim, 142 | feature_dimension=feature_dim, 143 | use_batch_norm=use_batch_norm, 144 | extra_mixing_layers=extra_mixing_layers, 145 | ) 146 | 147 | # Create some test inputs. 148 | 149 | # 1) time series data: 150 | ts_data = torch.randn((batch_size, input_symbols, window_size)) 151 | 152 | # 2) exogenous data (in this package that's an embedding, but that's not 153 | # necessarily the case).) 154 | exogenous_data = ( 155 | torch.randn(batch_size, exogenous_dim) if exogenous_dim > 0 else None 156 | ) 157 | 158 | # Below we call the forward() methods of time_series_model 159 | # and make sure it returns a tensor with the correct dimensions. 
160 | 161 | for train in (True, False): 162 | time_series_model.train(train) 163 | 164 | latents = time_series_model.forward(ts_data, exogenous_data) 165 | assert latents.shape == (batch_size, feature_dim) 166 | 167 | # Confirm that the window_size property returns the correct size: 168 | assert time_series_model.window_size == window_size 169 | 170 | 171 | @pytest.mark.parametrize( 172 | "model_type, mean_strategy, input_symbols, output_symbols," 173 | "is_mixture, mixture_components, exogenous_dim," 174 | "use_batch_norm, expect_value_error", 175 | [ 176 | # Non mixture models 177 | # Univariate 178 | (UV, EST, 1, 1, False, 0, 0, True, False), # No exogenous input 179 | (UV, EST, 1, 1, False, 0, 7, True, False), # With exogenous 180 | (UV, ZERO, 1, 1, False, 0, 7, True, False), # Zero head 181 | (UV, RN, 1, 1, False, 0, 7, True, False), # Risk-neutral head 182 | (UV, EST, 1, 1, False, 0, 7, False, False), # No batch norm 183 | # Multivariate 184 | (MV, EST, 9, None, False, 0, 7, True, False), # Estimate mu 185 | (MV, ZERO, 9, None, False, 0, 7, True, False), # Zero head 186 | (MV, RN, 9, None, False, 0, 7, True, True), # Risk-neutral head (ValueError) 187 | # Mixture models 188 | # Univariate 189 | (UV, EST, 1, None, True, 5, 0, True, False), # No exogenous input 190 | (UV, ZERO, 1, None, True, 5, 0, True, False), # No exogenous input - FAILS 191 | (UV, RN, 1, None, True, 5, 0, True, False), # No exogenous input - FAILS 192 | # Multivariate 193 | (MV, EST, 9, None, True, 5, 7, True, False), 194 | (MV, EST, 9, 9, True, 5, 7, True, False), # Specifying output symbol dim 195 | (MV, EST, 9, 8, True, 5, 7, True, False), # Output < input symbols 196 | (MV, ZERO, 8, 8, True, 5, 7, True, False), # Zero mean with output < input 197 | (MV, RN, 9, 8, True, 5, 7, True, True), # Risk-neutral (ValueError) 198 | (MV, EST, 9, None, True, 5, 7, False, False), # No batch norm 199 | ], 200 | ) 201 | def test_deep_volatility_model( 202 | model_type, 203 | mean_strategy, 204 | input_symbols, 205 | output_symbols, 206 | is_mixture, 207 | mixture_components, 208 | exogenous_dim, 209 | use_batch_norm, 210 | expect_value_error, 211 | ): 212 | """Test that a deep volatility model network can be created and evaluated 213 | with different internal feature dimensions. This is only a sanity check 214 | that all of the dimensions conform and the network can produce output. 215 | These are untrained networks so that's all we expect. There is more 216 | extensive validatation for unit tests of the individual head classes. Here 217 | we also check that the network executes properly with the training flag on 218 | and off. 219 | 220 | This code actually tests three things: 221 | 1) Does the forward() method of the mixture network provide sane outputs 222 | 2) Does the forward_unpacked() method of the mixture netowrk provide sane 223 | outputs 224 | 3) Does the forward() method of the ModelAndEmbedding work after combining 225 | a mixture model with an embedding. 
226 | 227 | """ 228 | 229 | if expect_value_error: 230 | with pytest.raises(ValueError): 231 | volatility_model = architecture.DeepVolatilityModel( 232 | window_size=WINDOW_SIZE, 233 | mean_strategy=mean_strategy, 234 | model_type=model_type, 235 | input_symbols=input_symbols, 236 | output_symbols=output_symbols, 237 | feature_dimension=FEATURE_DIMENSION, 238 | exogenous_dimension=exogenous_dim, 239 | is_mixture=is_mixture, 240 | mixture_components=mixture_components, 241 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 242 | use_batch_norm=use_batch_norm, 243 | ) 244 | else: 245 | # This is the base mixture model we're testing. 246 | volatility_model = architecture.DeepVolatilityModel( 247 | window_size=WINDOW_SIZE, 248 | mean_strategy=mean_strategy, 249 | model_type=model_type, 250 | input_symbols=input_symbols, 251 | output_symbols=output_symbols, 252 | feature_dimension=FEATURE_DIMENSION, 253 | exogenous_dimension=exogenous_dim, 254 | is_mixture=is_mixture, 255 | mixture_components=mixture_components, 256 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 257 | use_batch_norm=use_batch_norm, 258 | ) 259 | # Also create an embedding to test that ModelWithEmbedding returns sane results 260 | embedding = torch.nn.Embedding(EMBEDDING_SYMBOLS, exogenous_dim) 261 | 262 | # Combing volatility_model with embedding in embedding_model 263 | embedding_model = architecture.ModelWithEmbedding(volatility_model, embedding) 264 | 265 | # Create some test inputs. 266 | 267 | # 1) time series data: 268 | ts_data = torch.randn((BATCH_SIZE, input_symbols, WINDOW_SIZE)) 269 | 270 | # 2) exogenous data (in this package that's an embedding, but that's not 271 | # necessarily the case).) 272 | exogenous_data = ( 273 | torch.randn(BATCH_SIZE, exogenous_dim) if exogenous_dim > 0 else None 274 | ) 275 | 276 | # 3) an encoding vector to test with embedding_model 277 | encoding = torch.randint(0, EMBEDDING_SYMBOLS, (BATCH_SIZE,)) 278 | 279 | # Below we call the forward() methods of volatility_model and 280 | # embedding_model and also the forward_unpacked() method of 281 | # volatility_model and make sure they return tensors with the correct dimensions. 
282 | 283 | for train in (True, False): 284 | volatility_model.train(train) 285 | embedding_model.train(train) 286 | 287 | if output_symbols is None: 288 | output_symbols = input_symbols 289 | 290 | # Call forward_unpacked() 291 | output_u, latents_u = volatility_model.forward_unpacked( 292 | ts_data, 293 | exogenous_data, 294 | ) 295 | 296 | # Call volatility_model.forward() with different variations 297 | if exogenous_data is None: 298 | output = volatility_model(ts_data) 299 | else: 300 | output = volatility_model( 301 | (ts_data, exogenous_data), 302 | ) 303 | 304 | # Call embedding_model.forward() 305 | output_e = embedding_model((ts_data, encoding)) 306 | 307 | if is_mixture: 308 | log_p_u, mu_u, sigma_inv_u = output_u 309 | log_p, mu, sigma_inv, latents = output 310 | log_p_e, mu_e, sigma_inv_e, latents_e = output_e 311 | assert sigma_inv.shape == ( 312 | BATCH_SIZE, 313 | mixture_components, 314 | output_symbols, 315 | input_symbols, 316 | ) 317 | assert mu.shape == sigma_inv.shape[:3] 318 | assert log_p.shape == sigma_inv.shape[:2] 319 | 320 | assert log_p.shape == log_p_u.shape 321 | assert log_p.shape == log_p_e.shape 322 | else: 323 | mu_u, sigma_inv_u = output_u 324 | mu, sigma_inv, latents = output 325 | mu_e, sigma_inv_e, latents_e = output 326 | log_p_u = log_p = log_p_e = None 327 | assert sigma_inv.shape == (BATCH_SIZE, output_symbols, input_symbols) 328 | assert mu.shape == sigma_inv.shape[:2] 329 | 330 | assert latents_u.shape == (BATCH_SIZE, FEATURE_DIMENSION) 331 | 332 | assert mu.shape == mu_u.shape 333 | assert sigma_inv.shape == sigma_inv_u.shape 334 | assert latents.shape == latents_u.shape 335 | 336 | assert mu.shape == mu_e.shape 337 | assert sigma_inv.shape == sigma_inv_e.shape 338 | assert latents.shape == latents_e.shape 339 | 340 | # Confirm that the window_size property returns the correct size: 341 | assert volatility_model.window_size == WINDOW_SIZE 342 | 343 | # For mixture models do additional testing on log_p. 344 | if is_mixture: 345 | assert log_p_u.shape == (BATCH_SIZE, mixture_components) 346 | assert log_p.shape == log_p_u.shape 347 | assert log_p.shape == log_p_e.shape 348 | 349 | # Make sure the probabilities for a mixture sum to approximately 1. 350 | summed_p = torch.sum(torch.exp(log_p), dim=1) 351 | assert all(torch.abs(summed_p - 1.0) < EPS) 352 | 353 | if mean_strategy is ZERO and not is_mixture: 354 | assert torch.norm(mu) == 0.0 355 | 356 | if mean_strategy is ZERO and is_mixture: 357 | mu_c = mixture_model_stats.multivariate_combine_metrics( 358 | torch.exp(log_p), mu, sigma_inv 359 | )[0] 360 | assert torch.norm(mu_c) < EPS * torch.norm(mu) 361 | 362 | 363 | @pytest.mark.parametrize( 364 | "batch_size, input_symbols, output_symbols, feature_dim," 365 | "mixture_components, exogenous_dim," 366 | "use_batch_norm, expect_value_error", 367 | [ 368 | (13, 1, None, 3, 5, 0, True, False), # Without an exogenous input 369 | (13, 13, None, 3, 5, 7, True, False), # Symbol dimension other than 1 370 | (13, 13, 13, 3, 5, 7, True, False), # Speciying output symbol dim 371 | (13, 13, 11, 3, 5, 7, True, False), # Differing input/output symbol dim 372 | (13, 1, None, 3, 5, 7, False, False), # Without batch norm 373 | ], 374 | ) 375 | def test_mixture_model( 376 | batch_size, 377 | input_symbols, 378 | output_symbols, 379 | feature_dim, 380 | mixture_components, 381 | exogenous_dim, 382 | use_batch_norm, 383 | expect_value_error, 384 | ): 385 | """Test that a mixture network can be created and evaluated 386 | with different internal feature dimensions. 
This is only a sanity check 387 | that all of the dimensions conform and the network can produce output. 388 | These are untrained networks so that's all we expect. There is more 389 | extensive validatation for unit tests of the individual head classes. Here 390 | we also check that the network executes properly with the training flag on 391 | and off. 392 | 393 | This code actually tests three things: 394 | 1) Does the forward() method of the mixture network provide sane outputs 395 | 2) Does the forward_unpacked() method of the mixture netowrk provide sane 396 | outputs 397 | 3) Does the forward() method of the ModelAndEmbedding work after combining 398 | a mixture model with an embedding. 399 | 400 | """ 401 | if expect_value_error: 402 | with pytest.raises(ValueError): 403 | mixture_model = architecture.MixtureModel( 404 | WINDOW_SIZE, 405 | input_symbols, 406 | output_symbols, 407 | exogenous_dimension=exogenous_dim, 408 | output_head_factory=architecture.MultivariateMixtureHead, 409 | feature_dimension=feature_dim, 410 | mixture_components=mixture_components, 411 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 412 | use_batch_norm=use_batch_norm, 413 | mean_strategy=EST, 414 | ) 415 | else: 416 | # This is the base mixture model we're testing. 417 | mixture_model = architecture.MixtureModel( 418 | WINDOW_SIZE, 419 | input_symbols, 420 | output_symbols, 421 | exogenous_dimension=exogenous_dim, 422 | output_head_factory=architecture.MultivariateMixtureHead, 423 | feature_dimension=feature_dim, 424 | mixture_components=mixture_components, 425 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 426 | use_batch_norm=use_batch_norm, 427 | mean_strategy=EST, 428 | ) 429 | # Also create an embedding to test that ModelWithEmbedding returns sane results 430 | embedding = torch.nn.Embedding(EMBEDDING_SYMBOLS, exogenous_dim) 431 | 432 | # Combing mixture_model with embedding in embedding_model 433 | embedding_model = architecture.ModelWithEmbedding(mixture_model, embedding) 434 | 435 | # Create some test inputs. 436 | # 1) time series data: 437 | ts_data = torch.randn((batch_size, input_symbols, WINDOW_SIZE)) 438 | # 2) exogenous data (in this package that's an embedding, but that's not 439 | # necessarily the case).) 440 | exogenous_data = ( 441 | torch.randn(batch_size, exogenous_dim) if exogenous_dim > 0 else None 442 | ) 443 | # 3) an encoding vector to test with embedding_model 444 | encoding = torch.randint(0, EMBEDDING_SYMBOLS, (batch_size,)) 445 | 446 | # Below we call the forward() methods of mixture_model and 447 | # embedding_model and also the forward_unpacked() method of 448 | # mixture_model and make sure they return tensors with the correct dimensions. 
449 | 450 | for train in (True, False): 451 | mixture_model.train(train) 452 | embedding_model.train(train) 453 | 454 | if output_symbols is None: 455 | output_symbols = input_symbols 456 | 457 | # Call forward_unpacked() 458 | log_p_u, mu_u, sigma_inv_u, latents_u = mixture_model.forward_unpacked( 459 | ts_data, 460 | exogenous_data, 461 | ) 462 | 463 | # Call mixture_model.forward() with different variations 464 | if exogenous_data is None: 465 | log_p, mu, sigma_inv, latents = mixture_model(ts_data) 466 | else: 467 | log_p, mu, sigma_inv, latents = mixture_model( 468 | (ts_data, exogenous_data), 469 | ) 470 | 471 | # Call embedding_model.forward() 472 | log_p_e, mu_e, sigma_inv_e, latents_e = embedding_model((ts_data, encoding)) 473 | 474 | assert sigma_inv.shape == ( 475 | batch_size, 476 | mixture_components, 477 | output_symbols, 478 | input_symbols, 479 | ) 480 | assert mu.shape == sigma_inv.shape[:3] 481 | assert log_p.shape == sigma_inv.shape[:2] 482 | 483 | assert latents_u.shape == (batch_size, feature_dim) 484 | 485 | assert log_p.shape == log_p_u.shape 486 | assert mu.shape == mu_u.shape 487 | assert sigma_inv.shape == sigma_inv_u.shape 488 | assert latents.shape == latents_u.shape 489 | 490 | assert log_p.shape == log_p_e.shape 491 | assert mu.shape == mu_e.shape 492 | assert sigma_inv.shape == sigma_inv_e.shape 493 | assert latents.shape == latents_e.shape 494 | 495 | # Make sure the probabilities for a mixture sum to approximately 1. 496 | summed_p = torch.sum(torch.exp(log_p), dim=1) 497 | assert all(torch.abs(summed_p - 1.0) < EPS) 498 | 499 | # Confirm that the window_size property returns the correct size: 500 | assert mixture_model.window_size == WINDOW_SIZE 501 | 502 | 503 | @pytest.mark.parametrize( 504 | "batch_size, feature_dim," "exogenous_dim," "use_batch_norm, expect_value_error", 505 | [ 506 | (13, 3, 0, True, False), # Without an exogenous input 507 | (13, 3, 7, True, False), # Without extra mixing layers 508 | (13, 3, 7, False, False), # Without batch norm 509 | ], 510 | ) 511 | def test_basic_model( # basic model referes to a non-mixture model 512 | batch_size, 513 | feature_dim, 514 | exogenous_dim, 515 | use_batch_norm, 516 | expect_value_error, 517 | ): 518 | """Test that a mmixture network can be created and evaluated 519 | with different internal feature dimensions. This is only a sanity check 520 | that all of the dimensions conform and the network can produce output. 521 | These are untrained networks so that's all we expect. There is more 522 | extensive validatation for unit tests of the individual head classes. Here 523 | we also check that the network executes properly with the training flag on 524 | and off. 525 | 526 | This code actually tests three things: 527 | 1) Does for the forward() method of the mixture network provide sane outputs 528 | 2) Does the forward_unpacked() method of the mixture netowrk provide sane 529 | outputs 530 | 3) Does the forward() method of the ModelAndEmbedding work after combining 531 | a mixture model with an embedding. 532 | 533 | """ 534 | if expect_value_error: 535 | with pytest.raises(ValueError): 536 | model = architecture.UnivariateModel( 537 | WINDOW_SIZE, 538 | exogenous_dimension=exogenous_dim, 539 | feature_dimension=feature_dim, 540 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 541 | use_batch_norm=use_batch_norm, 542 | mean_strategy=EST, 543 | ) 544 | else: 545 | # This is the base model we're testing. 
546 | model = architecture.UnivariateModel( 547 | WINDOW_SIZE, 548 | exogenous_dimension=exogenous_dim, 549 | feature_dimension=feature_dim, 550 | extra_mixing_layers=EXTRA_MIXING_LAYERS, 551 | use_batch_norm=use_batch_norm, 552 | mean_strategy=EST, 553 | ) 554 | # Also create an embedding to test that ModelWithEmbedding returns sane results 555 | embedding = torch.nn.Embedding(EMBEDDING_SYMBOLS, exogenous_dim) 556 | 557 | # Combing model with embedding in embedding_model 558 | embedding_model = architecture.ModelWithEmbedding(model, embedding) 559 | 560 | # Create some test inputs. 561 | # 1) time series data: 562 | ts_data = torch.randn((batch_size, 1, WINDOW_SIZE)) 563 | # 2) exogenous data (in this package that's an embedding, but that's not 564 | # necessarily the case).) 565 | exogenous_data = ( 566 | torch.randn(batch_size, exogenous_dim) if exogenous_dim > 0 else None 567 | ) 568 | # 3) an encoding vector to test with embedding_model 569 | encoding = torch.randint(0, EMBEDDING_SYMBOLS, (batch_size,)) 570 | 571 | # Below we call the forward() methods of model and 572 | # embedding_model and also the forward_unpacked() method of 573 | # model and make sure they return tensors with the correct dimensions. 574 | 575 | for train in (True, False): 576 | model.train(train) 577 | embedding_model.train(train) 578 | 579 | # Call forward_unpacked() 580 | mu_u, sigma_inv_u, latents_u = model.forward_unpacked( 581 | ts_data, 582 | exogenous_data, 583 | ) 584 | 585 | # Call model.forward() with different variations 586 | if exogenous_data is None: 587 | mu, sigma_inv, latents = model(ts_data) 588 | else: 589 | mu, sigma_inv, latents = model( 590 | (ts_data, exogenous_data), 591 | ) 592 | 593 | # Call embedding_model.forward() 594 | mu_e, sigma_inv_e, latents_e = embedding_model((ts_data, encoding)) 595 | 596 | assert sigma_inv.shape == ( 597 | batch_size, 598 | 1, 599 | 1, 600 | ) 601 | assert mu.shape == sigma_inv.shape[:2] 602 | assert latents.shape == (batch_size, feature_dim) 603 | 604 | assert mu.shape == mu_u.shape 605 | assert sigma_inv.shape == sigma_inv_u.shape 606 | assert latents.shape == latents_u.shape 607 | 608 | assert mu.shape == mu_e.shape 609 | assert sigma_inv.shape == sigma_inv_e.shape 610 | assert latents.shape == latents_e.shape 611 | 612 | # Confirm that the window_size property returns the correct size: 613 | assert model.window_size == WINDOW_SIZE 614 | 615 | 616 | @pytest.mark.parametrize( 617 | "head_class, batch_size, input_symbols, output_symbols, feature_dim," 618 | "mixture_components, expect_value_error", 619 | [ 620 | (architecture.MultivariateMixtureHead, 13, 3, None, 5, 7, False), 621 | (architecture.MultivariateMixtureHead, 13, 3, 3, 5, 7, False), 622 | (architecture.MultivariateMixtureHead, 13, 3, 2, 5, 7, False), 623 | (architecture.UnivariateMixtureHead, 13, 1, 1, 5, 7, False), 624 | (architecture.UnivariateMixtureHead, 13, 3, None, 5, 7, True), 625 | (architecture.UnivariateMixtureHead, 13, 3, 3, 5, 7, True), 626 | ], 627 | ) 628 | def test_head_classes( 629 | head_class, 630 | batch_size, 631 | input_symbols, 632 | output_symbols, 633 | feature_dim, 634 | mixture_components, 635 | expect_value_error, 636 | ): 637 | """Test that a head network can be created and evaluated 638 | with different internal feature dimensions. Also do some sanity checks on 639 | the output where the head should constrain it, such as having probabilities 640 | that add up to one and having an inverse sqrt of covariance matrix that is 641 | triangular. 
These are untrained networks so that's all we expect. We also 642 | check that the network executes properly with the training flag on and off. 643 | """ 644 | if expect_value_error: 645 | with pytest.raises(ValueError): 646 | head = head_class( 647 | input_symbols, 648 | output_symbols, 649 | feature_dimension=feature_dim, 650 | mixture_components=mixture_components, 651 | ) 652 | else: 653 | head = head_class( 654 | input_symbols, 655 | output_symbols, 656 | feature_dimension=feature_dim, 657 | mixture_components=mixture_components, 658 | ) 659 | for train in (True, False): 660 | head.train(train) 661 | log_p, mu, sigma_inv = head(torch.randn(batch_size, feature_dim)) 662 | 663 | assert log_p.shape == (batch_size, mixture_components) 664 | 665 | # Make sure all probabilities add up to one 666 | # (logs add up to zero) 667 | assert torch.abs(torch.sum(torch.logsumexp(log_p, dim=1))) < 1e-5 668 | 669 | if output_symbols is None: 670 | output_symbols = input_symbols 671 | 672 | assert mu.shape == (batch_size, mixture_components, output_symbols) 673 | assert sigma_inv.shape == ( 674 | batch_size, 675 | mixture_components, 676 | output_symbols, 677 | input_symbols, 678 | ) 679 | 680 | is_lower_triangular(sigma_inv) 681 | -------------------------------------------------------------------------------- /tests/test_loss_functions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import math 4 | 5 | import torch 6 | 7 | import deep_volatility_models.loss_functions as loss_functions 8 | 9 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 10 | # TODO: Why can't this be smaller? 11 | EPS = 1e-7 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "x,mu,sigma_inv,expected", 16 | [ 17 | # Case 0 18 | ( 19 | torch.ones((1, 1)), 20 | torch.ones((1, 1)), 21 | torch.ones((1, 1, 1)), 22 | -LOG_SQRT_TWO_PI, 23 | ), 24 | # Case 1 25 | ( 26 | torch.ones((1, 1)), 27 | torch.ones((1, 1)), 28 | 2.0 * torch.ones((1, 1, 1)), 29 | math.log(2.0) - LOG_SQRT_TWO_PI, 30 | ), 31 | # Case 2 32 | ( 33 | torch.ones((1, 1)), 34 | 0 * torch.ones((1, 1)), 35 | 2.0 * torch.ones((1, 1, 1)), 36 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 37 | ), 38 | # Case 3 39 | ( 40 | 0 * torch.ones((1, 1)), 41 | torch.ones((1, 1)), 42 | 2.0 * torch.ones((1, 1, 1)), 43 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 44 | ), 45 | ], 46 | ) 47 | def test_likelihood_cases(x, mu, sigma_inv, expected): 48 | log_loss = loss_functions.univariate_log_likelihood(x, mu, sigma_inv) 49 | assert float(log_loss) == pytest.approx(expected, EPS) 50 | -------------------------------------------------------------------------------- /tests/test_mixture_model_stats.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | import math 4 | import numpy as np 5 | 6 | import deep_volatility_models.mixture_model_stats as mixture_model_stats 7 | 8 | BATCH_SIZE = 16 9 | 10 | softmax = torch.nn.Softmax(dim=1) 11 | logsoftmax = torch.nn.LogSoftmax(dim=1) 12 | 13 | 14 | def test_multivariate_likelihood(): 15 | """ 16 | This only checks that multivariate_mixture_log_likelihood can be called 17 | with arguments of consistent dimension within the correct range. It does 18 | not confirm the correctness of the values returned. 
19 | """ 20 | for batch_size in range(BATCH_SIZE, BATCH_SIZE + 2): 21 | for mixture in range(1, 3): 22 | for channels in range(1, 3): 23 | x = torch.randn(batch_size, channels) 24 | log_p = logsoftmax(torch.randn(batch_size, mixture)) 25 | mu = torch.randn(batch_size, mixture, channels) 26 | sigma_inv = torch.tril( 27 | torch.randn(batch_size, mixture, channels, channels) 28 | ) 29 | 30 | log_loss = mixture_model_stats.multivariate_log_likelihood( 31 | x, log_p, mu, sigma_inv 32 | ) 33 | 34 | assert log_loss.shape == (batch_size,) 35 | assert float(torch.sum(log_loss)) != 0.0 36 | 37 | 38 | LOG_SQRT_TWO_PI = 0.5 * math.log(2.0 * math.pi) 39 | # TODO: Why can't this be smaller? 40 | EPS = 1e-7 41 | 42 | 43 | @pytest.mark.parametrize( 44 | "x,log_p,mu,sigma_inv,expected", 45 | [ 46 | # Case 0 47 | ( 48 | torch.ones((1, 1)), 49 | 0 * torch.ones((1, 1)), 50 | torch.ones((1, 1, 1)), 51 | torch.ones((1, 1, 1, 1)), 52 | -LOG_SQRT_TWO_PI, 53 | ), 54 | # Case 1 55 | ( 56 | torch.ones((1, 1)), 57 | 0 * torch.ones((1, 1)), 58 | torch.ones((1, 1, 1)), 59 | 2.0 * torch.ones((1, 1, 1, 1)), 60 | math.log(2.0) - LOG_SQRT_TWO_PI, 61 | ), 62 | # Case 2 63 | ( 64 | torch.ones((1, 1)), 65 | 0 * torch.ones((1, 1)), 66 | 0 * torch.ones((1, 1, 1)), 67 | 2.0 * torch.ones((1, 1, 1, 1)), 68 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 69 | ), 70 | # Case 3 71 | ( 72 | 0 * torch.ones((1, 1)), 73 | 0 * torch.ones((1, 1)), 74 | torch.ones((1, 1, 1)), 75 | 2.0 * torch.ones((1, 1, 1, 1)), 76 | math.log(2.0) - LOG_SQRT_TWO_PI - 2.0, 77 | ), 78 | ], 79 | ) 80 | def test_multivariate_likelihood_cases(x, log_p, mu, sigma_inv, expected): 81 | log_loss = mixture_model_stats.multivariate_log_likelihood(x, log_p, mu, sigma_inv) 82 | assert float(log_loss) == pytest.approx(expected, EPS) 83 | 84 | log_loss = mixture_model_stats.univariate_log_likelihood(x, log_p, mu, sigma_inv) 85 | assert float(log_loss) == pytest.approx(expected, EPS) 86 | 87 | 88 | def test_univeriate_fails_on_multivariate_input(): 89 | mb_size, mixture_components, symbols = (5, 3, 2) 90 | 91 | x = torch.randn(mb_size, symbols) 92 | log_p = torch.randn(mb_size, mixture_components) 93 | mu = torch.randn(mb_size, mixture_components, symbols) 94 | sigma_inv = torch.randn(mb_size, mixture_components, symbols, symbols) 95 | 96 | # As a sanity check that the dimensions are correct except for being 97 | # multi-variate, a ccall to 98 | # mixture_model_state.multivariate_log_likelihood() should return 99 | # *something*: 100 | 101 | mixture_model_stats.multivariate_log_likelihood(x, log_p, mu, sigma_inv) 102 | 103 | with pytest.raises(ValueError): 104 | log_loss = mixture_model_stats.univariate_log_likelihood( 105 | x, log_p, mu, sigma_inv 106 | ) 107 | 108 | p = torch.exp(log_p) 109 | with pytest.raises(ValueError): 110 | mixture_model_stats.univariate_combine_metrics(p, mu, sigma_inv) 111 | 112 | 113 | def test_fail_on_inconsistent_dimensions(): 114 | 115 | mb_size, mixture_components, symbols = (5, 3, 1) 116 | 117 | x = torch.randn(mb_size, symbols) 118 | log_p = torch.randn(mb_size, mixture_components) 119 | # Introduce an incompatible dimension 120 | mu = torch.randn(mb_size, mixture_components + 1, symbols) 121 | sigma_inv = torch.randn(mb_size, mixture_components, symbols, symbols) 122 | 123 | with pytest.raises(ValueError): 124 | mixture_model_stats.univariate_log_likelihood(x, log_p, mu, sigma_inv) 125 | 126 | with pytest.raises(ValueError): 127 | mixture_model_stats.multivariate_log_likelihood(x, log_p, mu, sigma_inv) 128 | 129 | p = torch.exp(log_p) 130 | with 
pytest.raises(ValueError): 131 | mixture_model_stats.univariate_combine_metrics(p, mu, sigma_inv) 132 | 133 | 134 | @pytest.mark.parametrize( 135 | "p, mu, sigma_inv, expected_mean, expected_variance", 136 | [ 137 | ( 138 | [[1.0]], 139 | [[[2.0]]], 140 | [[[[0.25]]]], 141 | torch.tensor([2.0]), 142 | torch.tensor([16.0]), 143 | ), 144 | ( 145 | [[0.75, 0.25]], 146 | [[[4.0], [8.0]]], 147 | [[[[0.25]], [[0.125]]]], 148 | torch.tensor([5.0]), 149 | torch.tensor([31.0]), 150 | ), 151 | ], 152 | ) 153 | def test_univariate_combine_metrics(p, mu, sigma_inv, expected_mean, expected_variance): 154 | mean, variance = mixture_model_stats.univariate_combine_metrics(p, mu, sigma_inv) 155 | print(f"\nReturned mean:\n{mean}") 156 | print(f"Expected mean:\n{expected_mean}") 157 | print(f"\nReturned variance:\n{variance}") 158 | print(f"Expected variance:\n{expected_variance}") 159 | assert mean == expected_mean 160 | assert variance == expected_variance 161 | -------------------------------------------------------------------------------- /tests/test_stock_data.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Standard Python modules 4 | import os 5 | from unittest.mock import patch 6 | 7 | # Third party modules 8 | import pandas as pd 9 | 10 | # Local imports 11 | import deep_volatility_models.util as util 12 | import deep_volatility_models.stock_data as stock_data 13 | 14 | SAMPLE_DF = pd.DataFrame( 15 | { 16 | "date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]), 17 | "open": [1.0, 2.0, 3.0], 18 | "close": [0.5, 2.5, 3.1], 19 | } 20 | ).set_index("date") 21 | 22 | SAMPLE_PATH = "any_path" 23 | 24 | 25 | @pytest.fixture 26 | def data_source(): 27 | """ 28 | Create an instance of a data source for testing 29 | """ 30 | mock_data_source = lambda symbols: {s.upper(): SAMPLE_DF for s in symbols} 31 | return mock_data_source 32 | 33 | 34 | def test_symbol_history_reader_and_writer(tmp_path): 35 | filename = os.path.join(tmp_path, "foo.csv") 36 | 37 | # Use writer to write SAMPLE_df 38 | 39 | # But first, intentionally reverse the order of dates. 40 | sample_copy = SAMPLE_DF.reset_index().sort_values("date", ascending=False) 41 | assert not util.is_sorted(sample_copy.date) 42 | 43 | # Define a helper to simplify writing slightly different versions of SAMEPLE_DF 44 | reader = stock_data.SymbolHistoryReader() 45 | 46 | def check(writer): 47 | with open(filename, "wb") as f: 48 | writer(f) 49 | 50 | # Use reader to read it back and compare the results. 51 | with open(filename, "rb") as f: 52 | loaded_df = reader(f) 53 | 54 | assert loaded_df.index.name == "date" 55 | assert util.is_sorted(loaded_df.index) 56 | assert (loaded_df == SAMPLE_DF).all().all() 57 | 58 | for df in [sample_copy, sample_copy.set_index("date")]: 59 | check(stock_data.SymbolHistoryWriter(df)) 60 | 61 | 62 | def test_file_system_store(tmp_path): 63 | symbol = "FOO" 64 | store = stock_data.FileSystemStore(tmp_path) 65 | assert not store.exists(symbol) 66 | 67 | store.write("FOO", stock_data.SymbolHistoryWriter(SAMPLE_DF)) 68 | assert store.exists("FOO") 69 | 70 | loaded_df = store.read(symbol, stock_data.SymbolHistoryReader()) 71 | assert (loaded_df == SAMPLE_DF).all().all() 72 | 73 | 74 | def test_check_cache_exists_path(tmp_path): 75 | """ 76 | Check that the os.path.exists() gets called with the correct path 77 | and check that exists is not case sensitive. 
78 | """ 79 | tmp_path_store = stock_data.FileSystemStore(tmp_path) 80 | with patch("deep_volatility_models.stock_data.os.path.exists") as os_path_exists: 81 | tmp_path_store.exists("symbol1") 82 | os_path_exists.assert_called_with( 83 | os.path.join(tmp_path_store.cache_dir, "symbol1.csv") 84 | ) 85 | 86 | tmp_path_store.exists("SyMbOL2") 87 | os_path_exists.assert_called_with( 88 | os.path.join(tmp_path_store.cache_dir, "symbol2.csv") 89 | ) 90 | 91 | 92 | def test_history(data_source, tmp_path): 93 | partial_symbol_set = set(["ABC", "DEF"]) 94 | missing_symbol_set = set(["GHI", "JKL"]) 95 | full_symbol_set = partial_symbol_set.union(missing_symbol_set) 96 | 97 | tmp_path_store = stock_data.FileSystemStore(tmp_path) 98 | caching_download = stock_data.CachingDownloader( 99 | data_source, 100 | tmp_path_store, 101 | stock_data.SymbolHistoryWriter, 102 | overwrite_existing=False, 103 | ) 104 | 105 | response = caching_download(partial_symbol_set) 106 | assert len(response) == len(partial_symbol_set) 107 | for symbol in partial_symbol_set: 108 | assert tmp_path_store.exists(symbol) 109 | 110 | for symbol in missing_symbol_set: 111 | assert not tmp_path_store.exists(symbol) 112 | 113 | response = caching_download(full_symbol_set) 114 | # Check that only the missing symbols were downloaded 115 | # This is true if all missing symbols are in the response 116 | # and if the length of the response is equal to the number 117 | # of missing symbols 118 | assert len(response) == len(missing_symbol_set) 119 | for symbol in missing_symbol_set: 120 | assert symbol in response 121 | 122 | for symbol in full_symbol_set: 123 | assert tmp_path_store.exists(symbol) 124 | 125 | # Try downloading again, which should be a no-op 126 | response = caching_download(full_symbol_set) 127 | assert len(response) == 0 128 | 129 | # Try loading one of the downloaded files 130 | loader = stock_data.CachingSymbolHistoryLoader( 131 | data_source, tmp_path_store, overwrite_existing=False 132 | ) 133 | # load("pqr") 134 | combiner = stock_data.PriceHistoryConcatenator() 135 | combiner(loader("pqr")) 136 | -------------------------------------------------------------------------------- /tests/test_time_series_datasets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Third party libraries 4 | import torch 5 | 6 | # Local modules 7 | import deep_volatility_models.time_series_datasets as time_series_datasets 8 | 9 | 10 | # Constants used in tests. 11 | A_SYMBOL_ENCODING = 21 12 | 13 | 14 | """ 15 | The test cases for multivariate_stats() was generated as follows: 16 | 17 | Assume x = L*z + b has zero mean and covariance = I 18 | 19 | Now E[x] = b 20 | C_x = E[xx’] = E[L (zz’) L’] = LL’ 21 | Where L is a lower triangular matrix. 22 | 23 | We can use this to generate series of x with a specific L and b. 24 | For example, let b = [1, 2] 25 | let L = [1, 0], [-1; 2]] 26 | 27 | We need to choose values for z with zero mean and C = I 28 | 29 | One possibility is 30 | Z = [[1, 1], [1, -1], [-1, 1], [-1, -1]] 31 | 32 | Where each row is a (z1, z2) pair. 
33 | 34 | Because “time” is the row dimension of Z, we need to transpose the original 35 | equation to be: 36 | 37 | x = ZL’ + b’ 38 | 39 | where Z is as above, L=[[1, 0], [-1, 2]], 40 | and b’ = [[1, 2], [1, 2], [1, 2], [1, 2]] 41 | 42 | >>> l = np.array([[1, 0], [-1, 2]]) 43 | >>> l 44 | array([[ 1, 0], 45 | [-1, 2]]) 46 | >>> z=np.array([[1, 1], [1, -1], [-1, 1], [-1, -1]]) 47 | >>> z 48 | array([[ 1, 1], 49 | [ 1, -1], 50 | [-1, 1], 51 | [-1, -1]]) 52 | >>> b=np.array([[1, 2], [1, 2], [1, 2], [1, 2]]) 53 | >>> b 54 | array([[1, 2], 55 | [1, 2], 56 | [1, 2], 57 | [1, 2]]) 58 | 59 | >>> np.matmul(z, l.T) + b 60 | array([[ 2, 3], 61 | [ 2, -1], 62 | [ 0, 5], 63 | [ 0, 1]]) 64 | >>> 65 | 66 | """ 67 | 68 | 69 | @pytest.mark.parametrize( 70 | "series, mu_expected, l_expected", 71 | [ 72 | ( 73 | [[2.0, 3.0], [2.0, -1.0], [0.0, 5.0], [0.0, 1.0]], 74 | torch.tensor([1, 2], dtype=torch.float), 75 | torch.tensor([[1.0, 0.0], [-1.0, 2.0]]), 76 | ), 77 | ], 78 | ) 79 | def test_multivariate_stats(series, mu_expected, l_expected): 80 | mu, l = time_series_datasets.multivariate_stats(series) 81 | assert mu.shape == mu_expected.shape 82 | assert l.shape == l_expected.shape 83 | print(f"mu returned:\n{mu}") 84 | print(f"mu expected:\n{mu_expected}") 85 | print(f"\nl returned:\n{l}") 86 | print(f"l expected:\n{l_expected}") 87 | 88 | # Fortunately the test case is compute *exactly* so no approximate 89 | # comparisons are necessary. 90 | assert (mu == mu_expected).all() 91 | assert (l == l_expected).all() 92 | 93 | 94 | def test_rolling_window_arg_check(): 95 | with pytest.raises(ValueError): 96 | time_series_datasets.RollingWindow(range(10), 3, stride=0) 97 | 98 | with pytest.raises(ValueError): 99 | time_series_datasets.RollingWindow( 100 | [[1, 3], [3, 4], [5, 6]], 101 | 2, 102 | create_channel_dim=True, 103 | ) 104 | 105 | 106 | @pytest.mark.parametrize( 107 | "series,window_size,stride,create_channel_dim,expected", 108 | [ 109 | ( 110 | range(10), 111 | 3, 112 | 1, 113 | False, 114 | [ 115 | torch.tensor(range(0, 3)), 116 | torch.tensor(range(1, 4)), 117 | torch.tensor(range(2, 5)), 118 | torch.tensor(range(3, 6)), 119 | torch.tensor(range(4, 7)), 120 | torch.tensor(range(5, 8)), 121 | torch.tensor(range(6, 9)), 122 | torch.tensor(range(7, 10)), 123 | ], 124 | ), 125 | # Same case with a different stride 126 | ( 127 | range(10), 128 | 3, 129 | 2, 130 | False, 131 | [ 132 | torch.tensor(range(0, 3)), 133 | torch.tensor(range(2, 5)), 134 | torch.tensor(range(4, 7)), 135 | torch.tensor(range(6, 9)), 136 | ], 137 | ), 138 | # Same case with create_channel_dim=True 139 | ( 140 | range(10), 141 | 3, 142 | 1, 143 | True, 144 | [ 145 | torch.tensor([list(range(0, 3))]), 146 | torch.tensor([list(range(1, 4))]), 147 | torch.tensor([list(range(2, 5))]), 148 | torch.tensor([list(range(3, 6))]), 149 | torch.tensor([list(range(4, 7))]), 150 | torch.tensor([list(range(5, 8))]), 151 | torch.tensor([list(range(6, 9))]), 152 | torch.tensor([list(range(7, 10))]), 153 | ], 154 | ), 155 | # Check a sequence of vectors 156 | ( 157 | [ 158 | [1, 2, 3], 159 | [4, 5, 6], 160 | [7, 8, 9], 161 | [10, 11, 12], 162 | ], 163 | 2, 164 | 1, 165 | False, 166 | [ 167 | torch.tensor([[1, 4], [2, 5], [3, 6]]), 168 | torch.tensor([[4, 7], [5, 8], [6, 9]]), 169 | torch.tensor([[7, 10], [8, 11], [9, 12]]), 170 | ], 171 | ), 172 | ], 173 | ) 174 | def test_rolling_window_series( 175 | series, window_size, stride, create_channel_dim, expected 176 | ): 177 | d = time_series_datasets.RollingWindow( 178 | series, window_size, 
stride=stride, create_channel_dim=create_channel_dim 179 | ) 180 | assert len(d) == len(expected) 181 | 182 | # We use indexes here rather than iterators because we're specifically 183 | # testing the implementation of __getitem__() 184 | for i in range(len(expected)): 185 | print(f"\nwindow returned:\n{d[i]}") 186 | print(f"window expected:\n{expected[i]}") 187 | assert d[i].shape == expected[i].shape 188 | assert (d[i] == expected[i]).all() 189 | 190 | # Make sure negative indexes work 191 | for i in range(-len(expected), 0): 192 | assert (d[i] == expected[i]).all() 193 | 194 | with pytest.raises(IndexError): 195 | d[len(expected)] 196 | with pytest.raises(IndexError): 197 | d[-len(expected) - 1] 198 | 199 | 200 | @pytest.mark.parametrize( 201 | "series,window_size,stride,expected_window,expected_target", 202 | [ 203 | ( 204 | range(10), 205 | 3, 206 | 2, 207 | [ 208 | torch.tensor([0, 1]), 209 | torch.tensor([2, 3]), 210 | torch.tensor([4, 5]), 211 | torch.tensor([6, 7]), 212 | ], 213 | [ 214 | torch.tensor(2), 215 | torch.tensor(4), 216 | torch.tensor(6), 217 | torch.tensor(8), 218 | ], 219 | ), 220 | # Check a sequence of vectors 221 | ( 222 | [ 223 | [1, 2, 3], 224 | [4, 5, 6], 225 | [7, 8, 9], 226 | [10, 11, 12], 227 | ], 228 | 2, 229 | 1, 230 | [ 231 | torch.tensor([[1], [2], [3]]), 232 | torch.tensor([[4], [5], [6]]), 233 | torch.tensor([[7], [8], [9]]), 234 | ], 235 | [ 236 | torch.tensor([[4], [5], [6]]), 237 | torch.tensor([[7], [8], [9]]), 238 | torch.tensor([[10], [11], [12]]), 239 | ], 240 | ), 241 | ], 242 | ) 243 | def test_target_selection( 244 | series, window_size, stride, expected_window, expected_target 245 | ): 246 | raw_windows = time_series_datasets.RollingWindow(series, window_size, stride=stride) 247 | window_and_target = time_series_datasets.ContextWindowAndTarget( 248 | raw_windows, target_dim=1 249 | ) 250 | encoding_window_and_target = time_series_datasets.ContextWindowEncodingAndTarget( 251 | A_SYMBOL_ENCODING, window_and_target 252 | ) 253 | 254 | assert len(window_and_target) == len(expected_target) 255 | assert len(encoding_window_and_target) == len(expected_target) 256 | 257 | # We use indexes here rather than iterators because we're specifically 258 | # testing the implementation of __getitem__() 259 | for i in range(len(expected_target)): 260 | window, target = window_and_target[i] 261 | print(f"window returned:\n\n{window}") 262 | print(f"window expected:\n{expected_window[i]}") 263 | assert window.shape == expected_window[i].shape 264 | assert (window == expected_window[i]).all() 265 | 266 | print(f"\ntarget returned:\n{target}") 267 | print(f"target expected:\n{expected_target[i]}") 268 | assert target.shape == expected_target[i].shape 269 | assert (target == expected_target[i]).all() 270 | 271 | (window, encoding), target = encoding_window_and_target[i] 272 | assert encoding == A_SYMBOL_ENCODING 273 | assert window.shape == expected_window[i].shape 274 | assert (window == expected_window[i]).all() 275 | assert target.shape == expected_target[i].shape 276 | assert (target == expected_target[i]).all() 277 | 278 | # Make sure negatives indexes work 279 | for i in range(-len(expected_target), 0): 280 | window, target = window_and_target[i] 281 | assert (window == expected_window[i]).all() 282 | assert (target == expected_target[i]).all() 283 | 284 | (window, encoding), target = encoding_window_and_target[i] 285 | assert encoding == A_SYMBOL_ENCODING 286 | assert (window == expected_window[i]).all() 287 | assert (target == expected_target[i]).all() 288 
| 289 | with pytest.raises(IndexError): 290 | window_and_target[len(expected_target)] 291 | 292 | with pytest.raises(IndexError): 293 | window_and_target[-len(expected_target) - 1] 294 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | # Local modules 4 | import deep_volatility_models.util as util 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "l,expected", 9 | [ 10 | ([], True), # Check empty list. 11 | (range(4), True), # Make sure iterables work 12 | (tuple(range(4)), True), # Make sure instantiated tuples work 13 | (list(range(4)), True), # Instantiated lists 14 | (reversed(range(4)), False), # Reversed lists should fail. 15 | ([1, 1, 2, 2], True), # Check that non-strict inequality is ok. 16 | ([1] * 5, True), 17 | ([1, 1, 1, 0], False), # Edge cases? 18 | ], 19 | ) 20 | def test_is_sorted(l, expected): 21 | assert util.is_sorted(l) == expected 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "test_input, expected_output", 26 | [ 27 | ("foo", ["FOO"]), 28 | ([], []), 29 | (["foo"], ["FOO"]), 30 | (("a", "b"), ["A", "B"]), 31 | (iter(("x", "y", "z")), ["X", "Y", "Z"]), 32 | ], 33 | ) 34 | def test_to_symbol_list(test_input, expected_output): 35 | print(f"test input: {test_input}") 36 | print(f"expected output: {expected_output}") 37 | assert util.to_symbol_list(test_input) == expected_output 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "test_input, expected_output", 42 | [ 43 | ("foo", "foo"), 44 | ("Foo", "foo"), 45 | ("a b c", "a_b_c"), 46 | ("A b C", "a_b_c"), 47 | ], 48 | ) 49 | def test_rename_column(test_input, expected_output): 50 | print(f"test input: {test_input}") 51 | print(f"expected output: {expected_output}") 52 | assert util.rename_column(test_input) == expected_output 53 | --------------------------------------------------------------------------------