├── .coveragerc ├── .github └── workflows │ └── test.yml ├── .gitignore ├── .readthedocs.yml ├── LICENSE ├── README.md ├── codecov.yml ├── deepdow ├── __init__.py ├── benchmarks.py ├── callbacks.py ├── data │ ├── __init__.py │ ├── augment.py │ ├── load.py │ └── synthetic.py ├── experiments.py ├── explain.py ├── layers │ ├── __init__.py │ ├── allocate.py │ ├── collapse.py │ ├── misc.py │ └── transform.py ├── losses.py ├── nn.py ├── utils.py └── visualize.py ├── docs ├── Makefile ├── _static │ └── css │ │ └── custom.css ├── conf.py ├── index.rst └── source │ ├── api │ ├── deepdow.benchmarks.rst │ ├── deepdow.callbacks.rst │ ├── deepdow.data.augment.rst │ ├── deepdow.data.load.rst │ ├── deepdow.data.rst │ ├── deepdow.data.synthetic.rst │ ├── deepdow.experiments.rst │ ├── deepdow.explain.rst │ ├── deepdow.layers.allocate.rst │ ├── deepdow.layers.collapse.rst │ ├── deepdow.layers.misc.rst │ ├── deepdow.layers.rst │ ├── deepdow.layers.transform.rst │ ├── deepdow.losses.rst │ ├── deepdow.nn.rst │ ├── deepdow.rst │ └── deepdow.utils.rst │ ├── basics.rst │ ├── benchmarks.rst │ ├── changelog.rst │ ├── data_loading.rst │ ├── experiments.rst │ ├── installation.rst │ ├── introduction.rst │ ├── layers.rst │ ├── losses.rst │ └── networks.rst ├── examples ├── README.rst ├── end_to_end │ ├── README.rst │ ├── getting_started.py │ ├── iid.py │ ├── sp500_covmat.csv │ ├── sp500_mean.csv │ ├── var_coefs.npy │ └── var_model.py └── layers │ ├── README.rst │ ├── softmax_sparsemax.py │ ├── warp.py │ └── zoom.py ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── test_benchmarks.py ├── test_callbacks.py ├── test_data ├── test_augment.py ├── test_load.py └── test_synthetic.py ├── test_experiments.py ├── test_explain.py ├── test_layers.py ├── test_losses.py ├── test_nn.py ├── test_utils.py └── test_visualize.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | tests/* 4 | venv/* 5 | setup.py 
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Lint and test 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | jobs: 9 | linting: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Cancel previous workflows that are still running 14 | uses: styfle/cancel-workflow-action@0.8.0 15 | with: 16 | access_token: ${{ github.token }} 17 | 18 | - name: Checkout most recent commit 19 | uses: actions/checkout@v3 20 | 21 | - name: Set up Python 3.9 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: 3.9 25 | 26 | - name: List dependencies 27 | run: | 28 | pip freeze 29 | 30 | - name: Lint 31 | run: | 32 | pip install 'black==22.6.0' 'flake8==5.0.3' 'pydocstyle==6.1.1' 33 | 34 | black -l 79 --check deepdow/ tests 35 | flake8 deepdow tests 36 | pydocstyle deepdow 37 | 38 | testing: 39 | needs: linting 40 | runs-on: ${{ matrix.os }} 41 | strategy: 42 | matrix: 43 | os: [ubuntu-latest] 44 | python-version: ['3.10', '3.11'] 45 | steps: 46 | - name: Checkout most recent commit 47 | uses: actions/checkout@v3 48 | 49 | - name: Set up Python ${{ matrix.python-version }} 50 | uses: actions/setup-python@v3 51 | with: 52 | python-version: ${{ matrix.python-version }} 53 | 54 | - name: Install dependencies 55 | run: | 56 | pip install .[dev,docs,examples] 57 | 58 | - name: List dependencies 59 | run: | 60 | pip freeze 61 | 62 | - name: Run unittests 63 | run: | 64 | pytest 65 | 66 | - name: Run doctests 67 | run: | 68 | pip install sphinx 69 | sphinx-build -b doctest docs/ temp_doctest 70 | 71 | - name: Run coverage 72 | uses: codecov/codecov-action@v3 73 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # custom 2 | *DS_Store 3 | .idea 4 | 5 | # Byte-compiled / 
optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | 3 | # Required 4 | version: 2 5 | 6 | # Build documentation in the docs/ directory with Sphinx 7 | sphinx: 8 | configuration: docs/conf.py 9 | 10 | build: 11 | image: stable 12 | 13 | 14 | # Optionally set the version of Python and requirements required to build your docs 15 | python: 16 | version: 3.7 17 | system_packages: true 18 | install: 19 | - method: pip 20 | path: . 21 | extra_requirements: 22 | - examples 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![final](https://user-images.githubusercontent.com/18519371/79003829-afca6380-7b53-11ea-8322-f05577536957.png) 2 | 3 | [![codecov](https://codecov.io/gh/jankrepl/deepdow/branch/master/graph/badge.svg)](https://codecov.io/gh/jankrepl/deepdow) 4 | [![Documentation Status](https://readthedocs.org/projects/deepdow/badge/?version=latest)](https://deepdow.readthedocs.io/en/latest/?badge=latest) 5 | [![PyPI version](https://badge.fury.io/py/deepdow.svg)](https://badge.fury.io/py/deepdow) 6 | [![DOI](https://zenodo.org/badge/237742797.svg)](https://zenodo.org/badge/latestdoi/237742797) 7 | 8 | `deepdow` (read as "wow") is a Python package connecting portfolio optimization and deep learning. Its goal is to 9 | facilitate research of networks that perform weight allocation in **one forward pass**. 
10 | 11 | 12 | # Installation 13 | ```bash 14 | pip install deepdow 15 | ``` 16 | # Resources 17 | - [**Getting started**](https://deepdow.readthedocs.io/en/latest/auto_examples/end_to_end/getting_started.html) 18 | - [**Detailed documentation**](https://deepdow.readthedocs.io/en/latest) 19 | - [**More examples**](https://deepdow.readthedocs.io/en/latest/auto_examples/index.html) 20 | 21 | # Description 22 | `deepdow` attempts to **merge** two very common steps in portfolio optimization 23 | 1. Forecasting of future evolution of the market (LSTM, GARCH,...) 24 | 2. Optimization problem design and solution (convex optimization, ...) 25 | 26 | It does so by constructing a pipeline of layers. The last layer performs the allocation and all the previous ones serve 27 | as feature extractors. The overall network is **fully differentiable** and one can optimize its parameters by gradient 28 | descent algorithms. 29 | 30 | # `deepdow` is not ... 31 | - focused on active trading strategies, it only finds allocations to be held over some horizon (**buy and hold**) 32 | - one implication is that transaction costs associated with frequent, short-term trades, will not be a primary concern 33 | - a reinforcement learning framework, however, one might easily reuse `deepdow` layers in other deep learning applications 34 | - a single algorithm, instead, it is a framework that allows for easy experimentation with powerful building blocks 35 | 36 | 37 | # Some features 38 | - all layers built on `torch` and fully differentiable 39 | - integrates differentiable convex optimization (`cvxpylayers`) 40 | - implements clustering based portfolio allocation algorithms 41 | - multiple dataloading strategies (`RigidDataLoader`, `FlexibleDataLoader`) 42 | - integration with `mlflow` and `tensorboard` via callbacks 43 | - provides variety of losses like sharpe ratio, maximum drawdown, ... 
44 | - simple to extend and customize 45 | - CPU and GPU support 46 | 47 | # Citing 48 | If you use `deepdow` (including ideas proposed in the documentation, examples and tests) in your research please **make sure to cite it**. 49 | To obtain all the necessary citing information, click on the **DOI badge** at the beginning of this README and you will be automatically redirected to an external website. 50 | Note that we are currently using [Zenodo](https://zenodo.org/). 51 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /deepdow/__init__.py: -------------------------------------------------------------------------------- 1 | """Package connecting deep learning and portfolio optimization. 2 | 3 | Release markers: 4 | X.Y 5 | X.Y.Z for bug fixes 6 | """ 7 | 8 | __version__ = "0.2.3" 9 | -------------------------------------------------------------------------------- /deepdow/benchmarks.py: -------------------------------------------------------------------------------- 1 | """Collection of benchmarks.""" 2 | from abc import ABC, abstractmethod 3 | 4 | import cvxpy as cp 5 | from cvxpylayers.torch import CvxpyLayer 6 | import torch 7 | 8 | from .layers import CovarianceMatrix 9 | 10 | 11 | class Benchmark(ABC): 12 | """Abstract benchmark class. 13 | 14 | The idea is to create some benchmarks that we can use for comparison to our neural networks. Note that we 15 | assume that benchmarks are not trainable - one can only use them for inference. 
16 | 17 | """ 18 | 19 | @abstractmethod 20 | def __call__(self, x): 21 | """Prediction of the model.""" 22 | 23 | @property 24 | def hparams(self): 25 | """Hyperparamters relevant to construction of the model.""" 26 | return {} 27 | 28 | 29 | class InverseVolatility(Benchmark): 30 | """Allocation only considering volatility of individual assets. 31 | 32 | Parameters 33 | ---------- 34 | use_std : bool 35 | If True, then we use standard deviation as a measure of volatility. Otherwise variance is used. 36 | 37 | returns_channel : int 38 | Which channel in the `x` feature matrix to consider (the 2nd dimension) as returns. 39 | 40 | """ 41 | 42 | def __init__(self, use_std=False, returns_channel=0): 43 | self.use_std = use_std 44 | self.returns_channel = returns_channel 45 | 46 | def __call__(self, x): 47 | """Predict weights. 48 | 49 | Parameters 50 | ---------- 51 | x : torch.Tensor 52 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)`. 53 | 54 | Returns 55 | ------- 56 | weights : torch.Tensor 57 | Tensor of shape `(n_samples, n_assets)` representing the predicted weights. 58 | 59 | """ 60 | eps = 1e-6 61 | x_rets = x[:, self.returns_channel, ...] 62 | vols = x_rets.std(dim=1) if self.use_std else x_rets.var(dim=1) 63 | ivols = 1 / (vols + eps) 64 | weights = ivols / ivols.sum(dim=1, keepdim=True) 65 | 66 | return weights 67 | 68 | @property 69 | def hparams(self): 70 | """Hyperparamters relevant to construction of the model.""" 71 | return { 72 | "use_std": self.use_std, 73 | "returns_channel": self.returns_channel, 74 | } 75 | 76 | 77 | class MaximumReturn(Benchmark): 78 | """Markowitz portfolio optimization - maximum return. 79 | 80 | Parameters 81 | ---------- 82 | max_weight : float 83 | A number in (0, 1] representing the maximum weight per asset. 84 | 85 | n_assets : None or int 86 | If specifed the benchmark will always have to be provided with `n_assets` of assets in the `__call__`. 
87 | This way one can achieve major speedups since the optimization problem is canonicalized only once in the 88 | constructor. However, when `n_assets` is None the optimization problem is canonicalized before each 89 | inside of `__call__` which results in overhead but allows for variable number of assets. 90 | 91 | returns_channel : int 92 | Which channel in the `x` feature matrix to consider (the 2nd dimension) as returns. 93 | 94 | Attributes 95 | ---------- 96 | optlayer : cvxpylayers.torch.CvxpyLayer or None 97 | Equal to None if `n_assets` not provided in the constructor. In this case optimization problem is constructed 98 | with each forward pass. This allows for variable number of assets but is slower. If `n_assets` provided than 99 | constructed once and for all in the constructor. 100 | 101 | """ 102 | 103 | def __init__(self, max_weight=1, n_assets=None, returns_channel=0): 104 | self.max_weight = max_weight 105 | self.n_assets = n_assets 106 | self.returns_channel = returns_channel 107 | 108 | self.optlayer = ( 109 | self._construct_problem(n_assets, max_weight) 110 | if self.n_assets is not None 111 | else None 112 | ) 113 | 114 | @staticmethod 115 | def _construct_problem(n_assets, max_weight): 116 | """Construct cvxpylayers problem.""" 117 | rets = cp.Parameter(n_assets) 118 | w = cp.Variable(n_assets) 119 | 120 | ret = rets @ w 121 | prob = cp.Problem( 122 | cp.Maximize(ret), [cp.sum(w) == 1, w >= 0, w <= max_weight] 123 | ) 124 | 125 | return CvxpyLayer(prob, parameters=[rets], variables=[w]) 126 | 127 | def __call__(self, x): 128 | """Predict weights. 129 | 130 | Parameters 131 | ---------- 132 | x : torch.Tensor 133 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)`. 134 | 135 | Returns 136 | ------- 137 | weights : torch.Tensor 138 | Tensor of shape `(n_samples, n_assets)` representing the predicted weights. 
139 | 140 | """ 141 | n_samples, _, lookback, n_assets = x.shape 142 | 143 | # Problem setup 144 | if self.optlayer is not None: 145 | if self.n_assets != n_assets: 146 | raise ValueError( 147 | "Incorrect number of assets: {}, expected: {}".format( 148 | n_assets, self.n_assets 149 | ) 150 | ) 151 | 152 | optlayer = self.optlayer 153 | else: 154 | optlayer = self._construct_problem(n_assets, self.max_weight) 155 | 156 | rets_estimate = x[:, self.returns_channel, :, :].mean( 157 | dim=1 158 | ) # (n_samples, n_assets) 159 | 160 | return optlayer(rets_estimate)[0] 161 | 162 | @property 163 | def hparams(self): 164 | """Hyperparamters relevant to construction of the model.""" 165 | return { 166 | "max_weight": self.max_weight, 167 | "returns_channel": self.returns_channel, 168 | "n_assets": self.n_assets, 169 | } 170 | 171 | 172 | class MinimumVariance(Benchmark): 173 | """Markowitz portfolio optimization - minimum variance. 174 | 175 | Parameters 176 | ---------- 177 | max_weight : float 178 | A number in (0, 1] representing the maximum weight per asset. 179 | 180 | n_assets : None or int 181 | If specifed the benchmark will always have to be provided with `n_assets` of assets in the `__call__`. 182 | This way one can achieve major speedups since the optimization problem is canonicalized only once in the 183 | constructor. However, when `n_assets` is None the optimization problem is canonicalized before each 184 | inside of `__call__` which results in overhead but allows for variable number of assets. 185 | 186 | returns_channel : int 187 | Which channel in the `x` feature matrix to consider (the 2nd dimension) as returns. 188 | 189 | Attributes 190 | ---------- 191 | optlayer : cvxpylayers.torch.CvxpyLayer or None 192 | Equal to None if `n_assets` not provided in the constructor. In this case optimization problem is constructed 193 | with each forward pass. This allows for variable number of assets but is slower. 
If `n_assets` provided than 194 | constructed once and for all in the constructor. 195 | 196 | """ 197 | 198 | def __init__(self, max_weight=1, returns_channel=0, n_assets=None): 199 | self.n_assets = n_assets 200 | self.returns_channel = returns_channel 201 | self.max_weight = max_weight 202 | 203 | self.optlayer = ( 204 | self._construct_problem(n_assets, max_weight) 205 | if self.n_assets is not None 206 | else None 207 | ) 208 | 209 | @staticmethod 210 | def _construct_problem(n_assets, max_weight): 211 | """Construct cvxpylayers problem.""" 212 | covmat_sqrt = cp.Parameter((n_assets, n_assets)) 213 | w = cp.Variable(n_assets) 214 | 215 | risk = cp.sum_squares(covmat_sqrt @ w) 216 | prob = cp.Problem( 217 | cp.Minimize(risk), [cp.sum(w) == 1, w >= 0, w <= max_weight] 218 | ) 219 | 220 | return CvxpyLayer(prob, parameters=[covmat_sqrt], variables=[w]) 221 | 222 | def __call__(self, x): 223 | """Predict weights. 224 | 225 | Parameters 226 | ---------- 227 | x : torch.Tensor 228 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)`. 229 | 230 | Returns 231 | ------- 232 | weights : torch.Tensor 233 | Tensor of shape `(n_samples, n_assets)` representing the predicted weights. 
class OneOverN(Benchmark):
    """Equally weighted portfolio."""

    def __call__(self, x):
        """Predict weights.

        Parameters
        ----------
        x : torch.Tensor
            Tensor of shape `(n_samples, n_channels, lookback, n_assets)`.

        Returns
        -------
        weights : torch.Tensor
            Tensor of shape `(n_samples, n_assets)` representing the predicted weights.

        """
        n_samples, n_channels, lookback, n_assets = x.shape

        return (
            torch.ones((n_samples, n_assets), dtype=x.dtype, device=x.device)
            / n_assets
        )


class Random(Benchmark):
    """Random allocation for each prediction."""

    def __call__(self, x):
        """Predict weights.

        Parameters
        ----------
        x : torch.Tensor
            Tensor of shape `(n_samples, n_channels, lookback, n_assets)`.

        Returns
        -------
        weights : torch.Tensor
            Tensor of shape `(n_samples, n_assets)` representing the predicted weights.

        """
        n_samples, n_channels, lookback, n_assets = x.shape

        weights_unscaled = torch.rand(
            (n_samples, n_assets), dtype=x.dtype, device=x.device
        )
        # Broadcasting over the asset dimension replaces the original
        # explicit `.repeat(1, n_assets)` -- same result, no intermediate copy.
        weights_sums = weights_unscaled.sum(dim=1, keepdim=True)

        return weights_unscaled / weights_sums


class Singleton(Benchmark):
    """Predict a single asset.

    Parameters
    ----------
    asset_ix : int
        Index of the asset to predict.

    """

    def __init__(self, asset_ix):
        self.asset_ix = asset_ix

    def __call__(self, x):
        """Predict weights.

        Parameters
        ----------
        x : torch.Tensor
            Tensor of shape `(n_samples, n_channels, lookback, n_assets)`.

        Returns
        -------
        weights : torch.Tensor
            Tensor of shape `(n_samples, n_assets)` representing the predicted weights.

        Raises
        ------
        IndexError
            If `asset_ix` is not a valid (non-negative) asset index.

        """
        n_samples, n_channels, lookback, n_assets = x.shape

        # Equivalent to the original membership test in `set(range(n_assets))`
        # but O(1): valid indices satisfy 0 <= asset_ix < n_assets.
        if not 0 <= self.asset_ix < n_assets:
            raise IndexError("The selected asset index is out of range.")

        weights = torch.zeros(
            (n_samples, n_assets), dtype=x.dtype, device=x.device
        )
        weights[:, self.asset_ix] = 1

        return weights

    @property
    def hparams(self):
        """Hyperparameters relevant to construction of the model."""
        return {"asset_ix": self.asset_ix}
def prepare_standard_scaler(X, overlap=False, indices=None):
    """Compute mean and standard deviation for each channel.

    Parameters
    ----------
    X : np.ndarray
        Full features array of shape `(n_samples, n_channels, lookback, n_assets)`.

    overlap : bool
        If False, only the most recent timestep of each sample is used, which
        guarantees that overlapping lookback windows do not count the same
        observation multiple times.

    indices : list or None
        List of indices to consider from the `X.shape[0]` dimension. If None
        then considering all the samples.

    Returns
    -------
    means : np.ndarray
        Mean of each channel. Shape `(n_channels,)`.

    stds : np.ndarray
        Standard deviation of each channel. Shape `(n_channels,)`.

    """
    indices = indices if indices is not None else list(range(len(X)))
    considered_values = X[indices, ...] if overlap else X[indices, :, -1:, :]

    means = considered_values.mean(axis=(0, 2, 3))
    stds = considered_values.std(axis=(0, 2, 3))

    return means, stds


def prepare_robust_scaler(
    X, overlap=False, indices=None, percentile_range=(25, 75)
):
    """Compute median and percentile range for each channel.

    Parameters
    ----------
    X : np.ndarray
        Full features array of shape `(n_samples, n_channels, lookback, n_assets)`.

    overlap : bool
        If False, only the most recent timestep of each sample is used, which
        guarantees that overlapping lookback windows do not count the same
        observation multiple times.

    indices : list or None
        List of indices to consider from the `X.shape[0]` dimension. If None
        then considering all the samples.

    percentile_range : tuple
        The left and right percentile to consider. Needs to be in [0, 100].

    Returns
    -------
    medians : np.ndarray
        Median of each channel. Shape `(n_channels,)`.

    ranges : np.ndarray
        Interquantile range for each channel. Shape `(n_channels,)`.

    Raises
    ------
    ValueError
        If the percentile range is not increasing or falls outside [0, 100].

    """
    if not 0 <= percentile_range[0] < percentile_range[1] <= 100:
        raise ValueError(
            "The percentile range needs to be in [0, 100] and left < right"
        )

    indices = indices if indices is not None else list(range(len(X)))
    considered_values = X[indices, ...] if overlap else X[indices, :, -1:, :]

    medians = np.median(considered_values, axis=(0, 2, 3))
    percentiles = np.percentile(
        considered_values, percentile_range, axis=(0, 2, 3)
    )  # (2, n_channels)

    ranges = percentiles[1] - percentiles[0]

    return medians, ranges


class Compose:
    """Meta transform inspired by torchvision.

    Parameters
    ----------
    transforms : list
        List of callables that represent transforms to be composed.

    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, X_sample, y_sample, timestamps_sample, asset_names):
        """Transform.

        Parameters
        ----------
        X_sample : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)`.

        y_sample : torch.Tensor
            Target vector of shape `(n_channels, horizon, n_assets)`.

        timestamps_sample : datetime
            Time stamp of the sample.

        asset_names
            Asset names corresponding to the last channel of `X_sample` and `y_sample`.

        Returns
        -------
        X_sample_new : torch.Tensor
            Transformed version of `X_sample`.

        y_sample_new : torch.Tensor
            Transformed version of `y_sample`.

        timestamps_sample_new : datetime
            Transformed version of `timestamps_sample`.

        asset_names_new
            Transformed version of `asset_names`.
        """
        # Each transform receives the output of the previous one.
        for t in self.transforms:
            X_sample, y_sample, timestamps_sample, asset_names = t(
                X_sample, y_sample, timestamps_sample, asset_names
            )

        return X_sample, y_sample, timestamps_sample, asset_names


class Dropout:
    """Set random elements of the input to zero with probability p.

    Parameters
    ----------
    p : float
        Probability of setting an element to zero.

    training : bool
        If False, then dropout disabled no matter what the `p` is. Note that
        if True then dropout is enabled and at the same time the surviving
        elements are scaled by `1 / (1 - p)` (standard inverted dropout).
    """

    def __init__(self, p=0.2, training=True):
        self.p = p
        self.training = training

    def __call__(self, X_sample, y_sample, timestamps_sample, asset_names):
        """Perform transform.

        Parameters
        ----------
        X_sample : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)`.

        y_sample : torch.Tensor
            Target vector of shape `(n_channels, horizon, n_assets)`.

        timestamps_sample : datetime
            Time stamp of the sample.

        asset_names
            Asset names corresponding to the last channel of `X_sample` and `y_sample`.

        Returns
        -------
        X_sample_new : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)` with some elements being set to zero.

        y_sample : torch.Tensor
            Same as input.

        timestamps_sample : datetime
            Same as input.

        asset_names
            Same as input.
        """
        X_sample_new = torch.nn.functional.dropout(
            X_sample, p=self.p, training=self.training
        )

        return X_sample_new, y_sample, timestamps_sample, asset_names


class Multiply:
    """Transform multiplying the feature tensor X with a constant."""

    def __init__(self, c=100):
        self.c = c

    def __call__(self, X_sample, y_sample, timestamps_sample, asset_names):
        """Perform transform.

        Parameters
        ----------
        X_sample : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)`.

        y_sample : torch.Tensor
            Target vector of shape `(n_channels, horizon, n_assets)`.

        timestamps_sample : datetime
            Time stamp of the sample.

        asset_names
            Asset names corresponding to the last channel of `X_sample` and `y_sample`.

        Returns
        -------
        X_sample_new : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)` multiplied by a constant `self.c`.

        y_sample : torch.Tensor
            Same as input.

        timestamps_sample : datetime
            Same as input.

        asset_names
            Same as input.
        """
        return self.c * X_sample, y_sample, timestamps_sample, asset_names


class Noise:
    """Add noise to each of the channels.

    Random (Gaussian) noise is added to the original features X. One can
    control the standard deviation of the noise via the `frac` parameter.
    Mathematically, `std(X_noise) = std(X) * frac` for each channel.
    """

    def __init__(self, frac=0.2):
        self.frac = frac

    def __call__(self, X_sample, y_sample, timestamps_sample, asset_names):
        """Perform transform.

        Parameters
        ----------
        X_sample : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)`.

        y_sample : torch.Tensor
            Target vector of shape `(n_channels, horizon, n_assets)`.

        timestamps_sample : datetime
            Time stamp of the sample.

        asset_names
            Asset names corresponding to the last channel of `X_sample` and `y_sample`.

        Returns
        -------
        X_sample_new : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)` with some added noise.

        y_sample : torch.Tensor
            Same as input.

        timestamps_sample : datetime
            Same as input.

        asset_names
            Same as input.
        """
        # Per-channel std over (lookback, n_assets) scales the unit Gaussian.
        X_sample_new = (
            self.frac
            * X_sample.std([1, 2], keepdim=True)
            * torch.randn_like(X_sample)
            + X_sample
        )

        return X_sample_new, y_sample, timestamps_sample, asset_names


class Scale:
    """Scale input features.

    The input features are per channel centered to zero and scaled to one. We use the same
    terminology as scikit-learn. However, the equivalent in torchvision is `Normalize`.

    Parameters
    ----------
    center : np.ndarray
        1D array of shape `(n_channels,)` representing the center of the features (mean or median).
        Needs to be precomputed in advance.

    scale : np.ndarray
        1D array of shape `(n_channels,)` representing the scale of the features (standard deviation
        or quantile range). Needs to be precomputed in advance.

    See Also
    --------
    prepare_robust_scaler
    prepare_standard_scaler
    """

    def __init__(self, center, scale):
        if len(center) != len(scale):
            raise ValueError(
                "The center and scale need to have the same size."
            )

        if np.any(scale <= 0):
            raise ValueError("The scale parameters need to be positive.")

        self.center = center
        self.scale = scale
        self.n_channels = len(self.center)

    def __call__(self, X_sample, y_sample, timestamps_sample, asset_names):
        """Perform transform.

        Parameters
        ----------
        X_sample : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)`.

        y_sample : torch.Tensor
            Target vector of shape `(n_channels, horizon, n_assets)`.

        timestamps_sample : datetime
            Time stamp of the sample.

        asset_names
            Asset names corresponding to the last channel of `X_sample` and `y_sample`.

        Returns
        -------
        X_sample_new : torch.Tensor
            Feature vector of shape `(n_channels, lookback, n_assets)` scaled appropriately.

        y_sample : torch.Tensor
            Same as input.

        timestamps_sample : datetime
            Same as input.

        asset_names
            Same as input.

        Raises
        ------
        ValueError
            If the number of channels of `X_sample` does not match the
            precomputed `center`/`scale`.
        """
        n_channels = X_sample.shape[0]
        if n_channels != self.n_channels:
            raise ValueError(
                "Expected {} channels in X, got {}".format(
                    self.n_channels, n_channels
                )
            )

        X_sample_new = X_sample.clone()
        dtype, device = X_sample_new.dtype, X_sample_new.device

        # Reshape (n_channels,) -> (n_channels, 1, 1) for broadcasting.
        center = torch.as_tensor(self.center, dtype=dtype, device=device)[
            :, None, None
        ]
        scale = torch.as_tensor(self.scale, dtype=dtype, device=device)[
            :, None, None
        ]

        X_sample_new.sub_(center).div_(scale)

        return X_sample_new, y_sample, timestamps_sample, asset_names


def sin_single(n_timesteps, amplitude=1, freq=0.25, phase=0):
    """Generate sine waves.

    Parameters
    ----------
    n_timesteps : int
        Number of timesteps.

    amplitude : float
        The peak value.

    freq : float
        Frequency - number of oscillations per timestep.

    phase : float
        Offset.

    Returns
    -------
    y : np.ndarray
        1D array of shape `(n_timesteps,)`.

    """
    x = np.arange(n_timesteps)

    return amplitude * np.sin(2 * np.pi * freq * x + phase)
def gradient_wrt_input(
    model,
    target_weights,
    initial_guess,
    n_iter=100,
    mask=None,
    lr=1e-1,
    verbose=True,
    device=None,
    dtype=None,
):
    """Find input tensor such that the model produces an allocation close to the target one.

    Parameters
    ----------
    model : torch.Module
        Network that predicts weight allocation given feature tensor.

    target_weights : torch.Tensor
        Vector of targeted asset weights of shape `(n_assets,)`.

    initial_guess : torch.Tensor
        Initial feature tensor serving as the starting point for the optimization. The shape is
        `(n_channels, lookback, n_assets)` - the sample dimension is not included.

    n_iter : int
        Number of iterations of the gradient descent (or other) algorithm.

    mask : None or torch.Tensor
        If specified, then a boolean ``torch.Tensor`` of the same shape as `initial_guess`. Then
        one can elementwise choose what parts of the inputs to optimize (True) and which to
        keep the same as the initial guess (False).

    lr : float
        Learning rate for the optimizer.

    verbose : bool
        If True, then verbosity activated.

    device : None or torch.device
        Device to be used. If specified, casts all used tensors.

    dtype : None or torch.dtype
        Dtype to be used. If specified, casts all used tensors.

    Returns
    -------
    result : torch.Tensor
        Feature tensor of the same shape as `initial_guess` that is mapped by the network (hopefully)
        close to `target_weights`.

    hist : list
        List of losses per iteration.

    Raises
    ------
    ValueError
        If the mask shape does not match `initial_guess`.

    TypeError
        If the mask is neither None nor a ``torch.Tensor``.
    """
    device = device or torch.device("cpu")
    dtype = dtype or torch.float32

    x = initial_guess.clone().to(device=device, dtype=dtype)
    x.requires_grad = True

    if mask is None:
        mask = torch.ones_like(x)

    elif torch.is_tensor(mask):
        if mask.shape != x.shape:
            raise ValueError("Inconsistent shape of the mask.")
    else:
        raise TypeError(
            "Incorrect type of the mask, either None or torch.Tensor."
        )

    # casting
    mask = mask.to(dtype=torch.bool, device=device)
    model.to(device=device, dtype=dtype)
    target_weights = target_weights.to(device=device, dtype=dtype)

    optimizer = torch.optim.Adam([x], lr=lr)
    # NOTE(review): the model is put in train mode for the search -- confirm
    # this is intended (stochastic layers such as dropout stay active).
    model.train()

    hist = []
    for i in range(n_iter):
        if i % 50 == 0 and verbose:
            msg = (
                "{}-th iteration, loss: {:.4f}".format(i, hist[-1])
                if i != 0
                else "Starting optimization"
            )
            print(msg)

        # NOTE(review): masked-out (False) entries are fed to the model as
        # zeros via `x * mask`, not as their initial-guess values; they do
        # receive zero gradient and therefore stay unchanged in `x` itself.
        # Confirm that feeding zeros matches the documented intent.
        loss_per_asset = (
            model((x * mask)[None, ...])[0] - target_weights
        ) ** 2
        loss = loss_per_asset.mean()
        hist.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if verbose:
        print("Optimization done, final loss: {:.4f}".format(hist[-1]))

    return x, hist
class AttentionCollapse(nn.Module):
    """Collapsing over the channels with attention.

    Parameters
    ----------
    n_channels : int
        Number of input channels.

    Attributes
    ----------
    affine : nn.Module
        Fully connected layer performing linear mapping.

    context_vector : nn.Module
        Fully connected layer encoding direction importance.
    """

    def __init__(self, n_channels):
        super().__init__()

        self.affine = nn.Linear(n_channels, n_channels)
        self.context_vector = nn.Linear(n_channels, 1, bias=False)

    def forward(self, x):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            Tensor of shape `(n_samples, n_channels, lookback, n_assets)`.

        Returns
        -------
        torch.Tensor
            Tensor of shape `(n_samples, n_channels, n_assets)`.

        """
        n_samples, n_channels, lookback, n_assets = x.shape

        res_list = []
        for i in range(n_samples):
            inp_single = x[i].permute(
                2, 1, 0
            )  # n_assets, lookback, n_channels
            tformed = self.affine(inp_single)  # n_assets, lookback, n_channels
            w = self.context_vector(tformed)  # n_assets, lookback, 1
            scaled_w = torch.nn.functional.softmax(
                w, dim=1
            )  # n_assets, lookback, 1
            # NOTE(review): the softmax weights already sum to 1 over the
            # lookback, so `mean` (rather than `sum`) divides by an extra
            # factor of `lookback` -- presumably intentional; confirm.
            weighted_sum = (inp_single * scaled_w).mean(
                dim=1
            )  # n_assets, n_channels
            res_list.append(weighted_sum.permute(1, 0))  # n_channels, n_assets

        return torch.stack(res_list, dim=0)


class AverageCollapse(nn.Module):
    """Global average collapsing over a specified dimension."""

    def __init__(self, collapse_dim=2):
        super().__init__()
        self.collapse_dim = collapse_dim

    def forward(self, x):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            N-dimensional tensor of shape (d_0, d_1, ..., d_{N-1}).

        Returns
        -------
        torch.Tensor
            {N-1}-dimensional tensor of shape (d_0, ..., d_{collapse_dim - 1}, d_{collapse_dim + 1}, ..., d_{N-1}).
            Average over the removed dimension.
        """
        return x.mean(dim=self.collapse_dim)


class ElementCollapse(nn.Module):
    """Single element over a specified dimension."""

    def __init__(self, collapse_dim=2, element_ix=-1):
        super().__init__()
        self.collapse_dim = collapse_dim
        self.element_ix = element_ix

    def forward(self, x):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            N-dimensional tensor of shape (d_0, d_1, ..., d_{N-1}).

        Returns
        -------
        torch.Tensor
            {N-1}-dimensional tensor of shape (d_0, ..., d_{collapse_dim - 1}, d_{collapse_dim + 1}, ..., d_{N-1}).
            Taking the `self.element_ix` element of the removed dimension.
        """
        return x.unbind(self.collapse_dim)[self.element_ix]


class ExponentialCollapse(nn.Module):
    """Exponential weighted collapsing over a specified dimension.

    The unscaled weights are defined recursively with the following rules:
        - w_{0}=1
        - w_{t+1} = forgetting_factor * w_{t} + 1

    Parameters
    ----------
    collapse_dim : int
        What dimension to remove.

    forgetting_factor : float or None
        If float, then fixed constant. If None this will become learnable.

    """

    def __init__(self, collapse_dim=2, forgetting_factor=None):
        super().__init__()
        self.collapse_dim = collapse_dim
        # `is None` check (rather than the original truthiness `or`): a
        # user-supplied factor of 0.0 is a valid constant and must not
        # silently be replaced by a learnable parameter.
        if forgetting_factor is None:
            self.forgetting_factor = torch.nn.Parameter(
                torch.Tensor([0.5]), requires_grad=True
            )
        else:
            self.forgetting_factor = forgetting_factor

    def forward(self, x):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            N-dimensional tensor of shape (d_0, d_1, ..., d_{N-1}).

        Returns
        -------
        torch.Tensor
            {N-1}-dimensional tensor of shape (d_0, ..., d_{collapse_dim - 1}, d_{collapse_dim + 1}, ..., d_{N-1}).
            Exponential average over the removed dimension.
        """
        n_steps = x.shape[self.collapse_dim]
        n_dim = x.ndim
        view = [-1 if i == self.collapse_dim else 1 for i in range(n_dim)]

        ff = self.forgetting_factor
        one = (
            torch.ones(1, dtype=ff.dtype, device=ff.device)
            if torch.is_tensor(ff)
            else torch.ones(1)
        )

        # Build the weights with tensor ops and `torch.cat` so the autograd
        # graph is preserved -- the original `torch.Tensor(list)` conversion
        # silently detached a learnable forgetting factor from the graph.
        w_list = [one]
        for _ in range(1, n_steps):
            w_list.append(ff * w_list[-1] + 1)

        w_unscaled = torch.cat(w_list).to(dtype=x.dtype, device=x.device)
        w = w_unscaled / w_unscaled.sum()

        return (x * w.view(*view)).sum(dim=self.collapse_dim)


class MaxCollapse(nn.Module):
    """Global max collapsing over a specified dimension."""

    def __init__(self, collapse_dim=2):
        super().__init__()
        self.collapse_dim = collapse_dim

    def forward(self, x):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            N-dimensional tensor of shape (d_0, d_1, ..., d_{N-1}).

        Returns
        -------
        torch.Tensor
            {N-1}-dimensional tensor of shape (d_0, ..., d_{collapse_dim - 1}, d_{collapse_dim + 1}, ..., d_{N-1}).
            Maximum over the removed dimension.
        """
        return x.max(dim=self.collapse_dim)[0]


class SumCollapse(nn.Module):
    """Global sum collapsing over a specified dimension."""

    def __init__(self, collapse_dim=2):
        super().__init__()
        self.collapse_dim = collapse_dim

    def forward(self, x):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            N-dimensional tensor of shape (d_0, d_1, ..., d_{N-1}).

        Returns
        -------
        torch.Tensor
            {N-1}-dimensional tensor of shape (d_0, ..., d_{collapse_dim - 1}, d_{collapse_dim + 1}, ..., d_{N-1}).
            Sum over the removed dimension.
        """
        return x.sum(dim=self.collapse_dim)
211 | """ 212 | return x.sum(dim=self.collapse_dim) 213 | -------------------------------------------------------------------------------- /deepdow/layers/transform.py: -------------------------------------------------------------------------------- 1 | """Collection of layers focusing on transforming tensors while keeping the number of dimensions constant.""" 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Conv(nn.Module): 8 | """Convolutional layer. 9 | 10 | Parameters 11 | ---------- 12 | n_input_channels : int 13 | Number of input channels. 14 | 15 | n_output_channels : int 16 | Number of output channels. 17 | 18 | kernel_size : int 19 | Size of the kernel. 20 | 21 | method : str, {'2D, '1D'} 22 | What type of convolution is used in the background. 23 | """ 24 | 25 | def __init__( 26 | self, n_input_channels, n_output_channels, kernel_size=3, method="2D" 27 | ): 28 | super().__init__() 29 | 30 | self.method = method 31 | 32 | if method == "2D": 33 | self.conv = nn.Conv2d( 34 | n_input_channels, 35 | n_output_channels, 36 | kernel_size=kernel_size, 37 | padding=(kernel_size - 1) // 2, 38 | ) 39 | elif method == "1D": 40 | self.conv = nn.Conv1d( 41 | n_input_channels, 42 | n_output_channels, 43 | kernel_size=kernel_size, 44 | padding=(kernel_size - 1) // 2, 45 | ) 46 | else: 47 | raise ValueError( 48 | "Invalid method {}, only supports '1D' or '2D'.".format(method) 49 | ) 50 | 51 | def forward(self, x): 52 | """Perform forward pass. 53 | 54 | Parameters 55 | ---------- 56 | x : torch.Tensor 57 | Tensor of shape `(n_samples, n_input_channels, lookback, n_assets) if `self.method='2D'`. Otherwise 58 | `(n_samples, n_input_channels, lookback)`. 59 | 60 | Returns 61 | ------- 62 | torch.Tensor 63 | Tensor of shape `(n_samples, n_output_channels, lookback, n_assets)` if `self.method='2D'`. Otherwise 64 | `(n_samples, n_output_channels, lookback)`. 
65 | 66 | """ 67 | return self.conv(x) 68 | 69 | 70 | class RNN(nn.Module): 71 | """Recurrent neural network layer. 72 | 73 | Parameters 74 | ---------- 75 | n_channels : int 76 | Number of input channels. 77 | 78 | hidden_size : int 79 | Hidden state size. Alternatively one can see it as number of output channels. 80 | 81 | cell_type : str, {'LSTM', 'RNN'} 82 | Type of the recurrent cell. 83 | 84 | bidirectional : bool 85 | If True, then bidirectional. Note that `hidden_size` already takes this parameter into account. 86 | 87 | n_layers : int 88 | Number of stacked layers. 89 | 90 | """ 91 | 92 | def __init__( 93 | self, 94 | n_channels, 95 | hidden_size, 96 | cell_type="LSTM", 97 | bidirectional=True, 98 | n_layers=1, 99 | ): 100 | """Construct.""" 101 | super().__init__() 102 | 103 | if hidden_size % 2 != 0 and bidirectional: 104 | raise ValueError( 105 | "Hidden size needs to be divisible by two for bidirectional RNNs." 106 | ) 107 | 108 | hidden_size_one_direction = int( 109 | hidden_size // (1 + int(bidirectional)) 110 | ) # only will work out for 111 | 112 | if cell_type == "RNN": 113 | self.cell = torch.nn.RNN( 114 | n_channels, 115 | hidden_size_one_direction, 116 | bidirectional=bidirectional, 117 | num_layers=n_layers, 118 | ) 119 | 120 | elif cell_type == "LSTM": 121 | self.cell = torch.nn.LSTM( 122 | n_channels, 123 | hidden_size_one_direction, 124 | bidirectional=bidirectional, 125 | num_layers=n_layers, 126 | ) 127 | 128 | else: 129 | raise ValueError("Unsupported cell_type {}".format(cell_type)) 130 | 131 | def forward(self, x): 132 | """Perform forward pass. 133 | 134 | Parameters 135 | ---------- 136 | x : torch.Tensor 137 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)`. 138 | 139 | Returns 140 | ------- 141 | torch.Tensor 142 | Tensor of shape `(n_samples, self.hidden_size, lookback, n_assets)`. 
143 | 144 | """ 145 | n_samples, n_channels, lookback, n_assets = x.shape 146 | x_swapped = x.permute( 147 | 0, 2, 3, 1 148 | ) # n_samples, lookback, n_assets, n_channels 149 | res = [] 150 | 151 | for i in range(n_samples): 152 | all_hidden_ = self.cell(x_swapped[i])[ 153 | 0 154 | ] # lookback, n_assets, hidden_size 155 | res.append( 156 | all_hidden_.permute(2, 0, 1) 157 | ) # hidden_size, lookback, n_assets 158 | 159 | return torch.stack(res) 160 | 161 | 162 | class Warp(torch.nn.Module): 163 | """Custom warping layer.""" 164 | 165 | def __init__(self, mode="bilinear", padding_mode="reflection"): 166 | super().__init__() 167 | self.mode = mode 168 | self.padding_mode = padding_mode 169 | 170 | def forward(self, x, tform): 171 | """Warp the tensor `x` with `tform` along the time dimension. 172 | 173 | Parameters 174 | ---------- 175 | x : torch.Tensor 176 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)`. 177 | 178 | tform : torch.Tensor 179 | Tensor of shape `(n_samples, lookback)` or `(n_samples, lookback, n_assets)`. 180 | Note that in the first case the same transformation is going to be used over all 181 | assets. To prevent folding the transformation should be increasing along the 182 | time dimension. It should range from -1 (beginning of the series) to 1 (end of 183 | the series). 184 | 185 | Returns 186 | ------- 187 | x_warped : torch.Tensor 188 | Warped version of input `x` with transformation `tform`. The shape is the same 189 | as the input shape - `(n_samples, n_channels, lookback, n_assets)`. 190 | 191 | """ 192 | n_samples, n_channels, lookback, n_assets = x.shape 193 | dtype, device = x.dtype, x.device 194 | 195 | if tform.ndim == 3: 196 | ty = tform 197 | elif tform.ndim == 2: 198 | ty = torch.stack( 199 | n_assets * [tform], dim=-1 200 | ) # (n_samples, lookback, n_assets) 201 | else: 202 | raise ValueError( 203 | "The tform tensor needs to be either 2 or 3 dimensional." 
204 | ) 205 | 206 | tx = torch.ones( 207 | n_samples, lookback, n_assets, dtype=dtype, device=device 208 | ) 209 | tx *= torch.linspace( 210 | -1, 1, steps=n_assets, device=device, dtype=dtype 211 | )[None, None, :] 212 | 213 | grid = torch.stack([tx, ty], dim=-1) 214 | 215 | x_warped = nn.functional.grid_sample( 216 | x, 217 | grid, 218 | mode=self.mode, 219 | padding_mode=self.padding_mode, 220 | align_corners=True, 221 | ) 222 | 223 | return x_warped 224 | 225 | 226 | class Zoom(torch.nn.Module): 227 | """Zoom in and out. 228 | 229 | It can dynamically zoom into more recent timesteps and disregard older ones. Conversely, 230 | it can collapse more timesteps into one. Based on Spatial Transformer Network. 231 | 232 | Parameters 233 | ---------- 234 | mode : str, {'bilinear', 'nearest'} 235 | What interpolation to perform. 236 | 237 | padding_mode : str, {'zeros', 'border', 'reflection'} 238 | How to fill in values that fall outisde of the grid. Relevant in the case when we 239 | zoom out. 240 | 241 | References 242 | ---------- 243 | [1] Jaderberg, Max, Karen Simonyan, and Andrew Zisserman. "Spatial transformer networks." 244 | Advances in neural information processing systems. 2015. 245 | 246 | """ 247 | 248 | def __init__(self, mode="bilinear", padding_mode="reflection"): 249 | super().__init__() 250 | self.mode = mode 251 | self.padding_mode = padding_mode 252 | 253 | def forward(self, x, scale): 254 | """Perform forward pass. 255 | 256 | Parameters 257 | ---------- 258 | x : torch.Tensor 259 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)`. 260 | 261 | scale : torch.Tensor 262 | Tensor of shape `(n_samples,)` representing how much to zoom in (`scale < 1`) or 263 | zoom out (`scale > 1`). 264 | 265 | Returns 266 | ------- 267 | torch.Tensor 268 | Tensor of shape `(n_samples, n_channels, lookback, n_assets)` that is a zoomed 269 | version of the input. Note that the shape is identical to the input. 
class Zoom(torch.nn.Module):
    """Zoom in and out.

    It can dynamically zoom into more recent timesteps and disregard older ones. Conversely,
    it can collapse more timesteps into one. Based on Spatial Transformer Network.

    Parameters
    ----------
    mode : str, {'bilinear', 'nearest'}
        What interpolation to perform.

    padding_mode : str, {'zeros', 'border', 'reflection'}
        How to fill in values that fall outside of the grid. Relevant in the case when we
        zoom out.

    References
    ----------
    [1] Jaderberg, Max, Karen Simonyan, and Andrew Zisserman. "Spatial transformer networks."
        Advances in neural information processing systems. 2015.

    """

    def __init__(self, mode="bilinear", padding_mode="reflection"):
        super().__init__()
        self.mode = mode
        self.padding_mode = padding_mode

    def forward(self, x, scale):
        """Perform forward pass.

        Parameters
        ----------
        x : torch.Tensor
            Tensor of shape `(n_samples, n_channels, lookback, n_assets)`.

        scale : torch.Tensor
            Tensor of shape `(n_samples,)` representing how much to zoom in (`scale < 1`) or
            zoom out (`scale > 1`).

        Returns
        -------
        torch.Tensor
            Tensor of shape `(n_samples, n_channels, lookback, n_assets)` that is a zoomed
            version of the input. Note that the shape is identical to the input.

        """
        # `as_tensor` accepts both tensors and array-likes; for tensor input the
        # dtype/device move goes through `.to`, which keeps the autograd graph.
        scale = torch.as_tensor(scale, dtype=x.dtype, device=x.device)

        # Translate so that the most recent timestep stays anchored while zooming.
        translate = 1 - scale

        n_samples = x.shape[0]
        zeros = torch.zeros(n_samples, dtype=x.dtype, device=x.device)
        ones = torch.ones(n_samples, dtype=x.dtype, device=x.device)

        # Build the per-sample affine matrices [[1, 0, 0], [0, s, t]] purely with
        # tensor ops. The previous implementation wrapped `s` and `t` in
        # `torch.tensor(...)` inside a Python loop, which detached the graph
        # (no gradient w.r.t. `scale`) and processed samples one by one.
        theta = torch.stack(
            [
                torch.stack([ones, zeros, zeros], dim=-1),
                torch.stack([zeros, scale, translate], dim=-1),
            ],
            dim=1,
        )  # (n_samples, 2, 3)

        grid = nn.functional.affine_grid(theta, x.shape, align_corners=True)
        x_zoomed = nn.functional.grid_sample(
            x,
            grid,
            mode=self.mode,
            padding_mode=self.padding_mode,
            align_corners=True,
        )

        return x_zoomed
class ChangeWorkingDirectory:
    """Context manager that temporarily switches the working directory.

    Parameters
    ----------
    directory : str or pathlib.Path or None
        The directory to switch into. If None, the current one is kept.

    Attributes
    ----------
    _previous : pathlib.Path
        Working directory at construction time; restored when the context exits.

    Raises
    ------
    NotADirectoryError
        If `directory` does not point to an existing directory.

    """

    def __init__(self, directory):
        if directory is None:
            self.directory = pathlib.Path.cwd()
        else:
            self.directory = pathlib.Path(directory)

        if not self.directory.is_dir():
            raise NotADirectoryError(
                "{} is not a directory".format(str(self.directory))
            )

        self._previous = pathlib.Path.cwd()

    def __enter__(self):
        """Switch into the target directory."""
        os.chdir(str(self.directory))

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Go back to the original directory."""
        os.chdir(str(self._previous))
class PandasChecks:
    """General checks for pandas objects."""

    @staticmethod
    def check_no_gaps(index):
        """Validate that a time index is contiguous.

        Parameters
        ----------
        index : pd.DatetimeIndex
            Time index to be checked for gaps.

        Raises
        ------
        TypeError
            If `index` is not a ``pd.DatetimeIndex``.

        IndexError
            If the index is missing at least one period.

        """
        if not isinstance(index, pd.DatetimeIndex):
            raise TypeError("Unsupported type: {}".format(type(index)))

        expected = pd.date_range(
            index[0], periods=len(index), freq=index.freq
        )

        if not expected.equals(index):
            raise IndexError("Index has gaps.")

    @staticmethod
    def check_valid_entries(table):
        """Validate that a table contains no NaN or +-inf entries.

        Parameters
        ----------
        table : pd.Series or pd.DataFrame
            Input table.

        Raises
        ------
        TypeError
            If `table` is neither a ``pd.Series`` nor a ``pd.DataFrame``.

        ValueError
            If at least one entry is NaN or infinite.

        """
        if not isinstance(table, (pd.Series, pd.DataFrame)):
            raise TypeError("Unsupported type: {}".format(type(table)))

        if not np.isfinite(table.values).all():
            raise ValueError("There is an invalid entry")

    @staticmethod
    def check_indices_agree(*frames):
        """Validate that inputs are pandas objects with matching indices/columns.

        Parameters
        ----------
        frames : list
            Elements are either ``pd.Series`` or ``pd.DataFrame``.

        Raises
        ------
        TypeError
            If some element is neither a ``pd.Series`` nor a ``pd.DataFrame``.

        IndexError
            If some index (or DataFrame columns) differs from the index of the
            first frame.

        """
        if any(
            not isinstance(f, (pd.Series, pd.DataFrame)) for f in frames
        ):
            raise TypeError("Some elements are not pd.Series or pd.DataFrame")

        reference = frames[0].index

        for position, frame in enumerate(frames):
            if not frame.index.equals(reference):
                raise IndexError(
                    "The {} entry has wrong index: {}".format(
                        position, frame.index
                    )
                )

            # NOTE: DataFrame columns are compared against the *index* of the
            # first frame (original behavior, kept intentionally).
            if isinstance(frame, pd.DataFrame) and not frame.columns.equals(
                reference
            ):
                raise IndexError(
                    "The {} entry has wrong columns: {}".format(
                        position, frame.columns
                    )
                )
def prices_to_returns(prices, use_log=True):
    """Turn a table of asset prices into per-period returns.

    Parameters
    ----------
    prices : pd.DataFrame
        Rows represent different time points and the columns represent different assets.
        Note that the columns can also be a ``pd.MultiIndex``.

    use_log : bool
        If True, then logarithmic returns are used (natural logarithm). If False, then
        simple returns.

    Returns
    -------
    returns : pd.DataFrame
        Returns per asset per period. The first period is deleted.

    """
    current = prices.values
    previous = prices.shift(1).values

    if use_log:
        values = np.log(current) - np.log(previous)
    else:
        values = (current - previous) / previous

    # The first row is NaN (no previous price), hence dropped.
    return pd.DataFrame(
        values[1:, :], index=prices.index[1:], columns=prices.columns
    )
def returns_to_Xy(returns, lookback=10, horizon=10, gap=0):
    """Create a deep learning dataset (in memory) via a rolling window.

    Parameters
    ----------
    returns : pd.DataFrame
        Returns where columns represent assets and rows timestamps. The last row
        is the most recent.

    lookback : int
        Number of timesteps to include in the features.

    horizon : int
        Number of timesteps to include in the label.

    gap : int
        Number of time periods one cannot act after observing the features.

    Returns
    -------
    X : np.ndarray
        Array of shape `(N, 1, lookback, n_assets)`. Generated out of the entire dataset.

    timestamps : pd.DatetimeIndex
        Index corresponding to the feature matrix `X`.

    y : np.ndarray
        Array of shape `(N, 1, horizon, n_assets)`. Generated out of the entire dataset.

    """
    n_timesteps = len(returns.index)
    last_start = n_timesteps - horizon - gap + 1

    if lookback >= last_start:
        raise ValueError("Not enough timesteps to extract X and y.")

    X_list, timestamps_list, y_list = [], [], []

    for end in range(lookback, last_start):
        # Features: the `lookback` rows ending just before `end`.
        X_list.append(returns.iloc[end - lookback : end, :].values)
        # Each sample is stamped with its most recent feature timestamp.
        timestamps_list.append(returns.index[end - 1])
        # Label: `horizon` rows starting after the `gap`.
        y_list.append(returns.iloc[end + gap : end + gap + horizon, :].values)

    X = np.array(X_list)[:, np.newaxis, :, :]
    y = np.array(y_list)[:, np.newaxis, :, :]
    timestamps = pd.DatetimeIndex(timestamps_list, freq=returns.index.freq)

    return X, timestamps, y
def raw_to_Xy(
    raw_data,
    lookback=10,
    horizon=10,
    gap=0,
    freq="B",
    included_assets=None,
    included_indicators=None,
    use_log=True,
):
    """Convert raw data to features.

    Parameters
    ----------
    raw_data : pd.DataFrame
        Rows represent different timestamps stored in index. Note that there can be gaps. Columns are pd.MultiIndex
        with the zero level being assets and the first level indicator.

    lookback : int
        Number of timesteps to include in the features.

    horizon : int
        Number of timesteps to include in the label.

    gap : int
        Integer representing the number of time periods one cannot act after observing the features.

    freq : str
        Periodicity of the data.

    included_assets : None or list
        Assets to be included. If None then all available.

    included_indicators : None or list
        Indicators to be included. If None then all available.

    use_log : bool
        If True, then logarithmic returns are used (natural logarithm). If False, then simple returns.

    Returns
    -------
    X : np.ndarray
        Feature array of shape `(n_samples, n_indicators, lookback, n_assets)`.

    timestamps : pd.DatetimeIndex
        Per row timestamp of shape length `n_samples`.

    y : np.ndarray
        Targets array of shape `(n_samples, n_indicators, horizon, n_assets)`.

    asset_names : list
        Names of assets.

    indicators : list
        List of indicators.
    """
    if freq is None:
        raise ValueError("Frequency freq needs to be specified.")

    # Default to every asset/indicator listed in the MultiIndex levels.
    # NOTE(review): `columns.levels` keeps all original level values even when
    # some no longer appear in the columns — assumes `raw_data` is unfiltered.
    asset_names = (
        included_assets
        if included_assets is not None
        else raw_data.columns.levels[0].to_list()
    )
    indicators = (
        included_indicators
        if included_indicators is not None
        else raw_data.columns.levels[1].to_list()
    )

    # Reindex onto a gapless grid at frequency `freq`; missing rows become NaN.
    index = pd.date_range(
        start=raw_data.index[0], end=raw_data.index[-1], freq=freq
    )

    # Forward-fill then back-fill so that the interior has no NaNs left.
    new = pd.DataFrame(raw_data, index=index).ffill().bfill()

    # Drop assets with any non-finite or non-positive values (prices must be
    # strictly positive, e.g. for the logarithm in `prices_to_returns`).
    to_exclude = []
    for a in asset_names:
        is_valid = np.all(np.isfinite(new[a])) and np.all(new[a] > 0)
        if not is_valid:
            to_exclude.append(a)

    # Deterministic (sorted) asset order for the output arrays.
    asset_names = sorted(list(set(asset_names) - set(to_exclude)))

    # Keep only the surviving assets, reordered to match `asset_names`.
    absolute = new.iloc[:, new.columns.get_level_values(0).isin(asset_names)][
        asset_names
    ]  # sort
    # Keep only the requested indicators.
    absolute = absolute.iloc[
        :, absolute.columns.get_level_values(1).isin(indicators)
    ]

    returns = prices_to_returns(absolute, use_log=use_log)

    # One (X, y) pair per indicator; indicators become the channel axis below.
    X_list = []
    y_list = []
    for ind in indicators:
        X, timestamps, y = returns_to_Xy(
            returns.xs(ind, axis=1, level=1),
            lookback=lookback,
            horizon=horizon,
            gap=gap,
        )
        X_list.append(X)
        y_list.append(y)

    # `timestamps` is identical across iterations, so the last one is used.
    X = np.concatenate(X_list, axis=1)
    y = np.concatenate(y_list, axis=1)

    return X, timestamps, y, asset_names, indicators
-------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search{ background-color: #F05025 } /*sidebar background color*/ 2 | .caption-text{ color: #F05025 } /*Sidebar heading font color*/ 3 | .rst-content dl:not(.docutils) dt{ background: #FDE4DA; /* docstring background color */ 4 | color: #F15A24; /* docstring font color*/ 5 | border-top-color: #000000 /* docstring separating line color */ 6 | } -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import pathlib 15 | import sys 16 | 17 | from sphinx_gallery.sorting import FileNameSortKey 18 | 19 | some_path = pathlib.Path(os.path.abspath('.')) 20 | parent_some_path = some_path.parent 21 | sys.path.insert(0, str(some_path)) 22 | sys.path.insert(0, str(parent_some_path)) 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = 'DeepDow' 27 | copyright = '2020, Jan Krepl' 28 | author = 'Jan Krepl' 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.mathjax', 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.doctest', 39 | 'sphinx.ext.napoleon', 40 | 'sphinx.ext.viewcode', 41 | 'sphinx_gallery.gen_gallery' 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ['_templates'] 46 | 47 | # List of patterns, relative to source directory, that match files and 48 | # directories to ignore when looking for source files. 49 | # This pattern also affects html_static_path and html_extra_path. 
50 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 51 | 52 | # -- Options for HTML output ------------------------------------------------- 53 | # Set the welcome page for readthedocs 54 | master_doc = 'index' 55 | 56 | # The theme to use for HTML and HTML Help pages. See the documentation for 57 | # a list of builtin themes. 58 | # 59 | html_theme = 'sphinx_rtd_theme' 60 | 61 | # Add any paths that contain custom static files (such as style sheets) here, 62 | # relative to this directory. They are copied after the builtin static files, 63 | # so a file named "default.css" will overwrite the builtin "default.css". 64 | html_static_path = ['_static'] 65 | 66 | html_sidebars = { 67 | '**': [ 68 | 'about.html', 69 | 'navigation.html', 70 | 'relations.html', # needs 'show_related': True theme option to display 71 | 'searchbox.html', 72 | 'donate.html', 73 | ] 74 | } 75 | 76 | # Disable prepending with package and module name 77 | add_module_names = False 78 | 79 | 80 | # Making sure __call__ shows up in the documentation 81 | def skip(app, what, name, obj, would_skip, options): 82 | if name == "__call__": 83 | return False 84 | return would_skip 85 | 86 | 87 | def setup(app): 88 | app.add_css_file('css/custom.css') # adding custom styling 89 | app.connect("autodoc-skip-member", skip) # making sure __call__ is shown when implemented in child class 90 | 91 | 92 | # sphinx gallery 93 | sphinx_gallery_conf = { 94 | 'examples_dirs': '../examples', # path to your example scripts 95 | 'gallery_dirs': 'auto_examples', # path to where to save gallery generated output 96 | 'filename_pattern': '', # include everything 97 | 'within_subsection_order': FileNameSortKey 98 | } 99 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. DeepDow documentation master file, created by 3 | sphinx-quickstart on Sat Mar 14 15:21:35 2020. 
4 | You can adapt this file completely to your liking, but it should at least 5 | contain the root `toctree` directive. 6 | 7 | DeepDow 8 | ======= 9 | 10 | .. image:: https://i.imgur.com/x77b8Lc.png 11 | :align: center 12 | 13 | :code:`deepdow` (read as "wow") is a Python package connecting portfolio optimization and deep learning. 14 | 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :caption: USING DEEPDOW: 19 | 20 | source/installation 21 | source/introduction 22 | source/basics 23 | source/data_loading 24 | source/benchmarks 25 | source/layers 26 | source/networks 27 | source/losses 28 | source/experiments 29 | auto_examples/index 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: DEVELOPMENT 34 | 35 | source/changelog 36 | 37 | 38 | .. toctree:: 39 | :maxdepth: 2 40 | :caption: API Reference: 41 | 42 | source/api/deepdow -------------------------------------------------------------------------------- /docs/source/api/deepdow.benchmarks.rst: -------------------------------------------------------------------------------- 1 | .. _benchmarks_API: 2 | 3 | deepdow.benchmarks module 4 | ========================= 5 | 6 | .. automodule:: deepdow.benchmarks 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.callbacks.rst: -------------------------------------------------------------------------------- 1 | .. _callbacks_API: 2 | 3 | deepdow.callbacks module 4 | ======================== 5 | 6 | .. automodule:: deepdow.callbacks 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.data.augment.rst: -------------------------------------------------------------------------------- 1 | deepdow.data.augment module 2 | =========================== 3 | 4 | .. 
automodule:: deepdow.data.augment 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.data.load.rst: -------------------------------------------------------------------------------- 1 | deepdow.data.load module 2 | ======================== 3 | 4 | .. automodule:: deepdow.data.load 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.data.rst: -------------------------------------------------------------------------------- 1 | deepdow.data module 2 | =================== 3 | 4 | .. toctree:: 5 | 6 | deepdow.data.augment 7 | deepdow.data.load 8 | deepdow.data.synthetic -------------------------------------------------------------------------------- /docs/source/api/deepdow.data.synthetic.rst: -------------------------------------------------------------------------------- 1 | deepdow.data.synthetic module 2 | ============================= 3 | 4 | .. automodule:: deepdow.data.synthetic 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.experiments.rst: -------------------------------------------------------------------------------- 1 | .. _experiments_API: 2 | 3 | deepdow.experiments module 4 | ========================== 5 | 6 | .. automodule:: deepdow.experiments 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.explain.rst: -------------------------------------------------------------------------------- 1 | deepdow.explain module 2 | ====================== 3 | 4 | .. 
automodule:: deepdow.explain 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.layers.allocate.rst: -------------------------------------------------------------------------------- 1 | .. _layers_allocate_API: 2 | 3 | deepdow.layers.allocate module 4 | ============================== 5 | 6 | .. automodule:: deepdow.layers.allocate 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.layers.collapse.rst: -------------------------------------------------------------------------------- 1 | .. _layers_collapse_API: 2 | 3 | deepdow.layers.collapse module 4 | ============================== 5 | 6 | .. automodule:: deepdow.layers.collapse 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.layers.misc.rst: -------------------------------------------------------------------------------- 1 | .. _layers_misc_API: 2 | 3 | deepdow.layers.misc module 4 | ========================== 5 | 6 | .. automodule:: deepdow.layers.misc 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.layers.rst: -------------------------------------------------------------------------------- 1 | deepdow.layers package 2 | ====================== 3 | 4 | .. toctree:: 5 | 6 | deepdow.layers.collapse 7 | deepdow.layers.allocate 8 | deepdow.layers.misc 9 | deepdow.layers.transform -------------------------------------------------------------------------------- /docs/source/api/deepdow.layers.transform.rst: -------------------------------------------------------------------------------- 1 | .. 
_layers_transform_API: 2 | 3 | deepdow.layers.transform module 4 | =============================== 5 | 6 | .. automodule:: deepdow.layers.transform 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.losses.rst: -------------------------------------------------------------------------------- 1 | .. _losses_API: 2 | 3 | deepdow.losses module 4 | ===================== 5 | 6 | .. automodule:: deepdow.losses 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.nn.rst: -------------------------------------------------------------------------------- 1 | .. _networks_API: 2 | 3 | deepdow.nn module 4 | ================= 5 | 6 | .. automodule:: deepdow.nn 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/api/deepdow.rst: -------------------------------------------------------------------------------- 1 | deepdow package 2 | =============== 3 | 4 | .. toctree:: 5 | 6 | deepdow.benchmarks 7 | deepdow.callbacks 8 | deepdow.data 9 | deepdow.experiments 10 | deepdow.explain 11 | deepdow.layers 12 | deepdow.losses 13 | deepdow.nn 14 | deepdow.utils -------------------------------------------------------------------------------- /docs/source/api/deepdow.utils.rst: -------------------------------------------------------------------------------- 1 | deepdow.utils module 2 | ==================== 3 | 4 | .. automodule:: deepdow.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/basics.rst: -------------------------------------------------------------------------------- 1 | .. 
_basics: 2 | 3 | Basics 4 | ====== 5 | This page introduces all the important concepts used within :code:`deepdow`. 6 | 7 | Data 8 | ---- 9 | Financial timeseries can be seen as a 3D tensor with the following dimensions 10 | 11 | - **time** 12 | - **asset** 13 | - **indicator/channel** 14 | 15 | To give a specific example, one can investigate daily (**time** dimension) open price returns, close price returns and 16 | volumes (**channel** dimension) of multiple NASDAQ stocks (**asset** dimension). Graphically, one can imagine 17 | 18 | .. image:: https://i.imgur.com/RYcdN6y.png 19 | :align: center 20 | :width: 450 21 | 22 | 23 | Let us denote the shape of our tensor :code:`(n_channels, n_timesteps, n_assets) = (3, 10, 6)`. By fixing a time step 24 | (representing **now**), we can split our tensor into 3 disjoint subtensors: 25 | 26 | - **x** - :code:`(n_channels, lookback, n_assets) = (3, 5, 6)` 27 | - **g** - :code:`(n_channels, gap, n_assets) = (3, 1, 6)` 28 | - **y** - :code:`(n_channels, horizon, n_assets) = (3, 4, 6)` 29 | 30 | 31 | .. image:: https://i.imgur.com/rsttnxn.png 32 | :align: center 33 | :width: 400 34 | 35 | Firstly, **x** represents all the knowledge about the past and present. The second tensor **g** represents information 36 | contained in the immediate future that we cannot use to make investment decisions. Finally, **y** is the future 37 | evolution of the market. 38 | 39 | One can now move along the time dimension and apply the same decomposition at every time step. This method 40 | of generating a dataset is called the **rolling window**. To illustrate this idea, let us take a slightly 41 | bigger starting tensor (:code:`n_timesteps = 12`) while keeping :code:`lookback = 5`, :code:`gap = 1` and 42 | :code:`horizon = 4`. Let's roll it! 43 | 44 | 45 | .. 
image:: https://i.imgur.com/okSUzOk.png 46 | :align: center 47 | :width: 550 48 | 49 | We now possess a collection of 3 **feature** tensors (**x1**, **x2**, **x3**) and 3 **label** tensors (**y1**, **y2**, **y3**). 50 | And that is all we need! 51 | 52 | Predictions AKA weights 53 | ----------------------- 54 | In the :code:`deepdow` framework, we study networks that input **x** and return a single weight allocation **w** of 55 | shape :code:`(n_assets,)` such that :math:`\sum_{i} w_{i} = 1`. In other words, given our past knowledge **x** we 56 | construct a portfolio **w** that we buy right away and hold for :code:`horizon` time steps. Let **F** be some neural network 57 | with parameters :math:`\theta`, the below image represents the high level prediction pipeline: 58 | 59 | .. image:: https://i.imgur.com/sJ30WFE.png 60 | :align: center 61 | :width: 500 62 | 63 | .. _basics_loss: 64 | 65 | Loss 66 | ---- 67 | The last piece of the puzzle is the definition of the loss function. In the most general terms, the per sample loss **L** 68 | is any function that inputs **w** and **y** and outputs a real number. However, in most cases we first compute the 69 | portfolio returns **r** over each time step in the :code:`horizon` and then apply some summarization function 70 | **S** like mean, standard deviation, etc. 71 | 72 | 73 | .. image:: https://i.imgur.com/L0A2bRS.png 74 | :align: center 75 | :width: 700 76 | 77 | 78 | 79 | Assumptions 80 | ----------- 81 | Before finishing this chapter, let us summarize the important assumptions :code:`deepdow` is making 82 | 83 | - The time dimension is **contiguous** with a single frequency (i.e. daily) 84 | - The predicted weights **w** are turned into an actual investment that is **held** over :code:`horizon` time steps 85 | 86 | 87 | -------------------------------------------------------------------------------- /docs/source/benchmarks.rst: -------------------------------------------------------------------------------- 1 | ..
_benchmarks: 2 | 3 | Benchmarks 4 | ========== 5 | The goal of this section is to introduce the concept of a benchmark and demonstrate some specific 6 | examples of it that are implemented within :code:`deepdow`. 7 | 8 | In the broad sense, a benchmark is any algorithm that takes in the input feature tensor **x** and 9 | outputs the **weights** tensor. We can divide benchmarks into two categories 10 | 11 | - **with** learnable parameters - we call these **networks** and discuss in detail in :ref:`networks` 12 | - **without** learnable parameters - we call these **simple benchmarks** and discuss them in this section 13 | 14 | 15 | Let us stress one important implication of the above distinction. 16 | The allocation algorithm of simple benchmarks does not need to be differentiable and nothing prevents the 17 | user from casting the input **x** (:code:`torch.Tensor`) to a :code:`numpy` array and using external libraries 18 | (:code:`scipy`, etc). 19 | On the other hand, the allocation algorithm of networks needs to be a forward pass implemented via :code:`torch` 20 | functions, modules or :code:`deepdow.layers` (which is built on top of :code:`torch`). 21 | 22 | 23 | Benchmark class 24 | --------------- 25 | To capture the above general definition we provide an abstract class :code:`deepdow.benchmarks.Benchmark` that 26 | requires its children to implement the :code:`__call__` and optionally 27 | also :code:`hparams` property. 28 | 29 | 30 | - :code:`__call__` - the weight allocation algorithm 31 | - :code:`hparams` - optional property that is a dictionary of hyperparameters 32 | 33 | Simple benchmarks 34 | ----------------- 35 | The simple benchmarks are supposed to be allocation schemes that provide a baseline for the trainable networks. By 36 | definition, these simple benchmarks do not change their predictions over different epochs so one can just run them once and 37 | see how they fare against the networks. 
:code:`deepdow` implements multiple simple benchmarks and we are going to 38 | discuss them in what follows. For usage details see :ref:`benchmarks_API`. 39 | 40 | 41 | InverseVolatility 42 | ***************** 43 | The user needs to specify which channel represents returns via the :code:`returns_channel` 44 | The weight allocation is equal inverse standard deviation of returns if `use_std=True` otherwise it is the inverse 45 | variance. 46 | 47 | 48 | MaximumReturn 49 | ************* 50 | After specifying which channel represents returns via the :code:`returns_channel` a standard maximum return 51 | optimization is performed. One can additionally choose :code:`max_weight` per asset. 52 | 53 | MinimumVariance 54 | *************** 55 | After specifying which channel represents returns via the :code:`returns_channel` a standard minimum variance 56 | optimization is performed. One can additionally choose :code:`max_weight` per asset. 57 | 58 | 59 | OneOverN 60 | ******** 61 | Equally weighted portfolio - each asset has the weight `1/n_assets`. 62 | 63 | Random 64 | ****** 65 | The weights are sampled randomly. 66 | 67 | Singleton 68 | ********* 69 | Sometimes also called one asset portfolio. The user can chose the single asset via the :code:`asset_ix`. 
70 | 71 | 72 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Unreleased 5 | ---------- 6 | 7 | v0.2.3 8 | ------ 9 | Fixes 10 | ***** 11 | - Fix PyTorch in-place issue and matplotib plotting problem - #147 12 | 13 | v0.2.2 14 | ------ 15 | 16 | v0.2.1 17 | ------ 18 | Added 19 | ***** 20 | - IID example - #67 21 | 22 | Fixes 23 | ***** 24 | - Progressbar not displaying correctly - PR #80 25 | - Improving documentation - PR #72 26 | 27 | 28 | v0.2.0 29 | ------ 30 | 31 | Added 32 | ***** 33 | - Create Getting started tutorial and refactoring of docs `#62 `_ 34 | - Implement :code:`Warp` layer `#61 `_ 35 | - Implement :code:`Zoom` layer `#57 `_ 36 | - Add :code:`Scale` transform `#53 `_ 37 | - Turn :code:`deepdow.data` into a subpackage `#50 `_ 38 | - Add :code:`generate_weights_table` `#47 `_ 39 | - :code:`WeightNorm` layer and :code:`MinimalNet` `#45 `_ 40 | - Create example for Sparsemax and Softmax layers `#42 `_ 41 | - Add constrained versions of Sparsemax and Softmax `#37 `_ 42 | 43 | Changed 44 | ******* 45 | - Rename :code:`portfolio_evolution` to :code:`plot_weight_anim` 46 | 47 | -------------------------------------------------------------------------------- /docs/source/experiments.rst: -------------------------------------------------------------------------------- 1 | .. testsetup:: 2 | 3 | import numpy as np 4 | import torch 5 | 6 | np.random.seed(2) 7 | torch.manual_seed(2) 8 | 9 | .. _experiments: 10 | 11 | Experiments 12 | =========== 13 | This section focuses on putting all of the previous sections together and proposes a framework for training and 14 | evaluation of networks. The central object is the :code:`deepdow.experiments.Run` class. 
15 | 16 | To instantiate run, we need to provide multiple parameters: 17 | 18 | - :code:`network` - Network to be trained and evaluated. See :ref:`networks` for details. 19 | - :code:`loss` - Loss criterion. See :ref:`losses` for details. 20 | - :code:`train_dataloader` - Dataloader streaming training data. See :ref:`data` for details. 21 | - :code:`val_dataloaders` - Dictionary where keys are names and values are instances of :code:`RigidDataloader`. See :ref:`data` for details. 22 | - :code:`metrics` - Additional metrics to be monitored. See :ref:`losses` for details. 23 | - :code:`benchmarks` - Additional baseline models to be used for comparison. See :ref:`benchmarks` for details. 24 | - :code:`callbacks` - Additional callbacks to be used (on top of the default ones). See :ref:`callbacks` for details. 25 | 26 | Once we construct the :code:`Run`, we can start the training and evaluation loop via the :code:`launch` method. 27 | 28 | .. testcode:: 29 | 30 | from deepdow.benchmarks import OneOverN 31 | from deepdow.data import InRAMDataset, RigidDataLoader 32 | from deepdow.experiments import Run 33 | from deepdow.losses import MaximumDrawdown, SharpeRatio 34 | from deepdow.nn import LinearNet 35 | 36 | n_samples, n_channels, lookback, n_assets = 200, 2, 20, 6 37 | horizon = 15 38 | 39 | X = np.random.random((n_samples, n_channels, lookback, n_assets)) - 0.5 40 | y = np.random.random((n_samples, n_channels, horizon, n_assets)) - 0.5 41 | 42 | dataset = InRAMDataset(X, y) 43 | train_dataloader = RigidDataLoader(dataset, indices=list(range(100)), batch_size=10) 44 | val_dataloaders = {'val': RigidDataLoader(dataset, indices=list(range(130, 180)), batch_size=10)} 45 | 46 | network = LinearNet(n_channels, lookback, n_assets) 47 | loss = SharpeRatio(returns_channel=0) 48 | benchmarks = {'1overN': OneOverN()} 49 | metrics = {'drawdown': MaximumDrawdown(returns_channel=0)} 50 | 51 | run = Run(network, 52 | loss, 53 | train_dataloader, 54 | val_dataloaders=val_dataloaders, 
55 | metrics=metrics,
56 | benchmarks=benchmarks)
57 |
58 | history = run.launch(n_epochs=1)
59 |
60 | .. testoutput::
61 |
62 | model metric epoch dataloader
63 | 1overN drawdown -1 val 0.283
64 | loss -1 val -0.331
65 |
66 | We get results on the benchmarks in the standard output (see above).
67 | Additionally, a progress bar is sent to the standard error. It monitors progress of our network. To read more
68 | details on the :code:`Run` class see :ref:`experiments_API`. Last but not least, we also get an
69 | instance of the :code:`History` class. See the section below for more information.
70 |
71 | History
72 | -------
73 | The :code:`launch` method returns an instance of the :code:`History` class. It captures all the
74 | useful information that was recorded during training. This information can be accessed via the
75 | :code:`metrics` property that is a :code:`pd.DataFrame` with the following columns
76 |
77 | - :code:`model` - name of the model
78 | - :code:`metric` - name of the loss
79 | - :code:`value` - value of the loss
80 | - :code:`batch` - batch
81 | - :code:`epoch` - epoch
82 | - :code:`dataloader` - name of the dataloader
83 | - :code:`lookback` - lookback size, by default only using the one from the dataloader
84 | - :code:`timestamp` - it can be used to uniquely identify a given sample
85 | - :code:`current_time` - time when the entry was logged
86 |
87 | .. _callbacks:
88 |
89 | Callbacks
90 | ---------
91 | Callbacks are intended to be run at precise moments of the training loop.
All callbacks have a shared interface
92 | :code:`deepdow.callbacks.Callback` that provides the following methods
93 |
94 | - :code:`on_batch_begin` - run at the beginning of each **batch**
95 | - :code:`on_batch_end` - run at the end of each **batch**
96 | - :code:`on_epoch_begin` - run at the beginning of each **epoch**
97 | - :code:`on_epoch_end` - run at the end of each **epoch**
98 | - :code:`on_train_begin` - run at the beginning of the **training**
99 | - :code:`on_train_end` - run at the end of the **training**
100 | - :code:`on_train_interrupt` - run in case the training is interrupted
101 |
102 | Each of these methods inputs the :code:`metadata` dictionary. It contains the most recent value of the most
103 | relevant variables.
104 |
105 | Note that when constructing a :code:`Run` there are three callbacks inserted by default
106 |
107 | - :code:`BenchmarkCallback`
108 | - :code:`ValidationCallback`
109 | - :code:`ProgressBarCallback`
110 |
111 | One can add extra callbacks by passing a list of them via the :code:`callbacks` parameter.
112 |
113 | Lastly, callback instances can access the :code:`Run` instance under the :code:`run`
114 | attribute. It is always injected when the training is launched.
115 |
116 |
117 | In what follows, we provide an overview of all available callbacks. For detailed usage instructions
118 | see :ref:`callbacks_API`.
119 |
120 |
121 | BenchmarkCallback
122 | *****************
123 | Automatically added to `Run` instances. It computes all metrics for all provided benchmarks over all validation
124 | dataloaders.
125 |
126 |
127 | EarlyStoppingCallback
128 | *********************
129 | This callback monitors a given metric and if there are no improvements over a specific number of epochs it stops the
130 | training.
131 |
132 | MLFlowCallback
133 | **************
134 | Callback that logs relevant metrics to MLflow.
135 |
136 | ModelCheckpointCallback
137 | ***********************
138 | Saves the network at the end of each epoch in which it achieves a loss lower than the previous lowest one.
139 |
140 | ProgressBarCallback
141 | *******************
142 | Automatically added to `Run` instances. Displays a progress bar with all relevant metrics. One can choose where the output is written
143 | via the :code:`output` parameter.
144 |
145 |
146 | TensorBoardCallback
147 | *******************
148 | Callback that logs relevant metrics to TensorBoard together with images and histograms.
149 |
150 |
151 | ValidationCallback
152 | ******************
153 | Automatically added to `Run` instances. It computes all metrics of the trained network over all validation dataloaders. -------------------------------------------------------------------------------- /docs/source/installation.rst: --------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | The simplest way to start using :code:`deepdow` is to get it from PyPI
5 |
6 | .. code-block:: bash
7 |
8 | pip install deepdow
9 |
10 |
11 | Development
12 | -----------
13 | To start contributing one needs to clone the official repository
14 |
15 | .. code-block:: bash
16 |
17 | git clone https://github.com/jankrepl/deepdow.git
18 | cd deepdow
19 | pip install -e .[dev,docs,examples] -------------------------------------------------------------------------------- /docs/source/introduction.rst: --------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 | :code:`deepdow` is a framework that focuses on portfolio optimization via end-to-end deep learning. Its goal is to
4 | facilitate research of networks that perform weight allocation in **one forward pass**.
5 |
6 | Name
7 | ----
8 | The name of the package is inspired by the father of technical analysis—**Charles Dow**.
9 |
10 |
11 | Traditional portfolio optimization
12 | ----------------------------------
13 | As described by Markowitz [MARK1952]_, portfolio optimization is commonly divided into two separate stages:
14 |
15 | 1. **Creation of beliefs about the future performances of securities**
16 | 2. **Finding optimal portfolio given these beliefs**
17 |
18 | One extremely popular example of this two stage paradigm is:
19 |
20 | .. _traditional:
21 |
22 | 1. **Estimation of expected returns** :math:`\boldsymbol{\mu}` **and covariance matrix** :math:`\boldsymbol{\Sigma}`
23 | 2. **Solving a convex optimization problem, e.g.** :math:`\boldsymbol{\mu}^T \textbf{w} - \gamma \textbf{w}^T \boldsymbol{\Sigma} \textbf{w}` **such that** :math:`\textbf{w} > 0` **and** :math:`{\bf 1}^T \textbf{w}=1`
24 |
25 | Commonly, these two steps are absolutely separated since they require different approaches
26 |
27 | 1. **Predictive modeling (statistics + machine learning)**
28 | 2. **Objective function and constraints design**
29 |
30 | Not surprisingly, one needs to use totally different tools. Below are some examples from the Python ecosystem.
31 |
32 | 1. :code:`numpy`, :code:`pandas`, :code:`scikit-learn`, :code:`statsmodels`, :code:`tensorflow`, :code:`pytorch`, ...
33 | 2. :code:`cvxpy`, :code:`cvxopt`, :code:`scipy`, ...
34 |
35 |
36 | Why is DeepDow different?
37 | -------------------------
38 | :code:`deepdow` strives to merge the above mentioned two steps into **one**. The fundamental idea is to construct
39 | end-to-end deep networks that input the rawest features (returns,
40 | volumes, ...) and output asset allocation. This approach has multiple benefits:
41 |
42 | - Hyperparameters can be turned into **trainable weights** (i.e. :math:`\gamma` in :ref:`2nd stage `)
43 | - Leveraging deep learning to extract useful features for **allocation** (rather than just prediction)
44 | - **Single** loss function
45 |
46 |
47 | References
48 | ----------
49 | .. [MARK1952]
50 | Markowitz, H.
(1952), PORTFOLIO SELECTION. The Journal of Finance, 7: 77-91. 51 | doi:10.1111/j.1540-6261.1952.tb01525.x -------------------------------------------------------------------------------- /docs/source/losses.rst: -------------------------------------------------------------------------------- 1 | .. _losses: 2 | 3 | Losses 4 | ====== 5 | 6 | .. testsetup:: 7 | 8 | import torch 9 | 10 | Introduction 11 | ------------ 12 | The loss functions are one of the main components of :code:`deepdow`. Please review the :ref:`basics_loss` in 13 | :ref:`basics` to understand the setup. Most importantly, by a **loss function** we mean any function that 14 | has the following **two inputs** 15 | 16 | - :code:`weights` - :code:`torch.Tensor` of shape :code:`(n_samples, n_assets)` 17 | - :code:`y` - :code:`torch.Tensor` of shape :code:`(n_samples, n_channels, horizon, n_assets)` 18 | 19 | And a **single output** 20 | 21 | - :code:`loss` - :code:`torch.Tensor` of shape :code:`(n_samples,)` 22 | 23 | 24 | 25 | .. warning:: 26 | 27 | Similarly to layers (see :ref:`layers`), all the :code:`deepdow` losses assume that the input and output tensors have 28 | an extra dimension in the front—the **sample** dimension. It serves for batching when training the networks. For 29 | this reason, losses must be implemented in a way that all samples are independent. 30 | 31 | The above definition of a loss function is very general and in many cases :code:`deepdow` losses focus on a more narrow 32 | family of functions in the background. To be more specific, one can select a single channel (:code:`returns_channel`) 33 | from the :code:`y` tensor representing the desired returns. After this, portfolio returns **r** over each of the 34 | :code:`horizon` steps can be computed which results in a tensor of shape :code:`(n_samples, horizon)`. By applying 35 | some summarization function **S** over the :code:`horizon` dimension we arrive at the final output :code:`loss` of shape 36 | :code:`(n_samples,)`. 
37 |
38 |
39 | Definitions
40 | -----------
41 | Before we start discussing losses themselves let us first write down multiple definitions. Let us assume that, before
42 | investing, we have initial holdings of :math:`V` (in cash). Additionally, for each asset :math:`a` we denote its price
43 | at time :math:`t` as :math:`p^{a}_{t}`. Given some portfolio weights :math:`\textbf{w}` over :math:`N` assets we define
44 | portfolio value at time :math:`t`
45 |
46 | .. math::
47 |
48 | p^{\textbf{w}}_t = \sum_{a=1}^{N} p_t^a \frac{w_a V}{p_0^a}
49 |
50 | Before we continue, notice that the above definition assumes two things
51 |
52 | - We employ the buy and hold strategy
53 | - Assets are perfectly divisible (one can buy :math:`\frac{w_a V}{p_0^a}` units of any asset)
54 |
55 | Let us now define two types of asset returns: **simple** and **logarithmic**
56 |
57 |
58 |
59 | .. math::
60 |
61 | {}^{\text{S}}r^{a}_{t} = \frac{p^{a}_{t}}{p^{a}_{t-1}} - 1
62 |
63 |
64 | {}^{\text{L}}r^{a}_{t} = \log \frac{p^{a}_{t}}{p^{a}_{t-1}}
65 |
66 |
67 | Additionally, we also consider their portfolio counterparts
68 |
69 | .. math::
70 |
71 | {}^{\text{S}}r^{\textbf{w}}_{t} = \frac{p^{\textbf{w}}_{t}}{p^{\textbf{w}}_{t-1}} - 1
72 |
73 |
74 | {}^{\text{L}}r^{\textbf{w}}_{t} = \log \frac{p^{\textbf{w}}_{t}}{p^{\textbf{w}}_{t-1}}
75 |
76 |
77 | Note that in both of the cases the initial holding :math:`V` cancels out and the portfolio returns are independent
78 | of it.
79 |
80 |
81 | Portfolio returns
82 | -----------------
83 | One can extract portfolio returns given asset returns via the function
84 | :code:`portfolio_returns`. It inputs a matrix of asset returns (the returns type is controlled via :code:`input_type`)
85 |
86 |
87 |
88 | ..
math::
89 |
90 | \begin{bmatrix}
91 | r^{1}_1 & \dots & r^{N}_1 \\
92 | \vdots & \ddots & \vdots \\
93 | r^{1}_{\text{horizon}} & \dots & r^{N}_{\text{horizon}}
94 | \end{bmatrix}
95 |
96 |
97 | and outputs a vector of portfolio returns (the type is controlled via :code:`output_type`)
98 |
99 | .. math::
100 |
101 | \textbf{r}^{\textbf{w}} = \begin{bmatrix}
102 | r^{\textbf{w}}_{1} \\
103 | \vdots \\
104 | r^{\textbf{w}}_{\text{horizon}}
105 | \end{bmatrix}
106 |
107 | We rely on the below relation to perform the computations
108 |
109 | .. math::
110 |
111 | {}^{\text{S}}r_t^{\textbf{w}}=\frac{\sum_{a=1}^{N}{}^{\text{S}}r_{t}^{a}w_a\prod_{i=1}^{t-1}(1+{}^{\text{S}}r_{i}^{a})}{\sum_{a=1}^{N}w_a\prod_{i=1}^{t-1}(1+{}^{\text{S}}r_{i}^{a})}
112 |
113 |
114 |
115 | .. testcode::
116 |
117 | from deepdow.losses import portfolio_returns
118 |
119 | returns = torch.tensor([[[0.1, 0.2], [0.05, 0.02]]]) # (n_samples=1, horizon=2, n_assets=2)
120 | weights = torch.tensor([[0.4, 0.6]]) # (n_samples=1, n_assets=2)
121 |
122 | prets = portfolio_returns(weights, returns, input_type='simple', output_type='simple')
123 |
124 | assert prets.shape == (1, 2) # (n_samples, horizon)
125 | assert torch.allclose(prets, torch.tensor([[0.1600, 0.0314]]), atol=1e-4)
126 |
127 |
128 | Available losses
129 | ----------------
130 | To avoid confusion, all the available losses have the *"The lower the better"* logic. If the class name suggests
131 | otherwise (e.g. :code:`MeanReturns`) a negative is computed instead. For the exact usage see :ref:`losses_API`.
132 |
133 |
134 | Alpha
135 | *****
136 | Negative alpha with respect to a predefined portfolio of assets. If :code:`benchmark_weights=None` then
137 | the equally weighted portfolio is considered by default.
138 |
139 |
140 | CumulativeReturn
141 | ****************
142 | Negative simple cumulative return of the buy and hold portfolio at the end of the :code:`horizon` steps.
143 |
144 | ..
math::
145 |
146 | \frac{p^{\textbf{w}}_{t + \text{horizon}}}{p^{\textbf{w}}_{t}} - 1
147 |
148 |
149 | LargestWeight
150 | *************
151 | Loss function independent of :code:`y`, only taking into account the :code:`weights`.
152 |
153 | .. math::
154 |
155 | \max(\textbf{w})
156 |
157 | MaximumDrawdown
158 | ***************
159 | The **negative** of the maximum drawdown.
160 |
161 |
162 | MeanReturns
163 | ***********
164 | The **negative** of mean portfolio returns over the :code:`horizon` time steps.
165 |
166 |
167 | .. math::
168 |
169 | {\mu}^{\textbf{w}} = \frac{\sum_{i}^{\text{horizon}} r^{\textbf{w}}_{i} }{\text{horizon}}
170 |
171 | RiskParity
172 | **********
173 |
174 | .. math::
175 |
176 | \sum_{i=1}^{N}\Big(\frac{\sigma}{N} - w_i \big(\frac{\Sigma\textbf{w}}{\sigma}\big)_i\Big) ^ 2
177 |
178 | where :math:`\sigma=\sqrt{\textbf{w}^T\Sigma\textbf{w}}` and :math:`\Sigma` is
179 | the covariance matrix of asset returns.
180 |
181 | Quantile (Value at Risk)
182 | ************************
183 | The **negative** of the :code:`p`-quantile of portfolio returns. Note that in the background it is solved via
184 | :code:`torch.kthvalue`.
185 |
186 | SharpeRatio
187 | ***********
188 | The **negative** of the Sharpe ratio of portfolio returns.
189 |
190 | .. math::
191 |
192 | \frac{{\mu}^{\textbf{w}} - r_{\text{rf}}}{{\sigma}^{\textbf{w}} + \epsilon}
193 |
194 | SortinoRatio
195 | ************
196 | The **negative** of the Sortino ratio of portfolio returns.
197 |
198 | .. math::
199 |
200 | \frac{{\mu}^{\textbf{w}} - r_{\text{rf}}}{\sqrt{\frac{\sum_{i}^{\text{horizon}} \max({\mu}^{\textbf{w}} - r^{\textbf{w}}_{i} , 0)^{2}}{\text{horizon}}} + \epsilon}
201 |
202 |
203 | SquaredWeights
204 | **************
205 | Loss function independent of :code:`y`, only taking into account the :code:`weights`.
206 |
207 | .. math::
208 |
209 | \sum_{i=1}^{N} w_i^2
210 |
211 |
212 | The lower this loss is, the more diversified our portfolio is.
If we focus on two extremes, 213 | for the equally weighted it is :math:`\frac{1}{N}`. For a single asset portfolio it is :math:`1`. 214 | 215 | StandardDeviation 216 | ***************** 217 | 218 | .. math:: 219 | 220 | {\sigma}^{\textbf{w}} = \sqrt{\frac{\sum_{i}^{\text{horizon}} (r^{\textbf{w}}_{i} - {\mu}^{\textbf{w}})^{2}}{\text{horizon}}} 221 | 222 | Downside Risk 223 | ************* 224 | 225 | .. math:: 226 | 227 | \sqrt{\frac{\sum_{i}^{\text{horizon}} \max({\mu}^{\textbf{w}} - r^{\textbf{w}}_{i} , 0)^{\beta}}{\text{horizon}}} 228 | 229 | 230 | WorstReturn 231 | *********** 232 | The **negative** of the minimum returns 233 | 234 | .. math:: 235 | 236 | min(\textbf{r}^{\textbf{w}}) 237 | 238 | 239 | 240 | 241 | Arithmetic operations 242 | ---------------------- 243 | :code:`deepdow` offers a powerful feature of performing arithmetic operations between loss instances. In other words, 244 | one can obtain new losses by performing **unary** and **binary** operations on existing losses. 245 | 246 | Lets assume we have a loss instance, then the available operations are 247 | 248 | **Unary** 249 | 250 | - addition of a constant 251 | - multiplication by a constant 252 | - division by a constant 253 | - exponentiation 254 | 255 | **Binary** 256 | 257 | - addition of another loss 258 | - multiplication by another loss 259 | - division by another loss 260 | 261 | .. warning:: 262 | 263 | Currently, the :code:`__repr__` of a loss that is a result of an arithmetic operation is just a naive 264 | string concatenation of :code:`__repr__` of the constituent losses. No symbolic mathematics and expression reduction 265 | is utilized. 266 | 267 | 268 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | This section contains plenty of examples of how to use :code:`deepdow`. 
These examples 5 | are grouped into categories based on what they focus on. -------------------------------------------------------------------------------- /examples/end_to_end/README.rst: -------------------------------------------------------------------------------- 1 | End to end 2 | ---------- 3 | Tutorials that demonstrate the entire pipeline that results in trained networks. -------------------------------------------------------------------------------- /examples/end_to_end/getting_started.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============== 3 | Getting started 4 | =============== 5 | 6 | Welcome to :code:`deepdow`! This tutorial is going to demonstrate all the essential features. 7 | Before you continue, make sure to check out :ref:`basics` to familiarize yourself with the core ideas 8 | of :code:`deepdow`. This hands-on tutorial is divided into 4 sections 9 | 10 | 1. Dataset creation and loading 11 | 2. Network definition 12 | 3. Training 13 | 4. Evaluation and visualization of results 14 | """ 15 | 16 | # %% 17 | # Preliminaries 18 | # ^^^^^^^^^^^^^ 19 | # Let us start with importing all important dependencies. 20 | 21 | from deepdow.benchmarks import Benchmark, OneOverN, Random 22 | from deepdow.callbacks import EarlyStoppingCallback 23 | from deepdow.data import InRAMDataset, RigidDataLoader, prepare_standard_scaler, Scale 24 | from deepdow.data.synthetic import sin_single 25 | from deepdow.experiments import Run 26 | from deepdow.layers import SoftmaxAllocator 27 | from deepdow.losses import MeanReturns, SharpeRatio, MaximumDrawdown 28 | from deepdow.visualize import generate_metrics_table, generate_weights_table, plot_metrics, plot_weight_heatmap 29 | import matplotlib.pyplot as plt 30 | import numpy as np 31 | import torch 32 | 33 | # %% 34 | # In order to be able to reproduce all results we set both the :code:`numpy` and :code:`torch` seed. 
35 | 36 | torch.manual_seed(4) 37 | np.random.seed(5) 38 | 39 | # %% 40 | # Dataset creation and loading 41 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 42 | # In this example, we are going to be using a synthetic dataset. Asset returns are going to be 43 | # sine functions where the frequency and phase are randomly selected for each asset. First of 44 | # all let us set all the parameters relevant to data creation. 45 | n_timesteps, n_assets = 1000, 20 46 | lookback, gap, horizon = 40, 2, 20 47 | n_samples = n_timesteps - lookback - horizon - gap + 1 48 | 49 | # %% 50 | # Additionally, we will use approximately 80% of the data for training and 20% for testing. 51 | split_ix = int(n_samples * 0.8) 52 | indices_train = list(range(split_ix)) 53 | indices_test = list(range(split_ix + lookback + horizon, n_samples)) 54 | 55 | print('Train range: {}:{}\nTest range: {}:{}'.format(indices_train[0], indices_train[-1], 56 | indices_test[0], indices_test[-1])) 57 | 58 | # %% 59 | # Now we can generate the synthetic asset returns of with shape :code:`(n_timesteps, n_assets)`. 60 | returns = np.array([sin_single(n_timesteps, 61 | freq=1 / np.random.randint(3, lookback), 62 | amplitude=0.05, 63 | phase=np.random.randint(0, lookback) 64 | ) for _ in range(n_assets)]).T 65 | 66 | # %% 67 | # We also add some noise. 68 | returns += np.random.normal(scale=0.02, size=returns.shape) 69 | 70 | # %% 71 | # See below the first 100 timesteps of 2 assets. 72 | plt.plot(returns[:100, [1, 2]]) 73 | 74 | 75 | # %% 76 | # To obtain the feature matrix :code:`X` and the target :code:`y` we apply the rolling window 77 | # strategy. 78 | X_list, y_list = [], [] 79 | 80 | for i in range(lookback, n_timesteps - horizon - gap + 1): 81 | X_list.append(returns[i - lookback: i, :]) 82 | y_list.append(returns[i + gap: i + gap + horizon, :]) 83 | 84 | X = np.stack(X_list, axis=0)[:, None, ...] 85 | y = np.stack(y_list, axis=0)[:, None, ...] 
86 | 87 | print('X: {}, y: {}'.format(X.shape, y.shape)) 88 | 89 | # %% 90 | # As commonly done in every deep learning application, we want to scale our input features to 91 | # be approximately centered around 0 and have a standard deviation of 1. In :code:`deepdow` we 92 | # can achieve this with the :code:`prepare_standard_scaler` function that computes the mean 93 | # and standard deviation of the input (for each channel). Additionally, we do not want to leak 94 | # any information from our test set and therefore we only compute these statistics over the 95 | # training set. 96 | means, stds = prepare_standard_scaler(X, indices=indices_train) 97 | print('mean: {}, std: {}'.format(means, stds)) 98 | 99 | # %% 100 | # We can now construct the :code:`InRAMDataset`. By providing the optional :code:`transform` we 101 | # make sure that when the samples are streamed they are always scaled based on our computed 102 | # (training) statistics. See :ref:`inramdataset` for more details. 103 | 104 | dataset = InRAMDataset(X, y, transform=Scale(means, stds)) 105 | 106 | # %% 107 | # Using the :code:`dataset` we can now construct two dataloaders—one for training and the other one 108 | # for testing. For more details see :ref:`dataloaders`. 109 | dataloader_train = RigidDataLoader(dataset, 110 | indices=indices_train, 111 | batch_size=32) 112 | 113 | dataloader_test = RigidDataLoader(dataset, 114 | indices=indices_test, 115 | batch_size=32) 116 | 117 | 118 | # %% 119 | # Network definition 120 | # ^^^^^^^^^^^^^^^^^^ 121 | # Let us now write a custom network. See :ref:`writing_custom_networks`. 
class GreatNet(torch.nn.Module, Benchmark):
    """Dropout + single dense layer followed by a softmax allocation.

    Parameters
    ----------
    n_assets : int
        Number of assets in the universe.

    lookback : int
        Number of timesteps of history fed to the network.

    p : float
        Dropout probability applied to the flattened input features.

    """

    def __init__(self, n_assets, lookback, p=0.5):
        super().__init__()

        self.dropout_layer = torch.nn.Dropout(p=p)
        self.dense_layer = torch.nn.Linear(n_assets * lookback, n_assets, bias=True)
        self.allocate_layer = SoftmaxAllocator(temperature=None)
        # Learnable softmax temperature: it lets the network trade off between
        # an equally weighted allocation (high temperature) and single asset
        # portfolios (low temperature).
        self.temperature = torch.nn.Parameter(torch.ones(1), requires_grad=True)

    def forward(self, x):
        """Compute the portfolio weights.

        Parameters
        ----------
        x : torch.Tensor
            Of shape (n_samples, 1, lookback, n_assets).

        Returns
        -------
        weights : torch.Tensor
            Tensor of shape (n_samples, n_assets).

        """
        n_samples = x.shape[0]

        flat = x.view(n_samples, -1)  # (n_samples, lookback * n_assets)
        logits = self.dense_layer(self.dropout_layer(flat))

        # One (identical) temperature per sample, on the same device/dtype as x.
        per_sample_temp = self.temperature * torch.ones(n_samples).to(device=x.device, dtype=x.dtype)

        return self.allocate_layer(logits, per_sample_temp)


# %%
# So what is this network doing? First of all, we make an assumption that assets and lookback will
# never change (the same shape and order at train and at inference time). This assumption
# is justified since we are using :code:`RigidDataLoader`.
# We can learn :code:`n_assets` linear models that have :code:`n_assets * lookback` features. In
# other words we have a dense layer that takes the flattened feature tensor :code:`x` and returns
# a vector of length :code:`n_assets`. Since elements of this vector can range from :math:`-\infty`
# to :math:`\infty` we turn it into an asset allocation via :code:`SoftmaxAllocator`.
# Additionally, we learn the :code:`temperature` from the data. This will enable us to learn the
# optimal trade-off between an equally weighted allocation (uniform distribution) and
# single asset portfolios.
169 | 170 | # %% 171 | network = GreatNet(n_assets, lookback) 172 | print(network) 173 | 174 | # %% 175 | # In :code:`torch` networks are either in the **train** or **eval** mode. Since we are using 176 | # dropout it is essential that we set the mode correctly based on what we are trying to do. 177 | network = network.train() # it is the default, however, just to make the distinction clear 178 | 179 | # %% 180 | # Training 181 | # ^^^^^^^^ 182 | # It is now time to define our loss. Let's say we want to achieve multiple objectives at the same 183 | # time. We want to minimize the drawdowns, maximize the mean returns and also maximize the Sharpe 184 | # ratio. All of these losses are implemented in :code:`deepdow.losses`. To avoid confusion, they 185 | # are always implemented in a way that **the lower the value of the loss the better**. To combine 186 | # multiple objectives we can simply sum all of the individual losses. Similarly, if we want to 187 | # assign more importance to one of them we can achieve this by multiplying by a constant. To learn 188 | # more see :ref:`losses`. 189 | 190 | loss = MaximumDrawdown() + 2 * MeanReturns() + SharpeRatio() 191 | 192 | # %% 193 | # Note that by default all the losses assume that we input logarithmic returns 194 | # (:code:`input_type='log'`) and that they are in the 0th channel (:code:`returns_channel=0`). 195 | 196 | 197 | # %% 198 | # We now have all the ingredients ready for training of the neural network. :code:`deepdow` implements 199 | # a simple wrapper :code:`Run` that implements the training loop and a minimal callback 200 | # framework. For further information see :ref:`experiments`. 
201 |
202 | run = Run(network,
203 |           loss,
204 |           dataloader_train,
205 |           val_dataloaders={'test': dataloader_test},
206 |           optimizer=torch.optim.Adam(network.parameters(), amsgrad=True),
207 |           callbacks=[EarlyStoppingCallback(metric_name='loss',
208 |                                            dataloader_name='test',
209 |                                            patience=15)])
210 | # %%
211 | # To run the training loop, we use the :code:`launch` method, where we specify the number of epochs.
212 | history = run.launch(30)
213 |
214 | # %%
215 | # Evaluation and visualization
216 | # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
217 | # The :code:`history` object returned by :code:`launch` contains a lot of useful information related
218 | # to training. Specifically, the property :code:`metrics` returns a comprehensive :code:`pd.DataFrame`.
219 | # To display the average test loss for each epoch we can run the following.
220 |
221 | per_epoch_results = history.metrics.groupby(['dataloader', 'metric', 'model', 'epoch'])['value']
222 |
223 | print(per_epoch_results.count())  # double check number of samples each epoch
224 | print(per_epoch_results.mean())  # mean loss per epoch
225 |
226 | # %%
227 | per_epoch_results.mean()['test']['loss']['network'].plot()
228 |
229 | # %%
230 | # To get more insight into what our network predicts we can use the :code:`deepdow.visualize` module.
231 | # Before we even start further evaluations, let us make sure the network is in eval mode.
232 | network = network.eval()
233 |
234 | # %%
235 | # To put the performance of our network in context, we also utilize benchmarks. :code:`deepdow`
236 | # offers multiple benchmarks already. Additionally, one can provide custom simple benchmarks or
237 | # some pre-trained networks.
238 | benchmarks = {
239 |     '1overN': OneOverN(),  # each asset has weight 1 / n_assets
240 |     'random': Random(),  # random allocation that is however close to 1OverN
241 |     'network': network
242 | }
243 |
244 | # %%
245 | # During training, the only mandatory metric/loss was the loss criterion that we tried to minimize.
246 | # Naturally, one might be interested in many other metrics to evaluate the performance. See below 247 | # an example. 248 | 249 | metrics = { 250 | 'MaxDD': MaximumDrawdown(), 251 | 'Sharpe': SharpeRatio(), 252 | 'MeanReturn': MeanReturns() 253 | } 254 | 255 | # %% 256 | # Let us now use the above created objects. We first generate a table with all metrics over all 257 | # samples and for all benchmarks. This is done via :code:`generate_metrics_table`. 258 | metrics_table = generate_metrics_table(benchmarks, 259 | dataloader_test, 260 | metrics) 261 | 262 | # %% 263 | # And then we plot it with :code:`plot_metrics`. 264 | plot_metrics(metrics_table) 265 | 266 | # %% 267 | # Each plot represents a different metric. The x-axis represents the timestamps in our 268 | # test set. The different colors are capturing different models. How is the value of a metric 269 | # computed? We assume that the investor predicts the portfolio at time x and buys it. He then 270 | # holds it for :code:`horizon` timesteps. The actual metric is then computed over this time horizon. 271 | 272 | # %% 273 | # Finally, we are also interested in how the allocation/prediction looks like at each time step. 274 | # We can use the :code:`generate_weights_table` function to create a :code:`pd.DataFrame`. 275 | weight_table = generate_weights_table(network, dataloader_test) 276 | 277 | # %% 278 | # We then call the :code:`plot_weight_heatmap` to see a heatmap of weights. 279 | plot_weight_heatmap(weight_table, 280 | add_sum_column=True, 281 | time_format=None, 282 | time_skips=25) 283 | 284 | # %% 285 | # The rows represent different timesteps in our test set. The columns are all the assets in our 286 | # universe. The values represent the weight in the portfolio. Additionally, we add a sum column 287 | # to show that we are really generating valid allocations. 
288 | -------------------------------------------------------------------------------- /examples/end_to_end/var_coefs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jankrepl/deepdow/384e18acc17c982ac5a4362187b348bdbdb07b98/examples/end_to_end/var_coefs.npy -------------------------------------------------------------------------------- /examples/end_to_end/var_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | Vector autoregression 4 | ===================== 5 | 6 | This example demonstrates how one can validate :code:`deepdow` on synthetic data. 7 | We choose to model our returns with the vector autoregression model (VAR). 8 | This model links future returns to lagged returns with a linear 9 | model. See [Lütkepohl2005]_ for more details. We use a stable VAR 10 | process with 12 lags and 8 assets, that is 11 | 12 | .. math:: 13 | 14 | r_t = A_1 r_{t-1} + ... + A_{12} r_{t-12} 15 | 16 | 17 | For this specific task, we use the :code:`LinearNet` network. It is very similar to VAR since it tries to find a linear 18 | model of all lagged variables. However, it also has purely deep learning components like dropout, batch 19 | normalization and softmax allocator. 20 | 21 | To put the performance of our network into context, we create a benchmark **VARTrue** that has access to the true 22 | parameters of the VAR process. We create a simple investment rule of investing all resources into the asset with the 23 | highest future returns. Additionally, we also consider other benchmarks 24 | 25 | - equally weighted portfolio 26 | - inverse volatility 27 | - random allocation 28 | 29 | 30 | References 31 | ---------- 32 | .. [Lütkepohl2005] 33 | Lütkepohl, Helmut. New introduction to multiple time series analysis. Springer Science & Business Media, 2005. 34 | 35 | 36 | .. 
warning:: 37 | 38 | Note that we are using the :code:`statsmodels` package to simulate the VAR process. 39 | 40 | """ 41 | 42 | import numpy as np 43 | import torch 44 | 45 | import matplotlib.pyplot as plt 46 | from statsmodels.tsa.vector_ar.var_model import VARProcess, forecast 47 | 48 | from deepdow.benchmarks import OneOverN, Benchmark, InverseVolatility, Random 49 | from deepdow.callbacks import EarlyStoppingCallback 50 | from deepdow.data import InRAMDataset, RigidDataLoader 51 | from deepdow.losses import MeanReturns, SquaredWeights 52 | from deepdow.nn import LinearNet 53 | from deepdow.experiments import Run 54 | 55 | 56 | class VARTrue(Benchmark): 57 | """Benchmark representing the ground truth return process. 58 | 59 | Parameters 60 | ---------- 61 | process : statsmodels.tsa.vector_ar.var_model.VARProcess 62 | The ground truth VAR process that generates the returns. 63 | 64 | """ 65 | 66 | def __init__(self, process): 67 | self.process = process 68 | 69 | def __call__(self, x): 70 | """Invest all money into the asset with the highest return over the horizon.""" 71 | n_samples, n_channels, lookback, n_assets = x.shape 72 | 73 | assert n_channels == 1 74 | 75 | x_np = x.detach().numpy() # (n_samples, n_channels, lookback, n_assets) 76 | weights_list = [forecast(x_np[i, 0], self.process.coefs, None, 1).argmax() for i in range(n_samples)] 77 | 78 | result = torch.zeros(n_samples, n_assets).to(x.dtype) 79 | 80 | for i, w_ix in enumerate(weights_list): 81 | result[i, w_ix] = 1 82 | 83 | return result 84 | 85 | 86 | coefs = np.load('var_coefs.npy') # (lookback, n_assets, n_assets) = (12, 8, 8) 87 | 88 | # Parameters 89 | lookback, _, n_assets = coefs.shape 90 | gap, horizon = 0, 1 91 | batch_size = 256 92 | 93 | # Simulate returns 94 | process = VARProcess(coefs, None, np.eye(n_assets) * 1e-5) 95 | data = process.simulate_var(10000) 96 | n_timesteps = len(data) 97 | 98 | # Create features and targets 99 | X_list, y_list = [], [] 100 | 101 | for i in 
range(lookback, n_timesteps - horizon - gap + 1): 102 | X_list.append(data[i - lookback: i, :]) 103 | y_list.append(data[i + gap: i + gap + horizon, :]) 104 | 105 | X = np.stack(X_list, axis=0)[:, None, ...] 106 | y = np.stack(y_list, axis=0)[:, None, ...] 107 | 108 | # Setup deepdow framework 109 | dataset = InRAMDataset(X, y) 110 | 111 | network = LinearNet(1, lookback, n_assets, p=0.5) 112 | dataloader = RigidDataLoader(dataset, 113 | indices=list(range(5000)), 114 | batch_size=batch_size, 115 | lookback=lookback) 116 | val_dataloaders = {'train': dataloader, 117 | 'val': RigidDataLoader(dataset, 118 | indices=list(range(5020, 9800)), 119 | batch_size=batch_size, 120 | lookback=lookback)} 121 | 122 | run = Run(network, 123 | 100 * MeanReturns(), 124 | dataloader, 125 | val_dataloaders=val_dataloaders, 126 | metrics={'sqweights': SquaredWeights()}, 127 | benchmarks={'1overN': OneOverN(), 128 | 'VAR': VARTrue(process), 129 | 'Random': Random(), 130 | 'InverseVol': InverseVolatility()}, 131 | optimizer=torch.optim.Adam(network.parameters(), amsgrad=True), 132 | callbacks=[EarlyStoppingCallback('val', 'loss')] 133 | ) 134 | 135 | history = run.launch(40) 136 | 137 | fig, ax = plt.subplots(1, 1) 138 | ax.set_title('Validation loss') 139 | 140 | per_epoch_results = history.metrics.groupby(['dataloader', 'metric', 'model', 'epoch'])['value'].mean()['val']['loss'] 141 | our = per_epoch_results['network'] 142 | our.plot(ax=ax, label='network') 143 | 144 | ax.hlines(y=per_epoch_results['VAR'], xmin=0, xmax=len(our), color='red', label='VAR') 145 | ax.hlines(y=per_epoch_results['1overN'], xmin=0, xmax=len(our), color='green', label='1overN') 146 | ax.hlines(y=per_epoch_results['Random'], xmin=0, xmax=len(our), color='yellow', label='Random') 147 | ax.hlines(y=per_epoch_results['InverseVol'], xmin=0, xmax=len(our), color='black', label='InverseVol') 148 | 149 | plt.legend() 150 | -------------------------------------------------------------------------------- 
/examples/layers/README.rst: -------------------------------------------------------------------------------- 1 | Layers 2 | ------ 3 | Examples focusing on specific layers. -------------------------------------------------------------------------------- /examples/layers/softmax_sparsemax.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | Softmax and Sparsemax 4 | ===================== 5 | 6 | :code:`deepdow` offers multiple allocation layers. Among them are the :code:`SoftmaxAllocator` and 7 | :code:`SparsemaxAllocator`. Softmax is a very popular technique that turns vectors of numbers (logits) 8 | into probability distributions. If we do not allow for short selling (no weights below zero) and 9 | leveraging (no weight above 1) then weight allocation can be seen as a probability distribution. 10 | Additionally, sparsemax was proposed by [Martins2016]_ as an alternative to softmax. It enforces 11 | sparsity. Both :code:`SoftmaxAllocator` and :code:`SparsemaxAllocator` support :code:`max_weight` 12 | parameter controlling the maximum possible weight of a single asset and :code:`temperature`. 13 | 14 | 15 | The below plot shows how these two allocators react to changes in :code:`max_weight` and 16 | :code:`temperature`. 17 | 18 | .. warning:: 19 | 20 | Note that we are using the :code:`seaborn` to plot a heatmap. 
21 | 22 | """ 23 | 24 | from deepdow.layers import SoftmaxAllocator, SparsemaxAllocator 25 | import matplotlib.pyplot as plt 26 | import numpy as np 27 | import pandas as pd 28 | import seaborn as sns 29 | import torch 30 | 31 | n_assets = 10 32 | seed = 6 33 | temperatures = [0.2, 0.4, 1] 34 | max_weights = [0.2, 0.5, 1] 35 | 36 | torch.manual_seed(seed) 37 | logits = torch.rand(size=(1, n_assets)) - 0.5 38 | 39 | fig, axs = plt.subplots(len(temperatures), 40 | len(max_weights), 41 | sharex=True, 42 | sharey=True, 43 | figsize=(15, 5)) 44 | cbar_ax = fig.add_axes([.91, .3, .03, .4]) 45 | 46 | for r, temperature in enumerate(temperatures): 47 | for c, max_weight in enumerate(max_weights): 48 | sparsemax = SparsemaxAllocator(n_assets, 49 | max_weight=max_weight, 50 | temperature=temperature 51 | ) 52 | 53 | softmax = SoftmaxAllocator(n_assets=n_assets, 54 | temperature=temperature, 55 | max_weight=max_weight, 56 | formulation='variational') 57 | 58 | w_sparsemax = sparsemax(logits).detach().numpy() 59 | w_softmax = softmax(logits).detach().numpy() 60 | 61 | df = pd.DataFrame(np.concatenate([w_softmax, w_sparsemax], axis=0), 62 | index=['softmax', 'sparsemax']) 63 | 64 | axs[r, c].set_title('temp={}, max_weight={}'.format(temperature, max_weight)) 65 | sns.heatmap(df, 66 | vmin=0, 67 | vmax=1, 68 | center=0.5, 69 | cmap='hot', 70 | ax=axs[r, c], 71 | cbar_ax=cbar_ax, 72 | square=True) 73 | -------------------------------------------------------------------------------- /examples/layers/warp.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========== 3 | Warp layer 4 | ========== 5 | 6 | The :ref:`layers_warp` allows for arbitrary warping of the input tensor **x** along the time 7 | (lookback) dimension. One needs to provide element by element transformation with values in [-1, 1]. 
8 | Note that this transformation can be either seen as a hyperparameter, collection of learnable parameters 9 | (one per training set) or predicted for each sample. 10 | 11 | To illustrate how to use this layer, let us assume that we have a single asset. We have observed 12 | its returns over the :code:`lookback=50` previous days. Below we demonstrate 5 different 13 | transformations to the original time series. 14 | 15 | - **identity** - no change 16 | - **zoom** - focusing on the last 25 days 17 | - **backwards** - swap the time flow 18 | - **slowdown_start** - slow down the beginning of the time series and speed up the end 19 | - **slowdown_end** - speed up the beginning of the time series and slow down the end 20 | """ 21 | 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | import torch 25 | 26 | from deepdow.data.synthetic import sin_single 27 | from deepdow.layers import Warp 28 | 29 | lookback = 50 30 | 31 | x_np = (np.linspace(0, 1, num=lookback) * sin_single(lookback, freq=4 / lookback))[None, None, :, None] 32 | x = torch.as_tensor(x_np) 33 | 34 | grid = torch.linspace(0, end=1, steps=lookback)[None, :].to(dtype=x.dtype) 35 | 36 | transform_dict = { 37 | 'identity': lambda x: 2 * (x - 0.5), 38 | 'zoom': lambda x: x, 39 | 'backwards': lambda x: -2 * (x - 0.5), 40 | 'slowdown\_start': lambda x: 2 * (x ** 3 - 0.5), 41 | 'slowdown\_end': lambda x: 2 * (x ** (1 / 3) - 0.5), 42 | } 43 | 44 | n_tforms = len(transform_dict) 45 | 46 | _, axs = plt.subplots(n_tforms, 2, figsize=(16, 3 * n_tforms), sharex=True, sharey=True) 47 | layer = Warp() 48 | 49 | for i, (tform_name, tform_lambda) in enumerate(transform_dict.items()): 50 | tform = tform_lambda(grid) 51 | x_warped = layer(x, tform) 52 | 53 | axs[i, 0].plot(tform.numpy().squeeze(), linewidth=3, color='red') 54 | axs[i, 1].plot(x_warped.numpy().squeeze(), linewidth=3, color='blue') 55 | axs[i, 0].set_title('{} tform'.format(tform_name)) 56 | axs[i, 1].set_title('{} warped'.format(tform_name)) 57 | 
-------------------------------------------------------------------------------- /examples/layers/zoom.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========== 3 | Zoom layer 4 | ========== 5 | 6 | The :ref:`layers_zoom` layer is inspired by the Spatial Transformer Network [Jaderberg2015]_. 7 | Its goal is to be able to zoom in and zoom out across the time dimension. In other words, 8 | it performs dynamic time warping. Note that this warping is identical over all 9 | assets and channels. In the example below, we show how one can use this layer to both `speed up` and 10 | `slow down` the original input tensor **x**. 11 | 12 | Specifically, let us assume that we have 50 assets that have identical returns that follow a 13 | sinusoid. The period of this sinusoid is equal to our lookback which is 40 (i.e. hours). 14 | 15 | The first column shows the original input tensor. The second column shows a slow down (scale = 0.5). 16 | Here we only look at the previous 20 hours, however, we stretch each hour to last two hours. 17 | Finally, the third column shows a speed up (scale = 2). Each hour is shrunk to last 30 minutes. 18 | Note that in the case of speed up the :code:`padding_mode` becomes important since we do not 19 | have access to what happened more than 40 hours ago. 20 | 21 | 22 | .. note:: 23 | 24 | This layer does not modify the shape of the input. 25 | 26 | 27 | As with many other layers in :code:`deepdow` one can either learn one :code:`scale` for the entire 28 | training set or have a separate network that predicts a different one for each sample. 
29 | """ 30 | 31 | from deepdow.data.synthetic import sin_single 32 | from deepdow.layers import Zoom 33 | import matplotlib.pyplot as plt 34 | import numpy as np 35 | import torch 36 | 37 | np.random.seed(3) 38 | torch.manual_seed(3) 39 | 40 | lookback, n_assets = 40, 50 41 | scale_grid = [1, 0.5, 2] 42 | padding_grid = ['zeros', 'reflection'] 43 | 44 | dtype = torch.float 45 | 46 | x = torch.as_tensor(np.stack(n_assets * [-sin_single(lookback, freq=1 / lookback)], 47 | axis=1), 48 | dtype=dtype) 49 | 50 | x = x[None, None, ...] # add sample and channel dimension 51 | 52 | fig, axs = plt.subplots(len(padding_grid), len(scale_grid), sharex=True, sharey=True) 53 | 54 | for r, padding_mode in enumerate(padding_grid): 55 | for c, scale in enumerate(scale_grid): 56 | layer = Zoom(padding_mode=padding_mode) 57 | x_out = layer(x, torch.ones(1) * scale) 58 | 59 | axs[r, c].imshow(x_out.numpy()[0, 0]) 60 | axs[r, c].set_title('scale={}\npad={}'.format(scale, padding_mode)) 61 | 62 | plt.setp(axs[-1, :], xlabel='Assets') 63 | plt.setp(axs[:, 0], ylabel='Time') 64 | fig.subplots_adjust(hspace=1) 65 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | count = True 3 | max-line-length = 120 4 | ignore = 5 | E203 6 | W503 7 | 8 | [pydocstyle] 9 | convention = numpy 10 | 11 | [tool:pytest] 12 | addopts = -v 13 | --color=yes 14 | --cov=deepdow/ 15 | --cov-report=term 16 | --disable-warnings 17 | --tb=short 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | import deepdow 4 | 5 | DESCRIPTION = "Portfolio optimization with deep learning" 6 | LONG_DESCRIPTION = DESCRIPTION 7 | 8 | INSTALL_REQUIRES = [ 9 | "cvxpylayers", 10 | "matplotlib", 11 | "mlflow", 12 | 
"numpy>=1.16.5", 13 | "pandas", 14 | "pillow", 15 | "seaborn", 16 | "torch>=1.5", 17 | "tensorboard", 18 | "tqdm" 19 | ] 20 | 21 | setup( 22 | name="deepdow", 23 | version=deepdow.__version__, 24 | author="Jan Krepl", 25 | author_email="kjan.official@gmail.com", 26 | description=DESCRIPTION, 27 | long_description=LONG_DESCRIPTION, 28 | url="https://github.com/jankrepl/deepdow", 29 | packages=find_packages(exclude=["tests"]), 30 | license="Apache License 2.0", 31 | install_requires=INSTALL_REQUIRES, 32 | python_requires='>=3.6', 33 | extras_require={ 34 | "dev": ["codecov", "flake8==3.7.9", "pydocstyle", "pytest>=4.6", "pytest-cov", "tox"], 35 | "docs": ["sphinx", "sphinx_rtd_theme"], 36 | "examples": ["sphinx_gallery", "statsmodels"] 37 | } 38 | ) 39 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jankrepl/deepdow/384e18acc17c982ac5a4362187b348bdbdb07b98/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | import torch 5 | 6 | from deepdow.data import InRAMDataset, RigidDataLoader 7 | from deepdow.benchmarks import OneOverN 8 | from deepdow.experiments import Run 9 | from deepdow.losses import MeanReturns 10 | from deepdow.nn import DummyNet 11 | 12 | GPU_AVAILABLE = torch.cuda.is_available() 13 | 14 | 15 | @pytest.fixture( 16 | scope="session", params=["B", "M"], ids=["true_freq=B", "true_freq=M"] 17 | ) 18 | def raw_data(request): 19 | """Could represent prices, volumes,... Only positive values are allowed. 20 | 21 | Returns 22 | ------- 23 | df : pd.DataFrame 24 | 2D arrays where where rows represent different time points. 
Columns are a `pd.MultiIndex` with first 25 | level being the assets and the second level being the indicator. 26 | 27 | n_missing_entries : int 28 | Number of missing entries that were intentionally dropped from otherwise regular timeseries. 29 | 30 | true_freq : str 31 | True frequency of the underlying timeseries. 32 | """ 33 | np.random.seed(1) 34 | 35 | n_assets = 4 36 | n_indicators = 6 37 | n_timestamps = 30 38 | n_missing_entries = 3 39 | true_freq = request.param 40 | 41 | missing_ixs = np.random.choice( 42 | list(range(1, n_timestamps - 1)), replace=False, size=n_missing_entries 43 | ) 44 | 45 | index_full = pd.date_range( 46 | "1/1/2000", periods=n_timestamps, freq=true_freq 47 | ) 48 | index = pd.DatetimeIndex( 49 | [x for ix, x in enumerate(index_full) if ix not in missing_ixs] 50 | ) # freq=None 51 | 52 | columns = pd.MultiIndex.from_product( 53 | [ 54 | ["asset_{}".format(i) for i in range(n_assets)], 55 | ["indicator_{}".format(i) for i in range(n_indicators)], 56 | ], 57 | names=["assets", "indicators"], 58 | ) 59 | 60 | df = pd.DataFrame( 61 | np.random.randint( 62 | low=1, 63 | high=1000, 64 | size=(n_timestamps - n_missing_entries, n_assets * n_indicators), 65 | ) 66 | / 100, 67 | index=index, 68 | columns=columns, 69 | ) 70 | 71 | return df, n_missing_entries, true_freq 72 | 73 | 74 | @pytest.fixture(scope="session") 75 | def dataset_dummy(): 76 | """Minimal instance of ``InRAMDataset``. 
77 | 78 | Returns 79 | ------- 80 | InRAMDataset 81 | 82 | """ 83 | n_samples = 200 84 | n_channels = 2 85 | lookback = 9 86 | horizon = 10 87 | n_assets = 6 88 | 89 | X = ( 90 | np.random.normal(size=(n_samples, n_channels, lookback, n_assets)) 91 | / 100 92 | ) 93 | y = np.random.normal(size=(n_samples, n_channels, horizon, n_assets)) / 100 94 | 95 | timestamps = pd.date_range(start="31/01/2000", periods=n_samples, freq="M") 96 | asset_names = ["asset_{}".format(i) for i in range(n_assets)] 97 | 98 | return InRAMDataset(X, y, timestamps=timestamps, asset_names=asset_names) 99 | 100 | 101 | @pytest.fixture() 102 | def dataloader_dummy(dataset_dummy): 103 | """Minimal instance of ``RigidDataLoader``. 104 | 105 | Parameters 106 | ---------- 107 | dataset_dummy : InRAMDataset 108 | Underlying dataset. 109 | 110 | 111 | Returns 112 | ------- 113 | 114 | """ 115 | batch_size = 4 116 | return RigidDataLoader(dataset_dummy, batch_size=batch_size) 117 | 118 | 119 | @pytest.fixture( 120 | params=[ 121 | pytest.param((torch.float32, torch.device("cpu")), id="float32_cpu"), 122 | pytest.param((torch.float64, torch.device("cpu")), id="float64_cpu"), 123 | pytest.param( 124 | (torch.float32, torch.device("cuda:0")), 125 | id="float32_gpu", 126 | marks=[] if GPU_AVAILABLE else pytest.mark.skip, 127 | ), 128 | pytest.param( 129 | (torch.float64, torch.device("cuda:0")), 130 | id="float64_gpu", 131 | marks=[] if GPU_AVAILABLE else pytest.mark.skip, 132 | ), 133 | ] 134 | ) 135 | def dtype_device(request): 136 | dtype, device = request.param 137 | return dtype, device 138 | 139 | 140 | @pytest.fixture() 141 | def Xy_dummy(dtype_device, dataloader_dummy): 142 | dtype, device = dtype_device 143 | X, y, timestamps, asset_names = next(iter(dataloader_dummy)) 144 | 145 | return ( 146 | X.to(dtype=dtype, device=device), 147 | y.to(dtype=dtype, device=device), 148 | timestamps, 149 | asset_names, 150 | ) 151 | 152 | 153 | @pytest.fixture() 154 | def network_dummy(dataset_dummy): 155 | 
return DummyNet(n_channels=dataset_dummy.n_channels) 156 | 157 | 158 | @pytest.fixture 159 | def run_dummy(dataloader_dummy, network_dummy, Xy_dummy): 160 | """""" 161 | X_batch, y_batch, timestamps, asset_names = Xy_dummy 162 | 163 | device = X_batch.device 164 | dtype = X_batch.dtype 165 | 166 | return Run( 167 | network_dummy, 168 | MeanReturns(), 169 | dataloader_dummy, 170 | val_dataloaders={"val": dataloader_dummy}, 171 | benchmarks={"bm": OneOverN()}, 172 | device=device, 173 | dtype=dtype, 174 | ) 175 | 176 | 177 | @pytest.fixture 178 | def metadata_dummy(Xy_dummy, network_dummy): 179 | X_batch, y_batch, timestamps, asset_names = Xy_dummy 180 | 181 | device = X_batch.device 182 | dtype = X_batch.dtype 183 | _, _, horizon, _ = y_batch.shape 184 | 185 | network_dummy.to(device=device, dtype=dtype) 186 | 187 | return { 188 | "asset_names": asset_names, 189 | "batch": 1, 190 | "batch_loss": 1.4, 191 | "epoch": 1, 192 | "exception": ValueError, 193 | "locals": {"a": 2}, 194 | "n_epochs": 2, 195 | "timestamps": timestamps, 196 | "weights": network_dummy(X_batch), 197 | "X_batch": X_batch, 198 | "y_batch": y_batch, 199 | } 200 | -------------------------------------------------------------------------------- /tests/test_benchmarks.py: -------------------------------------------------------------------------------- 1 | """Collection of tests focused on `benchmarks` module.""" 2 | import pytest 3 | import torch 4 | 5 | from deepdow.benchmarks import ( 6 | Benchmark, 7 | InverseVolatility, 8 | MaximumReturn, 9 | MinimumVariance, 10 | OneOverN, 11 | Random, 12 | Singleton, 13 | ) 14 | 15 | 16 | class TestBenchmark: 17 | def test_errors(self, Xy_dummy): 18 | X_dummy, _, _, _ = Xy_dummy 19 | with pytest.raises(TypeError): 20 | Benchmark() 21 | 22 | class TempBenchmarkWrong(Benchmark): 23 | pass 24 | 25 | class TempBenchmarkCorrect(Benchmark): 26 | def __call__(self, X): 27 | return X * 2 28 | 29 | with pytest.raises(TypeError): 30 | TempBenchmarkWrong() 31 | 32 | temp = 
TempBenchmarkCorrect() 33 | temp(X_dummy) 34 | 35 | assert isinstance(temp.hparams, dict) 36 | 37 | 38 | class TestInverseVolatility: 39 | @pytest.mark.parametrize( 40 | "use_std", [True, False], ids=["use_std", "use_var"] 41 | ) 42 | def test_basic(self, Xy_dummy, use_std): 43 | X_dummy, _, _, _ = Xy_dummy 44 | n_samples, n_channels, lookback, n_assets = X_dummy.shape 45 | dtype = X_dummy.dtype 46 | device = X_dummy.device 47 | bm = InverseVolatility(use_std=use_std) 48 | 49 | weights = bm(X_dummy) 50 | 51 | assert isinstance(weights, torch.Tensor) 52 | assert weights.shape == (n_samples, n_assets) 53 | assert weights.dtype == dtype 54 | assert weights.device == device 55 | assert torch.allclose( 56 | weights.sum(dim=1), 57 | torch.ones(n_samples).to(dtype=dtype, device=device), 58 | ) 59 | assert torch.all(weights >= 0) and torch.all(weights <= 1) 60 | assert isinstance(bm.hparams, dict) and bm.hparams 61 | 62 | 63 | class TestMaximumReturn: 64 | @pytest.mark.parametrize( 65 | "max_weight", [1, 0.5], ids=["max_weight=1", "max_weight=0.5"] 66 | ) 67 | @pytest.mark.parametrize( 68 | "predefined_assets", 69 | [True, False], 70 | ids=["fixed_assets", "nonfixed_assets"], 71 | ) 72 | def test_basic(self, Xy_dummy, predefined_assets, max_weight): 73 | X_dummy, _, _, _ = Xy_dummy 74 | eps = 1e-3 75 | n_samples, n_channels, lookback, n_assets = X_dummy.shape 76 | dtype = X_dummy.dtype 77 | device = X_dummy.device 78 | 79 | X_more_assets = torch.cat([X_dummy, X_dummy], dim=-1) 80 | 81 | bm = MaximumReturn( 82 | n_assets=n_assets if predefined_assets else None, 83 | max_weight=max_weight, 84 | ) 85 | 86 | weights = bm(X_dummy) 87 | 88 | assert isinstance(weights, torch.Tensor) 89 | assert weights.shape == (n_samples, n_assets) 90 | assert weights.dtype == dtype 91 | assert weights.device == device 92 | assert torch.allclose( 93 | weights.sum(dim=1), 94 | torch.ones(n_samples).to(dtype=dtype, device=device), 95 | atol=1e-4, 96 | ) 97 | assert torch.all(-eps <= weights) and 
torch.all( 98 | weights <= max_weight + eps 99 | ) 100 | assert isinstance(bm.hparams, dict) and bm.hparams 101 | 102 | if predefined_assets: 103 | with pytest.raises(ValueError): 104 | bm(X_more_assets) 105 | 106 | return 107 | else: 108 | bm(X_more_assets) 109 | 110 | 111 | class TestMinimumVariance: 112 | @pytest.mark.parametrize( 113 | "max_weight", [1, 0.5], ids=["max_weight=1", "max_weight=0.5"] 114 | ) 115 | @pytest.mark.parametrize( 116 | "predefined_assets", 117 | [True, False], 118 | ids=["fixed_assets", "nonfixed_assets"], 119 | ) 120 | def test_basic(self, Xy_dummy, predefined_assets, max_weight): 121 | X_dummy, _, _, _ = Xy_dummy 122 | eps = 1e-4 123 | n_samples, n_channels, lookback, n_assets = X_dummy.shape 124 | dtype = X_dummy.dtype 125 | device = X_dummy.device 126 | 127 | X_more_assets = torch.cat([X_dummy, X_dummy], dim=-1) 128 | 129 | bm = MinimumVariance( 130 | n_assets=n_assets if predefined_assets else None, 131 | max_weight=max_weight, 132 | ) 133 | 134 | weights = bm(X_dummy) 135 | 136 | assert isinstance(weights, torch.Tensor) 137 | assert weights.shape == (n_samples, n_assets) 138 | assert weights.dtype == dtype 139 | assert weights.device == device 140 | assert torch.allclose( 141 | weights.sum(dim=1), 142 | torch.ones(n_samples).to(dtype=dtype, device=device), 143 | atol=1e-4, 144 | ) 145 | assert torch.all(-eps <= weights) and torch.all( 146 | weights <= max_weight + eps 147 | ) 148 | assert isinstance(bm.hparams, dict) and bm.hparams 149 | 150 | if predefined_assets: 151 | with pytest.raises(ValueError): 152 | bm(X_more_assets) 153 | 154 | return 155 | else: 156 | bm(X_more_assets) 157 | 158 | 159 | class TestOneOverN: 160 | def test_basic(self, Xy_dummy): 161 | X_dummy, _, _, _ = Xy_dummy 162 | n_samples, n_channels, lookback, n_assets = X_dummy.shape 163 | dtype = X_dummy.dtype 164 | device = X_dummy.device 165 | 166 | bm = OneOverN() 167 | weights = bm(X_dummy) 168 | 169 | assert isinstance(weights, torch.Tensor) 170 | assert 
weights.dtype == dtype 171 | assert weights.device == device 172 | assert torch.allclose( 173 | weights.sum(dim=1), 174 | torch.ones(n_samples).to(dtype=dtype, device=device), 175 | ) 176 | assert len(torch.unique(weights)) == 1 177 | assert isinstance(bm.hparams, dict) and not bm.hparams 178 | 179 | 180 | class TestRandom: 181 | def test_basic(self, Xy_dummy): 182 | X_dummy, _, _, _ = Xy_dummy 183 | n_samples, n_channels, lookback, n_assets = X_dummy.shape 184 | dtype = X_dummy.dtype 185 | device = X_dummy.device 186 | bm = Random() 187 | 188 | weights = bm(X_dummy) 189 | 190 | assert isinstance(weights, torch.Tensor) 191 | assert weights.shape == (n_samples, n_assets) 192 | assert weights.dtype == dtype 193 | assert weights.device == device 194 | assert torch.allclose( 195 | weights.sum(dim=1), 196 | torch.ones(n_samples).to(dtype=dtype, device=device), 197 | ) 198 | 199 | assert torch.all(weights >= 0) and torch.all(weights <= 1) 200 | assert isinstance(bm.hparams, dict) and not bm.hparams 201 | 202 | 203 | class TestSingleton: 204 | @pytest.mark.parametrize("asset_ix", [0, 3]) 205 | def test_basic(self, asset_ix, Xy_dummy): 206 | X_dummy, _, _, _ = Xy_dummy 207 | n_samples, n_channels, lookback, n_assets = X_dummy.shape 208 | dtype = X_dummy.dtype 209 | device = X_dummy.device 210 | bm = Singleton(asset_ix=asset_ix) 211 | weights = bm(X_dummy) 212 | 213 | assert isinstance(weights, torch.Tensor) 214 | assert weights.shape == (n_samples, n_assets) 215 | assert weights.dtype == dtype 216 | assert weights.device == device 217 | assert torch.allclose( 218 | weights.sum(dim=1), 219 | torch.ones(n_samples).to(dtype=dtype, device=device), 220 | ) 221 | 222 | assert torch.allclose( 223 | weights[:, asset_ix], 224 | torch.ones(n_samples).to(dtype=dtype, device=device), 225 | ) 226 | assert isinstance(bm.hparams, dict) and bm.hparams 227 | 228 | def test_error(self): 229 | with pytest.raises(IndexError): 230 | Singleton(asset_ix=3)(torch.ones(2, 1, 3, 2)) 231 | 
-------------------------------------------------------------------------------- /tests/test_callbacks.py: -------------------------------------------------------------------------------- 1 | """Collection of tests focused on the callbacks module.""" 2 | import datetime 3 | import pathlib 4 | 5 | import pandas as pd 6 | import pytest 7 | 8 | from deepdow.callbacks import ( 9 | BenchmarkCallback, 10 | Callback, 11 | EarlyStoppingCallback, 12 | EarlyStoppingException, 13 | ModelCheckpointCallback, 14 | MLFlowCallback, 15 | ProgressBarCallback, 16 | TensorBoardCallback, 17 | ValidationCallback, 18 | ) 19 | 20 | ALL_METHODS = [ 21 | "on_train_begin", 22 | "on_epoch_begin", 23 | "on_batch_begin", 24 | "on_train_interrupt", 25 | "on_batch_end", 26 | "on_epoch_end", 27 | "on_train_end", 28 | ] 29 | 30 | ALL_CALLBACKS = [ 31 | BenchmarkCallback, 32 | Callback, 33 | MLFlowCallback, 34 | ProgressBarCallback, 35 | TensorBoardCallback, 36 | ValidationCallback, 37 | ] 38 | 39 | 40 | @pytest.mark.parametrize("lookbacks", [None, [2, 3]]) 41 | def test_benchmark(run_dummy, metadata_dummy, lookbacks): 42 | cb = BenchmarkCallback(lookbacks) 43 | cb.run = run_dummy 44 | 45 | run_dummy.callbacks = [] # make sure there are no default callbacks 46 | run_dummy.callbacks.append(cb) 47 | 48 | for method_name in ALL_METHODS: 49 | getattr(run_dummy, method_name)(metadata_dummy) 50 | 51 | assert isinstance(run_dummy.history.metrics_per_epoch(-1), pd.DataFrame) 52 | assert len(run_dummy.history.metrics["epoch"].unique()) == 1 53 | 54 | 55 | class TestEarlyStoppingCallback: 56 | def test_error(self, run_dummy, metadata_dummy): 57 | dataloader_name = list(run_dummy.val_dataloaders.keys())[0] 58 | metric_name = list(run_dummy.metrics.keys())[0] 59 | 60 | cb_wrong_dataloader = EarlyStoppingCallback( 61 | dataloader_name="fake", metric_name=metric_name 62 | ) 63 | cb_wrong_metric = EarlyStoppingCallback( 64 | dataloader_name=dataloader_name, metric_name="fake" 65 | ) 66 | 67 | 
cb_wrong_dataloader.run = run_dummy 68 | cb_wrong_metric.run = run_dummy 69 | 70 | with pytest.raises(ValueError): 71 | cb_wrong_dataloader.on_train_begin(metadata_dummy) 72 | 73 | with pytest.raises(ValueError): 74 | cb_wrong_metric.on_train_begin(metadata_dummy) 75 | 76 | def test_basic(self, run_dummy, metadata_dummy): 77 | dataloader_name = list(run_dummy.val_dataloaders.keys())[0] 78 | metric_name = list(run_dummy.metrics.keys())[0] 79 | 80 | cb = EarlyStoppingCallback( 81 | dataloader_name=dataloader_name, 82 | metric_name=metric_name, 83 | patience=0, 84 | ) 85 | 86 | cb.run = run_dummy 87 | cb_val = ValidationCallback() 88 | cb_val.run = run_dummy 89 | 90 | run_dummy.callbacks = [ 91 | cb_val, 92 | cb, 93 | ] # make sure there are no default callbacks 94 | 95 | with pytest.raises(EarlyStoppingException): 96 | for method_name in ALL_METHODS: 97 | getattr(run_dummy, method_name)(metadata_dummy) 98 | 99 | cb.on_train_interrupt({"exception": EarlyStoppingException()}) 100 | 101 | 102 | class TestMLFlowCallback: 103 | @pytest.mark.parametrize( 104 | "log_benchmarks", [True, False], ids=["log_bmarks", "dont_log_bmarks"] 105 | ) 106 | def test_independent( 107 | self, run_dummy, metadata_dummy, tmpdir, log_benchmarks 108 | ): 109 | with pytest.raises(ValueError): 110 | MLFlowCallback( 111 | run_name="name", 112 | run_id="some_id", 113 | log_benchmarks=log_benchmarks, 114 | mlflow_path=pathlib.Path(str(tmpdir)), 115 | ) 116 | 117 | cb = MLFlowCallback( 118 | mlflow_path=pathlib.Path(str(tmpdir)), 119 | experiment_name="test", 120 | log_benchmarks=log_benchmarks, 121 | ) 122 | cb.run = run_dummy 123 | 124 | run_dummy.callbacks = [cb] # make sure there are no default callbacks 125 | 126 | for method_name in ALL_METHODS: 127 | getattr(run_dummy, method_name)(metadata_dummy) 128 | 129 | def test_benchmarks(self, run_dummy, metadata_dummy, tmpdir): 130 | cb = MLFlowCallback( 131 | mlflow_path=pathlib.Path(str(tmpdir)), 132 | experiment_name="test", 133 | 
log_benchmarks=True, 134 | ) 135 | cb_bm = BenchmarkCallback() 136 | 137 | cb.run = run_dummy 138 | cb_bm.run = run_dummy 139 | 140 | run_dummy.callbacks = [ 141 | cb_bm, 142 | cb, 143 | ] # make sure there are no default callbacks 144 | 145 | for method_name in ALL_METHODS: 146 | getattr(run_dummy, method_name)(metadata_dummy) 147 | 148 | def test_validation(self, run_dummy, metadata_dummy, tmpdir): 149 | cb = MLFlowCallback( 150 | mlflow_path=pathlib.Path(str(tmpdir)), 151 | experiment_name="test", 152 | log_benchmarks=False, 153 | ) 154 | cb_val = ValidationCallback() 155 | 156 | cb.run = run_dummy 157 | cb_val.run = run_dummy 158 | 159 | run_dummy.callbacks = [ 160 | cb_val, 161 | cb, 162 | ] # make sure there are no default callbacks 163 | 164 | for method_name in ALL_METHODS: 165 | getattr(run_dummy, method_name)(metadata_dummy) 166 | 167 | 168 | class TestModelCheckpointCallback(Callback): 169 | def test_error(self, run_dummy, metadata_dummy, tmpdir): 170 | dataloader_name = list(run_dummy.val_dataloaders.keys())[0] 171 | metric_name = list(run_dummy.metrics.keys())[0] 172 | 173 | folder_path = pathlib.Path(str(tmpdir)) 174 | some_file_path = folder_path / "some_file.txt" 175 | some_file_path.touch() 176 | 177 | with pytest.raises(NotADirectoryError): 178 | ModelCheckpointCallback( 179 | folder_path=some_file_path, 180 | dataloader_name=dataloader_name, 181 | metric_name=metric_name, 182 | ) 183 | 184 | cb_wrong_dataloader = ModelCheckpointCallback( 185 | folder_path, dataloader_name="fake", metric_name=metric_name 186 | ) 187 | cb_wrong_metric = ModelCheckpointCallback( 188 | folder_path, dataloader_name=dataloader_name, metric_name="fake" 189 | ) 190 | 191 | cb_wrong_dataloader.run = run_dummy 192 | cb_wrong_metric.run = run_dummy 193 | 194 | with pytest.raises(ValueError): 195 | cb_wrong_dataloader.on_train_begin(metadata_dummy) 196 | 197 | with pytest.raises(ValueError): 198 | cb_wrong_metric.on_train_begin(metadata_dummy) 199 | 200 | def 
test_basic(self, run_dummy, metadata_dummy, tmpdir): 201 | dataloader_name = list(run_dummy.val_dataloaders.keys())[0] 202 | metric_name = list(run_dummy.metrics.keys())[0] 203 | 204 | cb = ModelCheckpointCallback( 205 | folder_path=pathlib.Path(str(tmpdir)), 206 | dataloader_name=dataloader_name, 207 | metric_name=metric_name, 208 | verbose=True, 209 | ) 210 | 211 | cb.run = run_dummy 212 | cb_val = ValidationCallback() 213 | cb_val.run = run_dummy 214 | 215 | run_dummy.callbacks = [ 216 | cb_val, 217 | cb, 218 | ] # make sure there are no default callbacks 219 | 220 | for method_name in ALL_METHODS: 221 | getattr(run_dummy, method_name)(metadata_dummy) 222 | 223 | cb.on_train_interrupt({"exception": EarlyStoppingException()}) 224 | 225 | 226 | class TestProgressBarCallback: 227 | @pytest.mark.parametrize("output", ["stderr", "stdout"]) 228 | def test_independent(self, run_dummy, metadata_dummy, output): 229 | with pytest.raises(ValueError): 230 | ProgressBarCallback(output="{}_fake".format(output)) 231 | 232 | cb = ProgressBarCallback(output=output) 233 | 234 | cb.run = run_dummy 235 | 236 | run_dummy.callbacks = [cb] # make sure there are no default callbacks 237 | 238 | for method_name in ALL_METHODS: 239 | getattr(run_dummy, method_name)(metadata_dummy) 240 | 241 | def test_validation(self, run_dummy, metadata_dummy): 242 | cb = ProgressBarCallback() 243 | cb_val = ValidationCallback() 244 | 245 | cb.run = run_dummy 246 | cb_val.run = run_dummy 247 | 248 | run_dummy.callbacks = [ 249 | cb_val, 250 | cb, 251 | ] # make sure there are no default callbacks 252 | 253 | for method_name in ALL_METHODS: 254 | getattr(run_dummy, method_name)(metadata_dummy) 255 | 256 | 257 | class TestTensorBoardCallback: 258 | @pytest.mark.parametrize( 259 | "ts_type", ["single_inside", "single_outside", "all"] 260 | ) 261 | def test_independent(self, run_dummy, metadata_dummy, tmpdir, ts_type): 262 | 263 | if ts_type == "single_inside": 264 | ts = metadata_dummy["timestamps"][0] 265 
| elif ts_type == "single_outside": 266 | ts = datetime.datetime.now() 267 | elif ts_type == "all": 268 | ts = None 269 | else: 270 | ValueError() 271 | 272 | cb = TensorBoardCallback(log_dir=pathlib.Path(str(tmpdir)), ts=ts) 273 | cb.run = run_dummy 274 | 275 | run_dummy.callbacks = [cb] # make sure there are no default callbacks 276 | 277 | for method_name in ALL_METHODS: 278 | if method_name == "on_batch_end": 279 | run_dummy.network( 280 | metadata_dummy["X_batch"] 281 | ) # let the forward hook take effect 282 | 283 | getattr(run_dummy, method_name)(metadata_dummy) 284 | 285 | @pytest.mark.parametrize( 286 | "bm_available", 287 | [True, False], 288 | ids=["bmarks_available", "bmarks_unavailable"], 289 | ) 290 | def test_benchmark(self, run_dummy, metadata_dummy, bm_available, tmpdir): 291 | cb = TensorBoardCallback( 292 | log_benchmarks=True, log_dir=pathlib.Path(str(tmpdir)) 293 | ) 294 | cb_bm = BenchmarkCallback() 295 | 296 | cb.run = run_dummy 297 | cb_bm.run = run_dummy 298 | 299 | run_dummy.callbacks = ( 300 | [cb_bm, cb] if bm_available else [cb] 301 | ) # make sure there are no default callbacks 302 | 303 | for method_name in ALL_METHODS: 304 | getattr(run_dummy, method_name)(metadata_dummy) 305 | 306 | def test_validation(self, run_dummy, metadata_dummy, tmpdir): 307 | cb = TensorBoardCallback(log_dir=pathlib.Path(str(tmpdir))) 308 | cb_val = ValidationCallback() 309 | 310 | cb.run = run_dummy 311 | cb_val.run = run_dummy 312 | 313 | run_dummy.callbacks = [ 314 | cb_val, 315 | cb, 316 | ] # make sure there are no default callbacks 317 | 318 | for method_name in ALL_METHODS: 319 | getattr(run_dummy, method_name)(metadata_dummy) 320 | 321 | 322 | @pytest.mark.parametrize("lookbacks", [None, [2, 3]]) 323 | def test_validation(run_dummy, metadata_dummy, lookbacks): 324 | cb = ValidationCallback(lookbacks=lookbacks) 325 | cb.run = run_dummy 326 | 327 | run_dummy.callbacks = [cb] # make sure there are no default callbacks 328 | 329 | for method_name in 
@pytest.mark.parametrize(
    "tform",
    [
        Compose(
            [
                lambda a, b, c, d: (2 * a, b, c, d),
                lambda a, b, c, d: (3 + a, b, c, d),
            ]
        ),
        Dropout(p=0.5),
        Multiply(c=4),
        Noise(0.3),
        Scale(np.array([1.2]), np.array([5.7])),
    ],
)
def test_tforms_not_in_place_for_x(tform):
    """Every transform returns a new feature tensor, leaving the input intact."""
    x_in = torch.randn(1, 4, 5)
    snapshot = x_in.clone()

    x_out = tform(x_in, None, None, None)[0]

    assert torch.allclose(x_in, snapshot)  # original not mutated
    assert not torch.allclose(x_out, x_in)  # output actually changed
    assert x_out.shape == x_in.shape
prepare_robust_scaler( 62 | np.ones((1, 2, 3, 4)), percentile_range=(20, 10) 63 | ) 64 | 65 | with pytest.raises(ValueError): 66 | prepare_robust_scaler( 67 | np.ones((1, 2, 3, 4)), percentile_range=(-2, 99) 68 | ) 69 | 70 | @pytest.mark.parametrize("overlap", [True, False]) 71 | @pytest.mark.parametrize("indices", [None, [1, 4, 6]]) 72 | def test_basic(self, overlap, indices): 73 | n_samples, n_channels, lookback, n_assets = 10, 3, 5, 12 74 | 75 | X = np.random.random((n_samples, n_channels, lookback, n_assets)) - 0.5 76 | 77 | medians, ranges = prepare_robust_scaler( 78 | X, overlap=overlap, indices=indices 79 | ) 80 | 81 | assert medians.shape == (n_channels,) 82 | assert ranges.shape == (n_channels,) 83 | assert np.all(ranges > 0) 84 | 85 | def test_sanity(self): 86 | n_samples, n_channels, lookback, n_assets = 10, 3, 5, 12 87 | 88 | X = np.random.random((n_samples, n_channels, lookback, n_assets)) - 0.5 89 | 90 | medians_1, ranges_1 = prepare_robust_scaler( 91 | X, percentile_range=(20, 80) 92 | ) 93 | medians_2, ranges_2 = prepare_robust_scaler( 94 | X, percentile_range=(10, 90) 95 | ) 96 | 97 | assert np.all(ranges_2 > ranges_1) 98 | 99 | 100 | class TestScaler: 101 | def test_erorrs(self): 102 | with pytest.raises(ValueError): 103 | raise Scale(np.ones(3), np.ones(4)) 104 | 105 | with pytest.raises(ValueError): 106 | raise Scale(np.array([1, -1]), np.array([9, -0.1])) 107 | 108 | tform = Scale(np.array([1, -1]), np.array([9, 10.0])) 109 | with pytest.raises(ValueError): 110 | tform(torch.rand(3, 4, 5), None, None, None) 111 | 112 | def test_overall(self): 113 | n_channels, lookback, n_assets = 3, 5, 12 114 | 115 | X = np.random.random((n_channels, lookback, n_assets)) 116 | X_torch = torch.as_tensor(X) 117 | dtype = X_torch.dtype 118 | 119 | center = X.mean(axis=(1, 2)) 120 | scale = X.std( 121 | axis=(1, 2), 122 | ) 123 | 124 | tform = Scale(center, scale) 125 | X_scaled = tform(X_torch, None, None, None)[0] 126 | 127 | assert torch.is_tensor(X_scaled) 128 
| assert X_torch.shape == X_scaled.shape 129 | assert not torch.allclose(X_torch, X_scaled) 130 | assert torch.allclose( 131 | X_scaled.mean(dim=(1, 2)), torch.zeros(n_channels, dtype=dtype) 132 | ) 133 | assert torch.allclose( 134 | X_scaled.std(dim=(1, 2), unbiased=False), 135 | torch.ones(n_channels, dtype=dtype), 136 | ) 137 | -------------------------------------------------------------------------------- /tests/test_data/test_load.py: -------------------------------------------------------------------------------- 1 | """Tests focused on the data module.""" 2 | import datetime 3 | import numpy as np 4 | import pytest 5 | import torch 6 | 7 | from deepdow.data import ( 8 | Compose, 9 | Dropout, 10 | FlexibleDataLoader, 11 | InRAMDataset, 12 | Multiply, 13 | Noise, 14 | RigidDataLoader, 15 | ) 16 | from deepdow.data.load import collate_uniform 17 | 18 | 19 | class TestCollateUniform: 20 | def test_incorrect_input(self): 21 | with pytest.raises(ValueError): 22 | collate_uniform([], n_assets_range=(-2, 0)) 23 | 24 | with pytest.raises(ValueError): 25 | collate_uniform([], lookback_range=(3, 1)) 26 | 27 | with pytest.raises(ValueError): 28 | collate_uniform([], horizon_range=(10, 10)) 29 | 30 | def test_dummy(self): 31 | n_samples = 14 32 | max_n_assets = 10 33 | max_lookback = 8 34 | max_horizon = 5 35 | n_channels = 2 36 | 37 | batch = [ 38 | ( 39 | torch.zeros((n_channels, max_lookback, max_n_assets)), 40 | torch.ones((n_channels, max_horizon, max_n_assets)), 41 | datetime.datetime.now(), 42 | ["asset_{}".format(i) for i in range(max_n_assets)], 43 | ) 44 | for _ in range(n_samples) 45 | ] 46 | 47 | ( 48 | X_batch, 49 | y_batch, 50 | timestamps_batch, 51 | asset_names_batch, 52 | ) = collate_uniform( 53 | batch, 54 | n_assets_range=(5, 6), 55 | lookback_range=(4, 5), 56 | horizon_range=(3, 4), 57 | ) 58 | 59 | assert torch.is_tensor(X_batch) 60 | assert torch.is_tensor(y_batch) 61 | 62 | assert X_batch.shape == (n_samples, n_channels, 4, 5) 63 | assert 
y_batch.shape == (n_samples, n_channels, 3, 5) 64 | assert len(timestamps_batch) == n_samples 65 | assert len(asset_names_batch) == 5 66 | 67 | def test_replicable(self): 68 | random_state_a = 3 69 | random_state_b = 5 70 | 71 | n_samples = 14 72 | max_n_assets = 10 73 | max_lookback = 8 74 | max_horizon = 5 75 | n_channels = 2 76 | 77 | batch = [ 78 | ( 79 | torch.rand((n_channels, max_lookback, max_n_assets)), 80 | torch.rand((n_channels, max_horizon, max_n_assets)), 81 | datetime.datetime.now(), 82 | ["asset_{}".format(i) for i in range(max_n_assets)], 83 | ) 84 | for _ in range(n_samples) 85 | ] 86 | 87 | X_batch_1, y_batch_1, _, _ = collate_uniform( 88 | batch, 89 | random_state=random_state_a, 90 | n_assets_range=(4, 5), 91 | lookback_range=(4, 5), 92 | horizon_range=(3, 4), 93 | ) 94 | X_batch_2, y_batch_2, _, _ = collate_uniform( 95 | batch, 96 | random_state=random_state_a, 97 | n_assets_range=(4, 5), 98 | lookback_range=(4, 5), 99 | horizon_range=(3, 4), 100 | ) 101 | 102 | X_batch_3, y_batch_3, _, _ = collate_uniform( 103 | batch, 104 | random_state=random_state_b, 105 | n_assets_range=(4, 5), 106 | lookback_range=(4, 5), 107 | horizon_range=(3, 4), 108 | ) 109 | 110 | assert torch.allclose(X_batch_1, X_batch_2) 111 | assert torch.allclose(y_batch_1, y_batch_2) 112 | 113 | assert not torch.allclose(X_batch_3, X_batch_1) 114 | assert not torch.allclose(y_batch_3, y_batch_1) 115 | 116 | def test_different(self): 117 | n_samples = 6 118 | max_n_assets = 27 119 | max_lookback = 15 120 | max_horizon = 12 121 | 122 | n_channels = 2 123 | batch = [ 124 | ( 125 | torch.rand((n_channels, max_lookback, max_n_assets)), 126 | torch.rand((n_channels, max_horizon, max_n_assets)), 127 | datetime.datetime.now(), 128 | ["asset_{}".format(i) for i in range(max_n_assets)], 129 | ) 130 | for _ in range(n_samples) 131 | ] 132 | n_trials = 10 133 | 134 | n_assets_set = set() 135 | lookback_set = set() 136 | horizon_set = set() 137 | 138 | for _ in range(n_trials): 139 | ( 140 
| X_batch, 141 | y_batch, 142 | timestamps_batch, 143 | asset_names_batch, 144 | ) = collate_uniform( 145 | batch, 146 | n_assets_range=(2, max_n_assets), 147 | lookback_range=(2, max_lookback), 148 | horizon_range=(2, max_lookback), 149 | ) 150 | 151 | n_assets_set.add(X_batch.shape[-1]) 152 | lookback_set.add(X_batch.shape[-2]) 153 | horizon_set.add(y_batch.shape[-2]) 154 | 155 | assert len(n_assets_set) > 1 156 | assert len(lookback_set) > 1 157 | assert len(horizon_set) > 1 158 | 159 | 160 | class TestInRAMDataset: 161 | def test_incorrect_input(self): 162 | with pytest.raises(ValueError): 163 | InRAMDataset(np.zeros((2, 1, 3, 4)), np.zeros((3, 1, 5, 4))) 164 | 165 | with pytest.raises(ValueError): 166 | InRAMDataset(np.zeros((2, 1, 3, 4)), np.zeros((2, 2, 6, 4))) 167 | 168 | with pytest.raises(ValueError): 169 | InRAMDataset(np.zeros((2, 1, 3, 4)), np.zeros((2, 1, 3, 6))) 170 | 171 | @pytest.mark.parametrize("n_samples", [1, 3, 6]) 172 | def test_lenght(self, n_samples): 173 | dset = InRAMDataset( 174 | np.zeros((n_samples, 1, 3, 4)), np.zeros((n_samples, 1, 6, 4)) 175 | ) 176 | 177 | assert len(dset) == n_samples 178 | 179 | def test_get_item(self): 180 | n_samples = 3 181 | 182 | n_channels = 3 183 | 184 | X = np.zeros((n_samples, n_channels, 3, 4)) 185 | y = np.zeros((n_samples, n_channels, 6, 4)) 186 | 187 | for i in range(n_samples): 188 | X[i] = i 189 | y[i] = i 190 | 191 | dset = InRAMDataset(X, y) 192 | 193 | for i in range(n_samples): 194 | X_sample, y_sample, _, _ = dset[i] 195 | 196 | assert torch.is_tensor(X_sample) 197 | assert torch.is_tensor(y_sample) 198 | 199 | assert X_sample.shape == (n_channels, 3, 4) 200 | assert y_sample.shape == (n_channels, 6, 4) 201 | 202 | assert torch.allclose(X_sample, torch.ones_like(X_sample) * i) 203 | assert torch.allclose(y_sample, torch.ones_like(y_sample) * i) 204 | 205 | def test_transforms(self): 206 | n_samples = 13 207 | n_channels = 2 208 | lookback = 9 209 | horizon = 10 210 | n_assets = 6 211 | 212 | X 
= ( 213 | np.random.normal(size=(n_samples, n_channels, lookback, n_assets)) 214 | / 100 215 | ) 216 | y = ( 217 | np.random.normal(size=(n_samples, n_channels, horizon, n_assets)) 218 | / 100 219 | ) 220 | 221 | dataset = InRAMDataset( 222 | X, y, transform=Compose([Noise(), Dropout(p=0.5), Multiply(c=100)]) 223 | ) 224 | 225 | X_sample, y_sample, timestamps_sample, asset_names = dataset[1] 226 | 227 | assert (X_sample == 0).sum() > 0 # dropout 228 | assert X_sample.max() > 1 # multiply 100 229 | assert X_sample.min() < -1 # multiply 100 230 | 231 | assert (y_sample == 0).sum() == 0 232 | assert y_sample.max() < 1 233 | assert y_sample.min() > -1 234 | 235 | 236 | class TestFlexibleDataLoader: 237 | def test_wrong_construction(self, dataset_dummy): 238 | max_assets = dataset_dummy.n_assets 239 | max_lookback = dataset_dummy.lookback 240 | max_horizon = dataset_dummy.horizon 241 | 242 | with pytest.raises(ValueError): 243 | FlexibleDataLoader( 244 | dataset_dummy, 245 | indices=None, 246 | asset_ixs=list(range(len(dataset_dummy))), 247 | n_assets_range=(max_assets, max_assets + 1), 248 | lookback_range=(max_lookback, max_lookback + 1), 249 | horizon_range=(-2, max_horizon + 1), 250 | ) 251 | 252 | with pytest.raises(ValueError): 253 | FlexibleDataLoader( 254 | dataset_dummy, 255 | indices=[-1], 256 | n_assets_range=(max_assets, max_assets + 1), 257 | lookback_range=(max_lookback, max_lookback + 1), 258 | horizon_range=(max_horizon, max_horizon + 1), 259 | ) 260 | 261 | with pytest.raises(ValueError): 262 | FlexibleDataLoader( 263 | dataset_dummy, 264 | indices=None, 265 | n_assets_range=(max_assets, max_assets + 2), 266 | lookback_range=(max_lookback, max_lookback + 1), 267 | horizon_range=(max_horizon, max_horizon + 1), 268 | ) 269 | 270 | with pytest.raises(ValueError): 271 | FlexibleDataLoader( 272 | dataset_dummy, 273 | indices=None, 274 | n_assets_range=(max_assets, max_assets + 1), 275 | lookback_range=(0, max_lookback + 1), 276 | horizon_range=(max_horizon, 
class TestRidigDataLoader:
    """Tests for ``RigidDataLoader`` construction and attributes."""

    def test_wrong_construction(self, dataset_dummy):
        """Out-of-range indices, assets, lookback and horizon are rejected."""
        max_assets = dataset_dummy.n_assets
        max_lookback = dataset_dummy.lookback
        max_horizon = dataset_dummy.horizon

        invalid_kwargs = (
            {"indices": [-1]},
            {"asset_ixs": [max_assets + 1, max_assets + 2]},
            {"lookback": max_lookback + 1},
            {"horizon": max_horizon + 1},
        )

        for kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                RigidDataLoader(dataset_dummy, **kwargs)

    def test_basic(self, dataset_dummy):
        """Default construction exposes hyperparameters as a dict."""
        dl = RigidDataLoader(dataset_dummy)

        assert isinstance(dl.hparams, dict)
class TestSin:
    """Sanity checks for the single sine-wave generator."""

    @pytest.mark.parametrize("n_timesteps", [50, 120])
    @pytest.mark.parametrize("period_length", [2, 5, 9])
    @pytest.mark.parametrize("amplitude", [0.1, 10])
    def test_basic(self, n_timesteps, period_length, amplitude):
        signal = sin_single(
            n_timesteps,
            freq=1 / period_length,
            phase=0.4,
            amplitude=amplitude,
        )

        assert isinstance(signal, np.ndarray)
        assert signal.shape == (n_timesteps,)
        # An integer period implies exactly period_length distinct values
        # (up to rounding noise).
        assert len(np.unique(np.round(signal, 5))) == period_length
        assert np.all(abs(signal) <= amplitude)
pd.DataFrame) 37 | 38 | assert len(metrics_1) == 2 39 | assert len(metrics_2) == 1 40 | assert len(metrics_all) == 3 41 | 42 | with pytest.raises(KeyError): 43 | history.metrics_per_epoch(3) 44 | 45 | history.pretty_print(epoch=1) 46 | history.pretty_print(epoch=None) 47 | 48 | 49 | class TestRun: 50 | def test_wrong_construction_1(self, dataloader_dummy): 51 | """Wrong positional arguments.""" 52 | with pytest.raises(TypeError): 53 | Run("this_is_fake", MeanReturns(), dataloader_dummy) 54 | 55 | with pytest.raises(TypeError): 56 | Run(DummyNet(), "this_is_fake", dataloader_dummy) 57 | 58 | with pytest.raises(TypeError): 59 | Run(DummyNet(), MeanReturns(), "this_is_fake") 60 | 61 | def test_wrong_construction_2(self, dataloader_dummy): 62 | """Wrong keyword arguments.""" 63 | with pytest.raises(TypeError): 64 | Run( 65 | DummyNet(), 66 | MeanReturns(), 67 | dataloader_dummy, 68 | metrics="this_is_fake", 69 | ) 70 | 71 | with pytest.raises(TypeError): 72 | Run( 73 | DummyNet(), 74 | MeanReturns(), 75 | dataloader_dummy, 76 | metrics={"a": "this_is_fake"}, 77 | ) 78 | 79 | with pytest.raises(ValueError): 80 | Run( 81 | DummyNet(), 82 | MeanReturns(), 83 | dataloader_dummy, 84 | metrics={"loss": MeanReturns()}, 85 | ) 86 | 87 | with pytest.raises(TypeError): 88 | Run( 89 | DummyNet(), 90 | MeanReturns(), 91 | dataloader_dummy, 92 | val_dataloaders="this_is_fake", 93 | ) 94 | 95 | with pytest.raises(TypeError): 96 | Run( 97 | DummyNet(), 98 | MeanReturns(), 99 | dataloader_dummy, 100 | val_dataloaders={"val": "this_is_fake"}, 101 | ) 102 | 103 | with pytest.raises(TypeError): 104 | Run( 105 | DummyNet(), 106 | MeanReturns(), 107 | dataloader_dummy, 108 | benchmarks="this_is_fake", 109 | ) 110 | 111 | with pytest.raises(TypeError): 112 | Run( 113 | DummyNet(), 114 | MeanReturns(), 115 | dataloader_dummy, 116 | benchmarks={"uniform": "this_is_fake"}, 117 | ) 118 | 119 | with pytest.raises(ValueError): 120 | Run( 121 | DummyNet(), 122 | MeanReturns(), 123 | 
dataloader_dummy, 124 | benchmarks={"main": OneOverN()}, 125 | ) 126 | 127 | @pytest.mark.parametrize("additional_kwargs", [True, False]) 128 | def test_attributes_after_construction( 129 | self, dataloader_dummy, additional_kwargs 130 | ): 131 | network = DummyNet() 132 | loss = MeanReturns() 133 | 134 | kwargs = {} 135 | if additional_kwargs: 136 | kwargs.update( 137 | { 138 | "metrics": {"std": StandardDeviation()}, 139 | "val_dataloaders": {"val": dataloader_dummy}, 140 | "benchmarks": {"whatever": OneOverN()}, 141 | } 142 | ) 143 | 144 | run = Run(network, loss, dataloader_dummy, **kwargs) 145 | 146 | assert network is run.network 147 | assert loss is run.loss 148 | assert dataloader_dummy is run.train_dataloader 149 | assert isinstance(run.metrics, dict) 150 | assert isinstance(run.val_dataloaders, dict) 151 | assert isinstance(run.hparams, dict) 152 | 153 | def test_launch(self, dataloader_dummy): 154 | network = DummyNet(n_channels=dataloader_dummy.dataset.X.shape[1]) 155 | loss = MeanReturns() 156 | run = Run(network, loss, dataloader_dummy) 157 | 158 | run.launch(n_epochs=1) 159 | 160 | def test_launch_interrupt(self, dataloader_dummy, monkeypatch): 161 | network = DummyNet(n_channels=dataloader_dummy.dataset.X.shape[1]) 162 | loss = MeanReturns() 163 | 164 | class TempCallback(Callback): 165 | def on_train_begin(self, metadata): 166 | raise KeyboardInterrupt() 167 | 168 | monkeypatch.setattr("time.sleep", lambda x: None) 169 | run = Run(network, loss, dataloader_dummy, callbacks=[TempCallback()]) 170 | 171 | run.launch(n_epochs=1) 172 | -------------------------------------------------------------------------------- /tests/test_explain.py: -------------------------------------------------------------------------------- 1 | """Collection of tests focused on the explain.py module.""" 2 | import pytest 3 | import torch 4 | 5 | from deepdow.explain import gradient_wrt_input 6 | from deepdow.nn import BachelierNet 7 | 8 | 9 | def test_basic(dtype_device): 10 
| dtype, device = dtype_device 11 | n_channels, lookback, n_assets = 2, 3, 4 12 | 13 | target_weights = torch.zeros(n_assets) 14 | target_weights[1] = 1 15 | initial_guess = torch.zeros(n_channels, lookback, n_assets) 16 | 17 | network = BachelierNet( 18 | n_input_channels=n_channels, n_assets=n_assets, hidden_size=2 19 | ) 20 | 21 | # WRONG MASK 22 | with pytest.raises(ValueError): 23 | gradient_wrt_input( 24 | network, 25 | target_weights=target_weights, 26 | initial_guess=initial_guess, 27 | n_iter=3, 28 | dtype=dtype, 29 | device=device, 30 | mask=torch.zeros(n_channels, lookback + 1, n_assets), 31 | ) 32 | 33 | with pytest.raises(TypeError): 34 | gradient_wrt_input( 35 | network, 36 | target_weights=target_weights, 37 | initial_guess=initial_guess, 38 | n_iter=3, 39 | dtype=dtype, 40 | device=device, 41 | mask="wrong_type", 42 | ) 43 | 44 | # NO MASK 45 | res, hist = gradient_wrt_input( 46 | network, 47 | target_weights=target_weights, 48 | initial_guess=initial_guess, 49 | n_iter=3, 50 | dtype=dtype, 51 | device=device, 52 | verbose=True, 53 | ) 54 | 55 | assert len(hist) == 3 56 | assert torch.is_tensor(res) 57 | assert res.shape == initial_guess.shape 58 | assert res.dtype == dtype 59 | assert res.device == device 60 | assert not torch.allclose( 61 | initial_guess.to(device=device, dtype=dtype), res 62 | ) 63 | 64 | # SOME MASK 65 | some_mask = torch.ones_like(initial_guess, dtype=torch.bool) 66 | some_mask[0] = False 67 | 68 | res_s, _ = gradient_wrt_input( 69 | network, 70 | target_weights=target_weights, 71 | initial_guess=initial_guess, 72 | n_iter=3, 73 | dtype=dtype, 74 | device=device, 75 | mask=some_mask, 76 | ) 77 | 78 | assert torch.allclose( 79 | initial_guess.to(device=device, dtype=dtype)[0], res_s[0] 80 | ) 81 | assert not torch.allclose( 82 | initial_guess.to(device=device, dtype=dtype)[1], res_s[1] 83 | ) 84 | 85 | # EXTREME_MASK 86 | extreme_mask = torch.zeros_like(initial_guess, dtype=torch.bool) 87 | 88 | res_e, _ = gradient_wrt_input( 89 
class TestDummyNetwork:
    """Smoke test for the ``DummyNet`` baseline network."""

    def test_basic(self, Xy_dummy):
        X, _, _, _ = Xy_dummy
        n_samples, n_channels, _, n_assets = X.shape

        network = DummyNet(n_channels=n_channels)
        network.to(device=X.device, dtype=X.dtype)
        weights = network(X)

        assert torch.is_tensor(weights)
        assert weights.shape == (n_samples, n_assets)
        assert weights.device == X.device
        assert weights.dtype == X.dtype
        # Per-sample weights sum to one (fully invested portfolio).
        expected_sums = torch.ones(n_samples).to(
            dtype=X.dtype, device=X.device
        )
        assert torch.allclose(weights.sum(dim=1), expected_sums, atol=1e-4)
X.device == weights.device 62 | assert X.dtype == weights.dtype 63 | assert torch.all(-eps <= weights) and torch.all( 64 | weights <= max_weight + eps 65 | ) 66 | assert torch.allclose( 67 | weights.sum(dim=1), 68 | torch.ones(n_samples).to(dtype=dtype, device=device), 69 | atol=eps, 70 | ) 71 | 72 | 73 | class TestKeynesNet: 74 | def test_error(self): 75 | with pytest.raises(ValueError): 76 | KeynesNet(2, transform_type="FAKE", hidden_size=10, n_groups=2) 77 | 78 | with pytest.raises(ValueError): 79 | KeynesNet(2, hidden_size=10, n_groups=3) 80 | 81 | @pytest.mark.parametrize("transform_type", ["Conv", "RNN"]) 82 | @pytest.mark.parametrize("hidden_size", [4, 6]) 83 | def test_basic(self, Xy_dummy, transform_type, hidden_size): 84 | eps = 1e-4 85 | 86 | X, _, _, _ = Xy_dummy 87 | n_samples, n_channels, lookback, n_assets = X.shape 88 | dtype = X.dtype 89 | device = X.device 90 | 91 | network = KeynesNet( 92 | n_channels, 93 | hidden_size=hidden_size, 94 | transform_type=transform_type, 95 | n_groups=2, 96 | ) # 97 | network.to(device=device, dtype=dtype) 98 | 99 | weights = network(X) 100 | 101 | assert isinstance(network.hparams, dict) 102 | assert network.hparams 103 | assert torch.is_tensor(weights) 104 | assert weights.shape == (n_samples, n_assets) 105 | assert X.device == weights.device 106 | assert X.dtype == weights.dtype 107 | assert torch.allclose( 108 | weights.sum(dim=1), 109 | torch.ones(n_samples).to(dtype=dtype, device=device), 110 | atol=eps, 111 | ) 112 | 113 | @pytest.mark.parametrize("n_input_channels", [4, 8]) 114 | @pytest.mark.parametrize("hidden_size", [16, 32]) 115 | @pytest.mark.parametrize("n_groups", [2, 4, 8]) 116 | @pytest.mark.parametrize("transform_type", ["Conv", "RNN"]) 117 | def test_n_params( 118 | self, n_input_channels, hidden_size, n_groups, transform_type 119 | ): 120 | network = KeynesNet( 121 | n_input_channels=n_input_channels, 122 | hidden_size=hidden_size, 123 | n_groups=n_groups, 124 | transform_type=transform_type, 125 
    def test_basic(self, Xy_dummy):
        """Forward pass of ``LinearNet`` yields valid portfolio weights.

        Also checks that an input with an unexpected number of channels is
        rejected, since the network is fixed to its construction-time shape.
        """
        eps = 1e-4  # tolerance for the fully-invested check

        X, _, _, _ = Xy_dummy
        n_samples, n_channels, lookback, n_assets = X.shape
        dtype = X.dtype
        device = X.device

        network = LinearNet(n_channels, lookback, n_assets)
        network.to(device=device, dtype=dtype)

        # One extra channel than the network was built for -> rejected.
        with pytest.raises(ValueError):
            network(
                torch.ones(
                    n_samples,
                    n_channels + 1,
                    lookback,
                    n_assets,
                    device=device,
                    dtype=dtype,
                )
            )

        weights = network(X)

        assert isinstance(network.hparams, dict)
        assert network.hparams  # non-empty
        assert torch.is_tensor(weights)
        assert weights.shape == (n_samples, n_assets)
        assert X.device == weights.device  # device preserved
        assert X.dtype == weights.dtype  # dtype preserved
        # Per-sample weights sum to one (fully invested portfolio).
        assert torch.allclose(
            weights.sum(dim=1),
            torch.ones(n_samples).to(dtype=dtype, device=device),
            atol=eps,
        )
class TestMinimal:
    """Tests for the ``MinimalNet`` allocator."""

    def test_basic(self, Xy_dummy):
        tol = 1e-4

        X, _, _, _ = Xy_dummy
        n_samples, _, _, n_assets = X.shape

        network = MinimalNet(n_assets)
        network.to(device=X.device, dtype=X.dtype)
        weights = network(X)

        assert isinstance(network.hparams, dict)
        assert "n_assets" in network.hparams
        assert torch.is_tensor(weights)
        assert weights.shape == (n_samples, n_assets)
        assert weights.device == X.device
        assert weights.dtype == X.dtype
        # Per-sample weights sum to one (fully invested portfolio).
        target = torch.ones(n_samples).to(dtype=X.dtype, device=X.device)
        assert torch.allclose(weights.sum(dim=1), target, atol=tol)

    @pytest.mark.parametrize("n_assets", [40, 4])
    def test_n_params(self, n_assets):
        """There is exactly one trainable parameter per asset."""
        network = MinimalNet(n_assets)

        n_trainable = sum(
            p.numel() for p in network.parameters() if p.requires_grad
        )

        assert n_trainable == n_assets
torch.is_tensor(weights) 268 | assert weights.shape == (n_samples, n_assets) 269 | assert X.device == weights.device 270 | assert X.dtype == weights.dtype 271 | assert torch.all(-eps <= weights) and torch.all( 272 | weights <= max_weight + eps 273 | ) 274 | assert torch.allclose( 275 | weights.sum(dim=1), 276 | torch.ones(n_samples).to(dtype=dtype, device=device), 277 | atol=eps, 278 | ) 279 | 280 | @pytest.mark.parametrize( 281 | "force_symmetric", [True, False], ids=["symmetric", "asymetric"] 282 | ) 283 | @pytest.mark.parametrize("n_assets", [3, 5, 6]) 284 | def test_n_params(self, n_assets, force_symmetric): 285 | network = ThorpNet(n_assets, force_symmetric=force_symmetric) 286 | 287 | expected = n_assets * n_assets + n_assets + 1 + 1 288 | actual = sum( 289 | p.numel() for p in network.parameters() if p.requires_grad 290 | ) 291 | 292 | assert expected == actual 293 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | """Collection of tests focused on the utils module.""" 2 | import pathlib 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import pytest 7 | 8 | from deepdow.utils import ( 9 | ChangeWorkingDirectory, 10 | PandasChecks, 11 | prices_to_returns, 12 | raw_to_Xy, 13 | returns_to_Xy, 14 | ) 15 | 16 | 17 | class TestChangeWorkingDirectory: 18 | def test_construction(self, tmpdir): 19 | dir_str = str(tmpdir) 20 | dir_path = pathlib.Path(dir_str) 21 | 22 | assert ChangeWorkingDirectory(dir_str).directory == dir_path 23 | assert ChangeWorkingDirectory(dir_path).directory == dir_path 24 | 25 | with pytest.raises(NotADirectoryError): 26 | ChangeWorkingDirectory("/fake/directory/") 27 | 28 | def test_working(self, tmpdir): 29 | dir_path = pathlib.Path(str(tmpdir)) 30 | 31 | cwd_before = pathlib.Path.cwd() 32 | 33 | with ChangeWorkingDirectory(dir_path): 34 | cwd_inside = pathlib.Path.cwd() 35 | 36 | cwd_after = 
class TestChangeWorkingDirectory:
    """Tests of the ``ChangeWorkingDirectory`` context manager."""

    def test_construction(self, tmpdir):
        """Both ``str`` and ``pathlib.Path`` directories are accepted."""
        as_str = str(tmpdir)
        as_path = pathlib.Path(as_str)

        for candidate in (as_str, as_path):
            assert ChangeWorkingDirectory(candidate).directory == as_path

        with pytest.raises(NotADirectoryError):
            ChangeWorkingDirectory("/fake/directory/")

    def test_working(self, tmpdir):
        """The cwd changes inside the context and is restored after."""
        target = pathlib.Path(str(tmpdir))

        before = pathlib.Path.cwd()
        with ChangeWorkingDirectory(target):
            inside = pathlib.Path.cwd()
        after = pathlib.Path.cwd()

        assert before == after
        assert before != inside
        assert inside == target


class TestPandasChecks:
    """Tests of the static validation helpers in ``PandasChecks``."""

    def test_check_no_gaps(self):
        """Non-datetime or gapped indices are rejected."""
        bad_type = [1, 2]
        gapless = pd.date_range("1/1/2000", periods=4, freq="M")
        gapped = pd.DatetimeIndex(
            [x for i, x in enumerate(gapless) if i != 2]
        )

        with pytest.raises(TypeError):
            PandasChecks.check_no_gaps(bad_type)

        with pytest.raises(IndexError):
            PandasChecks.check_no_gaps(gapped)

        # A contiguous index passes without raising.
        PandasChecks.check_no_gaps(gapless)

    def test_check_valid_entries(self):
        """NaN/inf entries or non-pandas input are rejected."""
        not_a_table = "table"
        with_nan = pd.Series([1, np.nan])
        with_inf = pd.DataFrame([[1, 2], [np.inf, 3]])
        all_finite = pd.DataFrame([[1, 2], [2, 4]])

        with pytest.raises(TypeError):
            PandasChecks.check_valid_entries(not_a_table)

        for invalid in (with_nan, with_inf):
            with pytest.raises(ValueError):
                PandasChecks.check_valid_entries(invalid)

        PandasChecks.check_valid_entries(all_finite)

    def test_indices_agree(self):
        """Mismatched indices or columns across tables are rejected."""
        matching = ["A", "B"]
        mismatched = ["A", "C"]

        with pytest.raises(TypeError):
            PandasChecks.check_indices_agree([], "a")

        with pytest.raises(IndexError):
            PandasChecks.check_indices_agree(
                pd.Series(index=matching), pd.Series(index=mismatched)
            )

        with pytest.raises(IndexError):
            PandasChecks.check_indices_agree(
                pd.Series(index=matching),
                pd.DataFrame(index=matching, columns=mismatched),
            )

        PandasChecks.check_indices_agree(
            pd.Series(index=matching),
            pd.DataFrame(index=matching, columns=matching),
        )
class TestPricesToReturns:
    """Tests of the ``prices_to_returns`` conversion."""

    @pytest.mark.parametrize("use_log", [True, False])
    def test_dummy(self, raw_data, use_log):
        """Returns keep index/columns (minus first row) and values match.

        NOTE: renamed from ``test_dummy_`` — the trailing underscore was
        a typo; pytest discovers the method under either name.
        """
        prices_dummy, _, _ = raw_data

        returns = prices_to_returns(prices_dummy, use_log=use_log)

        assert isinstance(returns, pd.DataFrame)
        # Differencing consumes the first timestamp.
        assert returns.index.equals(prices_dummy.index[1:])
        assert returns.columns.equals(prices_dummy.columns)

        # Spot-check one entry against the hand-computed price ratio.
        ratio = prices_dummy.iloc[2, 3] / prices_dummy.iloc[1, 3]
        if use_log:
            assert np.log(ratio) == pytest.approx(returns.iloc[1, 3])
        else:
            assert ratio - 1 == pytest.approx(returns.iloc[1, 3])
class TestRawToXy:
    """Tests of the ``raw_to_Xy`` end-to-end preprocessing."""

    def test_wrong(self, raw_data):
        """Too-large lookback or missing frequency raises ``ValueError``."""
        df, n_missing_entries, true_freq = raw_data

        with pytest.raises(ValueError):
            raw_to_Xy(
                df, lookback=len(df) + n_missing_entries, freq=true_freq
            )

        with pytest.raises(ValueError):
            raw_to_Xy(df, freq=None)

    @pytest.mark.parametrize(
        "included_assets",
        [None, ["asset_1", "asset_3"]],
        ids=["all_assets", "some_assets"],
    )
    @pytest.mark.parametrize(
        "included_indicators",
        [None, ["indicator_0", "indicator_2", "indicator_4"]],
        ids=["all_indicators", "some_indicators"],
    )
    def test_sanity_check(
        self, raw_data, included_assets, included_indicators
    ):
        """Output types, shapes and timestamps match the inputs."""
        df, n_missing_entries, true_freq = raw_data

        n_timesteps = len(df)
        n_assets = len(included_assets or df.columns.levels[0])
        n_indicators = len(included_indicators or df.columns.levels[1])

        lookback = n_timesteps // 3
        horizon = n_timesteps // 4
        gap = 1

        X, timestamps, y, asset_names, indicators = raw_to_Xy(
            df,
            lookback=lookback,
            horizon=horizon,
            gap=gap,  # was a hard-coded 1 shadowing the variable above
            freq=true_freq,
            included_assets=included_assets,
            included_indicators=included_indicators,
        )

        n_new = (
            n_timesteps + n_missing_entries - lookback - horizon
            - gap + 1 - 1
        )  # we start with prices

        # types
        assert isinstance(X, np.ndarray)
        assert isinstance(y, np.ndarray)
        assert isinstance(timestamps, pd.DatetimeIndex)
        assert timestamps.freq is not None and true_freq == timestamps.freq
        assert isinstance(asset_names, list)
        assert isinstance(indicators, list)

        # shapes
        assert X.shape == (n_new, n_indicators, lookback, n_assets)
        assert y.shape == (n_new, n_indicators, horizon, n_assets)
        assert (
            timestamps[0]
            == pd.date_range(
                start=df.index[1], periods=lookback, freq=true_freq
            )[-1]
        )  # prices
        assert len(asset_names) == n_assets
        assert len(indicators) == n_indicators

    def test_invalid_values(self, raw_data):
        """Assets containing invalid (negative price) entries are dropped."""
        df, n_missing_entries, true_freq = raw_data

        n_timesteps = len(df)
        n_assets = len(df.columns.levels[0])

        lookback = n_timesteps // 3
        horizon = n_timesteps // 4
        gap = 1

        df_invalid = df.copy()

        # Plant one negative price in asset_1 so it gets filtered out.
        df_invalid.at[df.index[0], ("asset_1", "indicator_3")] = -2

        X, timestamps, y, asset_names, indicators = raw_to_Xy(
            df_invalid,
            lookback=lookback,
            horizon=horizon,
            gap=gap,
            freq=true_freq,
        )

        assert [
            "asset_{}".format(i) for i in range(n_assets) if i != 1
        ] == asset_names
class TestReturnsToXY:
    """Tests of the ``returns_to_Xy`` windowing helper."""

    @pytest.mark.parametrize("lookback", [3, 5])
    @pytest.mark.parametrize("horizon", [4, 6])
    def test_basic(self, raw_data, lookback, horizon):
        """Feature and target arrays have the expected window shapes."""
        df, _, _ = raw_data

        returns_dummy = df.xs("indicator_1", axis=1, level=1)

        n_assets = len(returns_dummy.columns)
        n_samples = len(returns_dummy.index) - lookback - horizon + 1

        X, timesteps, y = returns_to_Xy(
            returns_dummy, lookback=lookback, horizon=horizon
        )

        assert isinstance(X, np.ndarray)
        assert isinstance(timesteps, pd.DatetimeIndex)
        assert isinstance(y, np.ndarray)

        assert X.shape == (n_samples, 1, lookback, n_assets)
        assert len(timesteps) == n_samples
        assert y.shape == (n_samples, 1, horizon, n_assets)


class TestGenerateCumrets:
    """Tests of ``generate_cumrets``."""

    def test_errors(self, dataloader_dummy, network_dummy):
        """Non-network benchmark or non-dataloader raises ``TypeError``."""
        with pytest.raises(TypeError):
            generate_cumrets({"bm_1": "WRONG"}, dataloader_dummy)

        with pytest.raises(TypeError):
            generate_cumrets({"bm_1": network_dummy}, "FAKE")

    def test_basic(self, dataloader_dummy, network_dummy):
        """One benchmark yields one correctly shaped cumulative-return."""
        result = generate_cumrets({"bm_1": network_dummy}, dataloader_dummy)

        assert isinstance(result, dict)
        assert len(result) == 1
        assert "bm_1" in result

        expected_shape = (
            len(dataloader_dummy.dataset),
            dataloader_dummy.horizon,
        )
        assert result["bm_1"].shape == expected_shape
class TestGenerateMetricsTable:
    """Tests of ``generate_metrics_table``."""

    def test_errors(self, dataloader_dummy, network_dummy):
        """Each wrongly typed argument raises ``TypeError``."""
        ok_metrics = {"metric": MeanReturns()}
        bad_calls = [
            ({"bm_1": "WRONG"}, dataloader_dummy, ok_metrics),
            ({"bm_1": network_dummy}, "FAKE", ok_metrics),
            ({"bm_1": network_dummy}, dataloader_dummy, {"metric": "FAKE"}),
        ]

        for benchmarks, dataloader, metrics in bad_calls:
            with pytest.raises(TypeError):
                generate_metrics_table(benchmarks, dataloader, metrics)

    def test_basic(self, dataloader_dummy, network_dummy):
        """The result is a long-form frame, one row per dataset sample."""
        table = generate_metrics_table(
            {"bm_1": network_dummy}, dataloader_dummy, {"rets": MeanReturns()}
        )

        assert isinstance(table, pd.DataFrame)
        assert len(table) == len(dataloader_dummy.dataset)
        assert {"metric", "value", "benchmark", "timestamp"} == set(
            table.columns.to_list()
        )


def test_plot_metrics(monkeypatch):
    """``plot_metrics`` runs end to end with plotting patched out."""
    n_entries = 100
    table = pd.DataFrame(
        np.random.random((n_entries, 2)), columns=["value", "timestamp"]
    )
    table["metric"] = "M"
    table["benchmark"] = "B"

    # Replace matplotlib and pandas inside the module so nothing is drawn.
    fake_plt = Mock()
    fake_plt.subplots.return_value = None, MagicMock()
    monkeypatch.setattr("deepdow.visualize.plt", fake_plt)
    monkeypatch.setattr("deepdow.visualize.pd", Mock())

    plot_metrics(table)


class TestGenerateWeightsTable:
    """Tests of ``generate_weights_table``."""

    def test_errors(self, dataloader_dummy, network_dummy):
        """Wrongly typed network or dataloader raises ``TypeError``."""
        for net, loader in [
            ("FAKE", dataloader_dummy),
            (network_dummy, "FAKE"),
        ]:
            with pytest.raises(TypeError):
                generate_weights_table(net, loader)

    def test_basic(self, dataloader_dummy, network_dummy):
        """The table is indexed by timestamp with one column per asset."""
        table = generate_weights_table(network_dummy, dataloader_dummy)

        dataset = dataloader_dummy.dataset

        assert isinstance(table, pd.DataFrame)
        assert len(table) == len(dataset)
        assert set(table.index.to_list()) == set(dataset.timestamps)
        assert table.columns.to_list() == dataset.asset_names
class TestPlotWeightAnim:
    """Tests of ``plot_weight_anim``."""

    def test_errors(self):
        """Invalid column names or asset counts raise ``ValueError``."""
        # "others" is a reserved column name.
        with pytest.raises(ValueError):
            plot_weight_anim(
                pd.DataFrame([[0, 1], [1, 2]], columns=["others", "asset_1"])
            )

        # Cannot display more assets than exist.
        with pytest.raises(ValueError):
            plot_weight_anim(
                pd.DataFrame([[0, 1], [1, 2]]), n_displayed_assets=3
            )

        # always_visible cannot exceed n_displayed_assets.
        with pytest.raises(ValueError):
            plot_weight_anim(
                pd.DataFrame([[0, 1], [1, 2]], columns=["a", "b"]),
                n_displayed_assets=1,
                always_visible=["a", "b"],
            )

    @pytest.mark.parametrize(
        "colors",
        [None, {"asset_1": "green"}, ListedColormap(["green", "red"])],
    )
    def test_portfolio_evolution(self, monkeypatch, colors):
        """A ``FuncAnimation`` is produced with matplotlib patched out."""
        n_timesteps = 4
        n_assets = 3
        n_displayed_assets = 2

        weights = pd.DataFrame(
            np.random.random((n_timesteps, n_assets)),
            index=pd.date_range(start="1/1/2000", periods=n_timesteps),
            columns=["asset_{}".format(i) for i in range(n_assets)],
        )

        weights[
            "asset_0"
        ] = 0  # the smallest but we will force its display anyway

        fake_functanim = Mock()
        fake_functanim.return_value = Mock(spec=FuncAnimation)

        monkeypatch.setattr("deepdow.visualize.FuncAnimation", fake_functanim)
        plt_mock = Mock()
        plt_mock.subplots = Mock(return_value=[Mock(), Mock()])

        monkeypatch.setattr("deepdow.visualize.plt", plt_mock)
        ani = plot_weight_anim(
            weights,
            n_displayed_assets=n_displayed_assets,
            always_visible=["asset_0"],
            n_seconds=10,
            figsize=(1, 1),
            colors=colors,
        )

        assert isinstance(ani, FuncAnimation)


class TestPlotWeightHeatmap:
    """Tests of ``plot_weight_heatmap``."""

    @pytest.mark.parametrize("add_sum_column", [True, False])
    @pytest.mark.parametrize("time_format", [None, "%d-%m-%Y"])
    def test_basic(self, time_format, add_sum_column, monkeypatch):
        """A heatmap axes is returned with seaborn patched out."""
        n_timesteps = 20
        n_assets = 10
        index = (
            list(range(n_timesteps))
            if time_format is None
            else pd.date_range("1/1/2000", periods=n_timesteps)
        )

        weights = pd.DataFrame(
            np.random.random(size=(n_timesteps, n_assets)), index=index
        )

        fake_axes = Mock(spec=Axes)
        fake_axes.xaxis = Mock()

        fake_sns = Mock()
        fake_sns.heatmap.return_value = fake_axes

        monkeypatch.setattr("deepdow.visualize.sns", fake_sns)
        ax = plot_weight_heatmap(
            weights, time_format=time_format, add_sum_column=add_sum_column
        )

        assert isinstance(ax, Axes)
        assert fake_sns.heatmap.call_count == 1
        assert fake_axes.tick_params.call_count == 2

    def test_sum_column(self):
        """A pre-existing ``sum`` column clashes with ``add_sum_column``.

        The DataFrame is built *outside* the ``raises`` block so that
        only the plotting call itself may raise the expected
        ``ValueError`` (previously the setup was wrapped too, which
        could mask a setup failure as a passing test).
        """
        now = datetime.datetime.now()
        df = pd.DataFrame(
            np.zeros((2, 2)), columns=["asset", "sum"], index=[now, now]
        )

        with pytest.raises(ValueError):
            plot_weight_heatmap(df, add_sum_column=True)