├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── __init__.py ├── check_all_py_imports.py ├── check_init_files.py ├── docker ├── Dockerfile ├── build_image.sh ├── exec_mbt_gym.sh ├── launcher.sh ├── requirements.txt └── start_container.sh ├── experiments ├── __init__.py ├── helpers.py └── learning-pnl-with-different-arrival-rates.py ├── mbt_gym ├── __init__.py ├── agents │ ├── Agent.py │ ├── BaselineAgents.py │ ├── PolicyGradientAgent.py │ ├── SbAgent.py │ └── __init__.py ├── gym │ ├── ModelDynamics.py │ ├── MultiprocessTradingEnv.py │ ├── StableBaselinesTradingEnvironment.py │ ├── TradingEnvironment.py │ ├── __init__.py │ ├── backtesting.py │ ├── helpers │ │ ├── __init__.py │ │ ├── generate_trajectory.py │ │ └── plotting.py │ ├── index_names.py │ ├── info_calculators.py │ └── wrappers.py ├── rewards │ ├── RewardFunctions.py │ ├── __init__.py │ └── tests │ │ ├── __init__.py │ │ └── testRewardFunctions.py └── stochastic_processes │ ├── StochasticProcessModel.py │ ├── __init__.py │ ├── arrival_models.py │ ├── fill_probability_models.py │ ├── midprice_models.py │ └── price_impact_models.py ├── mypy.ini ├── notebooks ├── Baseline_Agents.ipynb ├── Learning_to_make_a_market_with_mbt_gym_and_Stable_Baselines_3.ipynb ├── Test_1_-_replicate_AS_original_results.html ├── Test_1_-_replicate_AS_original_results.ipynb ├── Test_2_-_replicate_CJP_2015_-_closed-form_solution_for_value_function.html └── Test_2_-_replicate_CJP_2015_-_closed-form_solution_for_value_function.ipynb ├── requirements.txt ├── requirements_no_versions.txt ├── roadmap.md └── tasks.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Update notebooks manually, if necessary 4 | notebooks 5 | TMP_NOTEBOOKS 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 166 | #.idea/ 167 | *.iml 168 | *.xml 169 | .DS_Store 170 | .DS_Store 171 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | j.jerome@liverpool.ac.uk. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. 
This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome to the mbt_gym contributing guide 2 | 3 | Thank you for considering contributing to `mbt_gym`! 4 | 5 | Please read our [Code of Conduct](./CODE_OF_CONDUCT.md) first to help us maintain a friendly and helpful community. 6 | 7 | ## Pull requests (PRs) 8 | 9 | Please feel free to open a Pull Request for any minor changes to the repository. For larger changes, please open an 10 | issue first to discuss with other users and maintainers of `mbt_gym`. If you are not familiar with creating a Pull 11 | Request, here are some guides: 12 | - http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request 13 | - https://help.github.com/articles/creating-a-pull-request/ 14 | 15 | In particular, **please see the [roadmap.md](./roadmap.md) file** for a list of desired additions that will be accepted. 16 | Any appropriate tests will also always be accepted. 17 | 18 | ## Codestyle 19 | 20 | We use [mypy](https://mypy-lang.org/) as a static type checker, [Flake8](https://flake8.pycqa.org/en/latest/) to enforce PEP8, and [Black](https://black.readthedocs.io/en/stable/) to enforce consistent styling. 21 | 22 | - Code will be automatically reformatted with: `invoke black-reformat` 23 | - Styling and type checking tests can be run locally with: `invoke check-python` 24 | 25 | ## Tests 26 | 27 | When adding new code to the `mbt_gym` code-base, please add test coverage wherever possible. 28 | We use [unittest](https://docs.python.org/3/library/unittest.html) for unit testing. All unit tests can be run by 29 | calling `nose2` from the root directory.
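For example (a minimal sketch only; the module and class names below are hypothetical and not part of the existing test suite), a new test file following the pattern of `mbt_gym/rewards/tests/testRewardFunctions.py` could look like:

```python
import unittest

import numpy as np


class TestMyNewFeature(unittest.TestCase):
    """Illustrative placeholder: replace with assertions about the code being contributed."""

    def test_shapes_are_preserved(self):
        # A trivial assertion; real tests should exercise the new behaviour.
        result = np.zeros((2, 3))
        self.assertEqual(result.shape, (2, 3))


if __name__ == "__main__":
    unittest.main()
```

Saving such a module with a `test*`-style filename inside a package that contains an `__init__.py` should allow `nose2` to discover and run it automatically.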
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Joseph Jerome 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mbt_gym 2 | `mbt_gym` is a module that provides a suite of gym environments for training reinforcement learning (RL) agents to solve model-based high-frequency trading problems such as market-making and optimal execution. The module is set up in an extensible way to allow different aspects of different models to be combined. It supports highly efficient implementations of vectorized environments to allow faster training of RL agents. 3 | 4 | It includes gym environments for popular analytically tractable market-making models, as well as more complex models that prove difficult to solve analytically. 5 | 6 | The associated paper can be found at https://dl.acm.org/doi/pdf/10.1145/3604237.3626873 and https://arxiv.org/abs/2209.07823. 7 | 8 | ## Contributions are welcome! 9 | If you wish to contribute to this repository, please read the details of how to do so in the 10 | [CONTRIBUTING.md](./CONTRIBUTING.md) file in the root directory of the repository. For ideas on code that you could 11 | contribute, please look at the [roadmap](./roadmap.md). 12 | 13 | ## Using mbt_gym with Docker 14 | 15 | To use the `mbt_gym` package from within a docker container (see [instructions on how to install docker](https://docs.docker.com/engine/install/ubuntu/)), 16 | first change directory into the 17 | docker subdirectory using `cd docker` and then follow the instructions below.
18 | 19 | ### Building 20 | 21 | To build the container: 22 | 23 | ``` 24 | sh build_image.sh 25 | ``` 26 | 27 | ### Running 28 | 29 | Run the start container script (mounting ../, therefore mounting `mbt_gym`), and specify a port for jupyter notebook: 30 | 31 | ``` 32 | sh start_container.sh 8877 33 | ``` 34 | 35 | (Note: if you wish to add gpus to container, just add ```--gpus device=0``` to ```start_container.sh``` to use one gpu 36 | or ```--gpus all``` to add all gpus available.) 37 | 38 | To work in the container via shell: 39 | 40 | ``` 41 | sh exec_mbt_gym.sh 42 | ``` 43 | 44 | ## Citing mbt_gym 45 | 46 | When using `mbt_gym`, please cite our [ACM ICAIF 2023 paper](https://arxiv.org/abs/2209.07823) by using the following 47 | BibTeX entry: 48 | ``` 49 | @inproceedings{JeromeSSH23, 50 | author = {Joseph Jerome and 51 | Leandro S{\'{a}}nchez{-}Betancourt and 52 | Rahul Savani and 53 | Martin Herdegen}, 54 | title = {Mbt-gym: Reinforcement learning for model-based limit order book trading}, 55 | booktitle = {4th {ACM} International Conference on {AI} in Finance, {ICAIF} 2023, 56 | Brooklyn, NY, USA, November 27-29, 2023}, 57 | pages = {619--627}, 58 | publisher = {{ACM}}, 59 | year = {2023}, 60 | url = {https://doi.org/10.1145/3604237.3626873}, 61 | doi = {10.1145/3604237.3626873}, 62 | note = {arXiv preprint arXiv:2209.07823} 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/__init__.py -------------------------------------------------------------------------------- /check_all_py_imports.py: -------------------------------------------------------------------------------- 1 | from typing import Set 2 | 3 | import os 4 | 5 | from glob import glob 6 | import importlib 7 | 8 | 9 | def import_everything() -> None: 10 | paths = _get_all_py_file_paths() 11 | failed_paths: Set[str] = set() 12 | for path in paths: 13 | try: 14 | importlib.__import__(_convert_file_path_to_module_path(path), fromlist=[""]) 15 | except ImportError: 16 | failed_paths.add(path) 17 | if failed_paths: 18 | print("Could not import the following files:\n" + "\n".join(failed_paths)) 19 | exit(1) 20 | else: 21 | print(f"Successfully imported {len(paths)} .py files.") 22 | exit(0) 23 | 24 | 25 | def _get_all_py_file_paths() -> Set[str]: 26 | all_paths = set(glob(os.path.join("mbt_gym", "**", "*.py"), recursive=True)) 27 | return all_paths 28 | 29 | 30 | def _convert_file_path_to_module_path(path: str) -> str: 31 | parts = os.path.normpath(path).split(os.sep) 32 | mod_path, basename = parts[:-1], parts[-1] 33 | basename = basename[:-3] 34 | return ".".join(mod_path + [basename]) 35 | 36 | 37 | if __name__ == "__main__": 38 | import_everything() 39 | -------------------------------------------------------------------------------- /check_init_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | if __name__ == "__main__": 5 | """We want to check for missing init files as they may cause tests and mypy to not run""" 6 | result = [os.path.join(dp, f) for dp, dn, file_names in os.walk(".") for f in file_names] 7 | result = [r for r in result if r.endswith(".py")] 8 | 9 | dirs = set([os.path.dirname(f) for f in result]) 10 | missing_init_files = [] 11 | for _dir in dirs: 12 | init_file = os.path.sep.join([_dir, "__init__.py"]) 13 | 
if not os.path.isfile(init_file): 14 | missing_init_files.append(init_file) 15 | 16 | if missing_init_files: 17 | print("Missing init files: ") 18 | print("\n".join(missing_init_files)) 19 | exit(1) 20 | else: 21 | print("No init files seem to be missing") 22 | exit(0) 23 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 AS MBT_GYM 2 | USER root 3 | 4 | RUN apt-get -y update \ 5 | && apt-get -y upgrade \ 6 | && apt-get -y install \ 7 | fish \ 8 | tmux \ 9 | python3-pip 10 | 11 | WORKDIR /home 12 | COPY requirements.txt requirements.txt 13 | RUN pip install --no-cache-dir -r requirements.txt 14 | COPY launcher.sh launcher.sh 15 | RUN chmod +x launcher.sh 16 | -------------------------------------------------------------------------------- /docker/build_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker build . -t mbt_gym --target MBT_GYM -------------------------------------------------------------------------------- /docker/exec_mbt_gym.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker exec -it mbt_gym /bin/fish -------------------------------------------------------------------------------- /docker/launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | /etc/init.d/postgresql start 3 | jupyter notebook --no-browser --port $1 --ip=0.0.0.0 --allow-root -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | black==22.8.0 2 | flake8==5.0.4 3 | gym==0.26.2 4 | invoke==1.6.0 5 | jupyter 6 | jupyter_nbextensions_configurator 7 | matplotlib==3.4.3 8 | mypy==0.971 9 | numpy==1.22.3 10 | pandas==1.3.5 11 | seaborn==0.11.2 12 | stable_baselines3==1.6.2 13 | stochastic==0.6.0 14 | tensorboard 15 | torch>=1.13.1 16 | -------------------------------------------------------------------------------- /docker/start_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tag='latest' 3 | # To add a single gpu, add the flag --gpus device=0, or to add all gpus add --gpus all 4 | docker run --rm --gpus all --shm-size=10.24gb -v ${PWD}/../:/home/mbt_gym/ -p $1:$1 --name mbt_gym --user root -dit mbt_gym:$tag ./launcher.sh $1 5 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/helpers.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import gym 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | from stable_baselines3 import PPO 7 | from stable_baselines3.common.callbacks import EvalCallback 8 | from stable_baselines3.common.vec_env import VecMonitor 9 | 10 | from mbt_gym.agents.BaselineAgents import CarteaJaimungalMmAgent 11 | from mbt_gym.agents.SbAgent import SbAgent 12 | from mbt_gym.gym.StableBaselinesTradingEnvironment import 
StableBaselinesTradingEnvironment 13 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 14 | from mbt_gym.gym.wrappers import ReduceStateSizeWrapper 15 | from mbt_gym.rewards.RewardFunctions import CjMmCriterion, PnL 16 | from mbt_gym.stochastic_processes.arrival_models import PoissonArrivalModel 17 | from mbt_gym.stochastic_processes.fill_probability_models import ExponentialFillFunction 18 | from mbt_gym.stochastic_processes.midprice_models import BrownianMotionMidpriceModel 19 | from mbt_gym.gym.ModelDynamics import LimitAndMarketOrderModelDynamics 20 | 21 | def get_cj_env( 22 | num_trajectories: int = 1, 23 | terminal_time: float = 1.0, 24 | arrival_rate: float = 10.0, 25 | fill_exponent: float = 0.1, 26 | phi: float = 0.5, 27 | alpha: float = 0.001, 28 | sigma: float = 0.1, 29 | initial_inventory=(-5, 6), 30 | random_start: tuple = None, 31 | ): 32 | initial_price = 100 33 | n_steps = int(10 * terminal_time * arrival_rate) 34 | step_size = terminal_time / n_steps 35 | reward_function = CjMmCriterion(phi, alpha) if phi > 0 or alpha > 0 else PnL() 36 | midprice_model=BrownianMotionMidpriceModel( 37 | volatility=sigma, 38 | terminal_time=terminal_time, 39 | step_size=step_size, 40 | initial_price=initial_price, 41 | num_trajectories=num_trajectories, 42 | ) 43 | arrival_model=PoissonArrivalModel( 44 | intensity=np.array([arrival_rate, arrival_rate]), step_size=step_size, num_trajectories=num_trajectories 45 | ) 46 | fill_probability_model=ExponentialFillFunction( 47 | fill_exponent=fill_exponent, step_size=step_size, num_trajectories=num_trajectories 48 | ) 49 | env_params = dict( 50 | terminal_time=terminal_time, 51 | n_steps=n_steps, 52 | model_dynamics = LimitAndMarketOrderModelDynamics(midprice_model = midprice_model, arrival_model= arrival_model, fill_probability_model = fill_probability_model, 53 | num_trajectories = num_trajectories), 54 | initial_inventory=initial_inventory, 55 | reward_function=reward_function, 56 | max_inventory=n_steps, 57 | num_trajectories=num_trajectories, 58 | random_start=random_start, 59 | ) 60 | return TradingEnvironment(**env_params) 61 | 62 | 63 | def wrap_env(env: TradingEnvironment): 64 | env = StableBaselinesTradingEnvironment(trading_env=ReduceStateSizeWrapper(env)) 65 | return VecMonitor(env) 66 | 67 | 68 | def get_ppo_learner_and_callback( 69 | env: TradingEnvironment, tensorboard_base_logdir: str = "./tensorboard/", best_model_path: str = "./best_models" 70 | ): 71 | policy_kwargs = dict(net_arch=[dict(pi=[256, 256], vf=[256, 256])]) 72 | experiment_string = get_experiment_string(env) 73 | tensorboard_logdir = tensorboard_base_logdir + "/" + experiment_string 74 | PPO_params = { 75 | "policy": "MlpPolicy", 76 | "env": wrap_env(env), 77 | "verbose": 1, 78 | "policy_kwargs": policy_kwargs, 79 | "tensorboard_log": tensorboard_logdir, 80 | "n_epochs": 10, 81 | "batch_size": int(env.n_steps * env.num_trajectories / 4), 82 | "normalize_advantage": True, 83 | "n_steps": int(env.n_steps), 84 | "gae_lambda": 0.95, 85 | "gamma": 1, 86 | } 87 | callback_params = dict( 88 | eval_env=wrap_env(env), 89 | n_eval_episodes=10, 90 | best_model_save_path=best_model_path + "/" + experiment_string, 91 | deterministic=True, 92 | eval_freq=env.n_steps * env.num_trajectories * 10, 93 | ) 94 | callback = EvalCallback(**callback_params) 95 | model = PPO(**PPO_params) 96 | return model, callback 97 | 98 | 99 | def get_experiment_string(env): 100 | phi = env.reward_function.phi if isinstance(env.reward_function, CjMmCriterion) else 0 101 | alpha = 
env.reward_function.alpha if isinstance(env.reward_function, CjMmCriterion) else 0 102 | return ( 103 | f"n_traj_{env.num_trajectories}__" 104 | + f"arrival_rate_{env.arrival_model.intensity}__" 105 | + f"fill_exponent_{env.fill_probability_model.fill_exponent}__" 106 | + f"phi_{phi}__" 107 | + f"alpha_{alpha}__" 108 | + f"initial_inventory_{env.initial_inventory}__" 109 | + f"random_start_{env.start_time}" 110 | ) 111 | 112 | 113 | def create_inventory_plot( 114 | model: PPO, 115 | env: TradingEnvironment, 116 | min_inventory: int = -3, 117 | max_inventory: int = 3, 118 | reduced_training_indices: list = None, 119 | model_uses_normalisation: bool = True, 120 | time_of_action: float = 0.5, 121 | save_figure: bool = False, 122 | path_to_figures: str = "./figures", 123 | ): 124 | if model_uses_normalisation: 125 | normalised_env = StableBaselinesTradingEnvironment(ReduceStateSizeWrapper(env, reduced_training_indices)) 126 | assert env.num_trajectories == 1, "Plotting actions must be done with a single trajectory env" 127 | ppo_agent = SbAgent(model) 128 | cj_agent = CarteaJaimungalMmAgent(env=env) 129 | inventories = np.arange(min_inventory, max_inventory + 1, 1) 130 | bid_actions, ask_actions, cj_bid_actions, cj_ask_actions = [], [], [], [] 131 | for inventory in inventories: 132 | state = np.array([[0, inventory, time_of_action, 100]]) 133 | reduced_state = state[:, reduced_training_indices] if reduced_training_indices is not None else state 134 | if model_uses_normalisation: 135 | reduced_state = normalised_env.normalise_observation(reduced_state) 136 | action = ppo_agent.get_action(reduced_state) 137 | if model_uses_normalisation: 138 | action = normalised_env.normalise_action(action, inverse=True) 139 | bid_action, ask_action = action 140 | cj_bid_action, cj_ask_action = cj_agent.get_action(state).reshape(-1) 141 | 142 | if inventory == min_inventory: 143 | ask_action = np.NaN 144 | cj_ask_action = np.NaN 145 | if inventory == max_inventory: 146 | bid_action = np.NaN 147 | cj_bid_action = np.NaN 148 | 149 | bid_actions.append(bid_action) 150 | ask_actions.append(ask_action) 151 | cj_bid_actions.append(cj_bid_action) 152 | cj_ask_actions.append(cj_ask_action) 153 | 154 | plt.plot(inventories, bid_actions, label="bid", color="k") 155 | plt.plot(inventories, ask_actions, label="ask", color="r") 156 | plt.plot(inventories, cj_bid_actions, label="bid cj", color="k", linestyle="--") 157 | plt.plot(inventories, cj_ask_actions, label="ask cj", color="r", linestyle="--") 158 | plt.legend() 159 | if save_figure: 160 | plt.title(get_experiment_string(env)) 161 | plt.savefig(path_to_figures + "/inventory_plots/" + get_experiment_string(env) + ".pdf") 162 | else: 163 | plt.show() 164 | 165 | 166 | def create_time_plot( 167 | model: PPO, 168 | env: TradingEnvironment, 169 | min_inventory: int = -3, 170 | max_inventory: int = 3, 171 | reduced_training_indices: list = None, 172 | model_uses_normalisation: bool = True, 173 | save_figure: bool = False, 174 | path_to_figures: str = "./figures", 175 | ): 176 | if model_uses_normalisation: 177 | normalised_env = StableBaselinesTradingEnvironment(ReduceStateSizeWrapper(env, reduced_training_indices)) 178 | assert env.num_trajectories == 1, "Plotting actions must be done with a single trajectory env" 179 | ppo_agent = SbAgent(model) 180 | cj_agent = CarteaJaimungalMmAgent(env=env) 181 | inventories = np.arange(min_inventory, max_inventory + 1, 1) 182 | times = np.arange(0, env.terminal_time + 0.01, 0.01) 183 | inventory_dict = {inventory: [] for inventory in 
inventories} 184 | action_dict = { 185 | "rl bid actions": deepcopy(inventory_dict), 186 | "cj bid actions": deepcopy(inventory_dict), 187 | "rl ask actions": deepcopy(inventory_dict), 188 | "cj ask actions": deepcopy(inventory_dict), 189 | } 190 | for inventory in inventories: 191 | for time in times: 192 | state = np.array([[0, inventory, time, 100]]) 193 | reduced_state = state[:, reduced_training_indices] if reduced_training_indices is not None else state 194 | if model_uses_normalisation: 195 | reduced_state = normalised_env.normalise_observation(reduced_state) 196 | action = ppo_agent.get_action(reduced_state) 197 | if model_uses_normalisation: 198 | action = normalised_env.normalise_action(action, inverse=True) 199 | bid_action, ask_action = action 200 | 201 | cj_actions = cj_agent.get_action(state) 202 | cj_bid_action = cj_actions[0, 0] 203 | cj_ask_action = cj_actions[0, 1] 204 | 205 | if inventory == min_inventory: 206 | ask_action = np.NaN 207 | cj_ask_action = np.NaN 208 | if inventory == max_inventory: 209 | bid_action = np.NaN 210 | cj_bid_action = np.NaN 211 | 212 | action_dict["rl bid actions"][inventory].append(bid_action) 213 | action_dict["rl ask actions"][inventory].append(ask_action) 214 | action_dict["cj bid actions"][inventory].append(cj_bid_action) 215 | action_dict["cj ask actions"][inventory].append(cj_ask_action) 216 | fig, axs = plt.subplots(2, 2, sharey=True, figsize=(15, 10)) 217 | for count, (name, actions) in enumerate(action_dict.items()): 218 | axs[count // 2, count % 2].set_title(name, fontsize=20) 219 | for inventory in inventories: 220 | axs[count // 2, count % 2].plot(times, actions[inventory], label=f"inventory = {inventory}") 221 | axs[count // 2, count % 2].legend() 222 | fig.tight_layout() 223 | if save_figure: 224 | plt.savefig(path_to_figures + "/time_plots/" + get_experiment_string(env) + ".pdf") 225 | else: 226 | plt.show() 227 | -------------------------------------------------------------------------------- /experiments/learning-pnl-with-different-arrival-rates.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | 5 | from experiments.helpers import ( 6 | get_cj_env, 7 | get_ppo_learner_and_callback, 8 | get_experiment_string, 9 | create_time_plot, 10 | create_inventory_plot, 11 | ) 12 | 13 | num_trajectories = 1000 14 | terminal_time = 1.0 15 | phi = 0 16 | alpha = 0 17 | sigma = 0.0 18 | initial_inventory = (-5, 6) 19 | random_start = None 20 | 21 | final_model_path = "./final_models" 22 | 23 | arrival_rates = [1.0, 10.0, 100.0] 24 | fill_exponents = [0.1, 1, 10] 25 | 26 | for arrival_rate in arrival_rates: 27 | for fill_exponent in fill_exponents: 28 | n_steps = int(10 * terminal_time * arrival_rate) 29 | env = get_cj_env( 30 | num_trajectories=num_trajectories, 31 | terminal_time=terminal_time, 32 | arrival_rate=arrival_rate, 33 | fill_exponent=fill_exponent, 34 | phi=phi, 35 | alpha=alpha, 36 | sigma=sigma, 37 | initial_inventory=initial_inventory, 38 | ) 39 | model, callback = get_ppo_learner_and_callback(env) 40 | model.learn(total_timesteps=300_000_000, callback=callback) 41 | model.save(final_model_path + "/" + get_experiment_string(env)) 42 | create_inventory_plot(model=model, env=env, save_figure=True) 43 | create_time_plot(model=model, env=env, save_figure=True) 44 | -------------------------------------------------------------------------------- /mbt_gym/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/__init__.py -------------------------------------------------------------------------------- /mbt_gym/agents/Agent.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | 6 | class Agent(metaclass=abc.ABCMeta): 7 | @abc.abstractmethod 8 | def get_action(self, state: np.ndarray) -> np.ndarray: 9 | pass 10 | 11 | def get_expected_action(self, state: np.ndarray, n_samples: int = 1000) -> np.ndarray: 12 | return np.array([self.get_action(state) for _ in range(n_samples)]).mean(axis=0) 13 | -------------------------------------------------------------------------------- /mbt_gym/agents/BaselineAgents.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import gym 4 | import numpy as np 5 | import warnings 6 | from scipy.linalg import expm 7 | 8 | from mbt_gym.agents.Agent import Agent 9 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 10 | from mbt_gym.gym.index_names import INVENTORY_INDEX, TIME_INDEX, ASSET_PRICE_INDEX, CASH_INDEX, BID_INDEX, ASK_INDEX 11 | from mbt_gym.rewards.RewardFunctions import CjMmCriterion, PnL 12 | from mbt_gym.stochastic_processes.price_impact_models import PriceImpactModel, TemporaryAndPermanentPriceImpact 13 | from mbt_gym.gym.ModelDynamics import LimitOrderModelDynamics, TradinghWithSpeedModelDynamics 14 | 15 | class RandomAgent(Agent): 16 | def __init__(self, env: gym.Env, seed: int = None): 17 | self.action_space = deepcopy(env.action_space) 18 | self.action_space.seed(seed) 19 | self.num_trajectories = env.num_trajectories 20 | 21 | def get_action(self, state: np.ndarray) -> np.ndarray: 22 | return np.repeat(self.action_space.sample().reshape(1, -1), self.num_trajectories, axis=0) 23 | 24 | 25 | class FixedActionAgent(Agent): 26 | def __init__(self, fixed_action: np.ndarray, env: gym.Env): 27 | self.fixed_action = fixed_action 28 | self.env = env 29 | 30 | def get_action(self, state: np.ndarray) -> np.ndarray: 31 | return np.repeat(self.fixed_action.reshape(1, -1), self.env.num_trajectories, axis=0) 32 | 33 | 34 | class FixedSpreadAgent(Agent): 35 | def __init__(self, env: gym.Env, half_spread: float = 1.0, offset: float = 0.0): 36 | self.half_spread = half_spread 37 | self.offset = offset 38 | self.env = env 39 | 40 | def get_action(self, state: np.ndarray) -> np.ndarray: 41 | action = np.array([[self.half_spread - self.offset, self.half_spread + self.offset]]) 42 | return np.repeat(action, self.env.num_trajectories, axis=0) 43 | 44 | 45 | class HumanAgent(Agent): 46 | def get_action(self, state: np.ndarray): 47 | bid = float(input(f"Current state is {state}. How large do you want to set midprice-bid half spread? ")) 48 | ask = float(input(f"Current state is {state}. How large do you want to set ask-midprice half spread? 
")) 49 | return np.array([bid, ask]) 50 | 51 | 52 | class AvellanedaStoikovAgent(Agent): 53 | def __init__(self, risk_aversion: float = 0.1, env: TradingEnvironment = None): 54 | self.risk_aversion = risk_aversion 55 | self.env = env or TradingEnvironment() 56 | assert isinstance(self.env, TradingEnvironment) 57 | self.terminal_time = self.env.terminal_time 58 | self.volatility = self.env.model_dynamics.midprice_model.volatility 59 | self.rate_of_arrival = self.env.model_dynamics.arrival_model.intensity 60 | self.fill_exponent = self.env.model_dynamics.fill_probability_model.fill_exponent 61 | 62 | def get_action(self, state: np.ndarray): 63 | inventory = state[:, INVENTORY_INDEX] 64 | time = state[:, TIME_INDEX] 65 | action = self._get_action(inventory, time) 66 | if action.min() < 0: 67 | warnings.warn("Avellaneda-Stoikov agent is quoting a negative spread") 68 | return action 69 | 70 | def _get_price_adjustment(self, inventory: int, time: float) -> float: 71 | return inventory * self.risk_aversion * self.volatility**2 * (self.terminal_time - time) 72 | 73 | def _get_spread(self, time: float) -> float: 74 | if self.risk_aversion == 0: 75 | return 2 / self.fill_exponent # Limit as risk aversion -> 0 76 | volatility_aversion_component = self.risk_aversion * self.volatility**2 * (self.terminal_time - time) 77 | fill_exponent_component = 2 / self.risk_aversion * np.log(1 + self.risk_aversion / self.fill_exponent) 78 | return volatility_aversion_component + fill_exponent_component 79 | 80 | def _get_action(self, inventory: int, time: float): 81 | bid_half_spread = (self._get_price_adjustment(inventory, time) + self._get_spread(time) / 2).reshape(-1, 1) 82 | ask_half_spread = (-self._get_price_adjustment(inventory, time) + self._get_spread(time) / 2).reshape(-1, 1) 83 | return np.append(bid_half_spread, ask_half_spread, axis=1) 84 | 85 | 86 | class CarteaJaimungalMmAgent(Agent): 87 | def __init__( 88 | self, 89 | env: TradingEnvironment = None, 90 | ): 91 | self.env = env or TradingEnvironment() 92 | assert isinstance(self.env.model_dynamics, LimitOrderModelDynamics), "Trader must be type LimitOrderTrader" 93 | assert isinstance(self.env.reward_function, (CjMmCriterion, PnL)), "Reward function for CjMmAgent is incorrect." 94 | self.kappa = self.env.model_dynamics.fill_probability_model.fill_exponent 95 | self.num_trajectories = self.env.num_trajectories 96 | if isinstance(self.env.reward_function, PnL): 97 | self.inventory_neutral = True 98 | self.risk_neutral_action = 1 / self.kappa * np.ones((env.num_trajectories, env.action_space.shape[0])) 99 | else: 100 | self.inventory_neutral = False 101 | self.phi = env.reward_function.per_step_inventory_aversion 102 | self.alpha = env.reward_function.terminal_inventory_aversion 103 | assert self.env.reward_function.inventory_exponent == 2.0, "Inventory exponent must be = 2." 104 | self.terminal_time = self.env.terminal_time 105 | self.lambdas = self.env.model_dynamics.arrival_model.intensity 106 | self.max_inventory = env.max_inventory 107 | self.a_matrix, self.z_vector = self._calculate_a_and_z() 108 | self.large_depth = 10_000 109 | 110 | def get_action(self, state: np.ndarray): 111 | if self.inventory_neutral: 112 | return self.risk_neutral_action 113 | else: 114 | assert ( 115 | state[0, TIME_INDEX] == state[-1, TIME_INDEX] 116 | ), "CarteaJaimungalMmAgent needs to be called on a tensor with a uniform time stamp." 
117 | current_time = state[0, TIME_INDEX] 118 | inventories = state[:, INVENTORY_INDEX] 119 | return self._calculate_deltas(inventories=inventories, current_time=current_time) 120 | 121 | def _calculate_deltas(self, current_time: float, inventories: np.ndarray): 122 | deltas = np.zeros(shape=(self.num_trajectories, 2)) 123 | h_t = self._calculate_ht(current_time) 124 | # If the inventory goes above the max level, we quote a large depth to bring it back and quote on the opposite 125 | # side as if we had an inventory equal to sign(inventory) * self.max_inventory. 126 | indices = np.clip(self.max_inventory + inventories, 0, 2 * self.max_inventory) 127 | indices = indices.astype(int) 128 | indices_minus_one = np.clip(indices - 1, 0, 2 * self.max_inventory) 129 | indices_plus_one = np.clip(indices + 1, 0, 2 * self.max_inventory) 130 | h_0 = h_t[indices] 131 | h_plus_one = h_t[indices_plus_one] 132 | h_minus_one = h_t[indices_minus_one] 133 | max_inventory_bid = h_plus_one == h_0 134 | max_inventory_ask = h_minus_one == h_0 135 | deltas[:, BID_INDEX] = (1 / self.kappa - h_plus_one + h_0 + self.large_depth * max_inventory_bid).reshape(-1) 136 | deltas[:, ASK_INDEX] = (1 / self.kappa - h_minus_one + h_0 + self.large_depth * max_inventory_ask).reshape(-1) 137 | return deltas 138 | 139 | def _calculate_ht(self, current_time: float) -> float: 140 | omega_function = self._calculate_omega(current_time) 141 | return 1 / self.kappa * np.log(omega_function) 142 | 143 | def _calculate_omega(self, current_time: float): 144 | """This is Equation (10.11) from [CJP15].""" 145 | return np.matmul(expm(self.a_matrix * (self.terminal_time - current_time)), self.z_vector) 146 | 147 | def _calculate_a_and_z(self): 148 | matrix_size = 2 * self.max_inventory + 1 149 | Amatrix = np.zeros(shape=(matrix_size, matrix_size)) 150 | z_vector = np.zeros(shape=(matrix_size, 1)) 151 | for i in range(matrix_size): 152 | inventory = self.max_inventory - i 153 | Amatrix[i, i] = -self.phi * self.kappa * inventory**2 154 | z_vector[i, 0] = np.exp(-self.alpha * self.kappa * inventory**2) 155 | if i + 1 < matrix_size: 156 | Amatrix[i, i + 1] = self.lambdas[BID_INDEX] * np.exp(-1) 157 | if i > 0: 158 | Amatrix[i, i - 1] = self.lambdas[ASK_INDEX] * np.exp(-1) 159 | return Amatrix, z_vector 160 | 161 | def calculate_true_value_function(self, state: np.ndarray): 162 | current_time = state[0, TIME_INDEX] 163 | inventories = state[:, INVENTORY_INDEX] 164 | value_fct = np.zeros(shape=(self.num_trajectories, 1)) 165 | h_t = self._calculate_ht(current_time) 166 | indices = np.clip(self.max_inventory + inventories, 0, 2 * self.max_inventory) 167 | indices = indices.astype(int) 168 | h_0 = h_t[indices] 169 | value_fct = h_0 + state[:, CASH_INDEX] + state[:, INVENTORY_INDEX] * state[:, ASSET_PRICE_INDEX] 170 | return value_fct 171 | 172 | 173 | class CarteaJaimungalOeAgent(Agent): 174 | def __init__( 175 | self, 176 | phi: float = 2 * 10 ** (-4), 177 | alpha: float = 0.0001, 178 | env: TradingEnvironment = None, 179 | ): 180 | self.phi = phi 181 | self.alpha = alpha 182 | self.env = env or TradingEnvironment() 183 | self.price_impact_model = env.model_dynamics.price_impact_model 184 | assert isinstance(self.env.model_dynamics, TradinghWithSpeedModelDynamics), "Trader must be type TradinghWithSpeedTrader" 185 | self.terminal_time = self.env.terminal_time 186 | self.temporary_price_impact = self.price_impact_model.temporary_impact_coefficient 187 | self.permanent_price_impact = self.price_impact_model.permanent_impact_coefficient 188 | 
self.num_trajectories = self.env.num_trajectories 189 | 190 | def get_action(self, state: np.ndarray): 191 | action = np.zeros(shape=(self.num_trajectories, 1)) 192 | # The formulae below is in page 147 of Cartea, Jaimungal, Penalva (2015) 193 | # Algorithmic and High-Frequency Trading 194 | # Cambridge University Press 195 | gamma = np.sqrt(self.phi / self.temporary_price_impact) 196 | zeta = (self.alpha - 0.5 * self.permanent_price_impact + np.sqrt(self.temporary_price_impact * self.phi)) / ( 197 | self.alpha - 0.5 * self.permanent_price_impact - np.sqrt(self.temporary_price_impact * self.phi) 198 | ) 199 | initial_inventory = self.env.initial_inventory 200 | 201 | time_left = self.terminal_time - state[0, TIME_INDEX] 202 | action[:, :] = ( 203 | gamma 204 | * initial_inventory 205 | * ( 206 | (zeta * np.exp(gamma * time_left) + np.exp(-gamma * time_left)) 207 | / (zeta * np.exp(gamma * self.terminal_time) - np.exp(-gamma * self.terminal_time)) 208 | ) 209 | ) 210 | return -np.sign(initial_inventory) * action 211 | -------------------------------------------------------------------------------- /mbt_gym/agents/PolicyGradientAgent.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Callable, Tuple 2 | 3 | import gym 4 | import numpy as np 5 | import torch 6 | from torch.optim.lr_scheduler import StepLR, _LRScheduler 7 | from tqdm import tqdm 8 | 9 | from mbt_gym.agents.Agent import Agent 10 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 11 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 12 | 13 | 14 | class PolicyGradientAgent(Agent): 15 | def __init__( 16 | self, 17 | policy: torch.nn.Module, 18 | action_std: Union[float, Callable] = 0.01, 19 | optimizer: torch.optim.Optimizer = None, 20 | env: gym.Env = None, 21 | lr_scheduler: _LRScheduler = None, 22 | ): 23 | self.env = env or TradingEnvironment() 24 | self.input_size = env.observation_space.shape[0] 25 | self.action_size = env.action_space.shape[0] 26 | assert self.input_size == policy[0].in_features 27 | self.policy_net = policy 28 | self.action_std = action_std 29 | self.optimizer = optimizer or torch.optim.SGD(self.policy_net.parameters(), lr=1e-1) 30 | self.lr_scheduler = lr_scheduler or StepLR(self.optimizer, step_size=1, gamma=0.995) 31 | self.noise_dist = torch.distributions.Normal 32 | self.proportion_completed: float = 0.0 33 | 34 | def get_action( 35 | self, state: np.ndarray, deterministic: bool = False, include_log_probs: bool = False 36 | ) -> Union[np.ndarray, Tuple[np.ndarray, torch.tensor]]: 37 | assert not (deterministic and include_log_probs), "include_log_probs is only an option for deterministic output" 38 | mean_value = self.policy_net(torch.tensor(state, dtype=torch.float, requires_grad=False)) 39 | std = self.action_std(self.proportion_completed) if isinstance(self.action_std, Callable) else self.action_std 40 | if deterministic: 41 | return mean_value.detach().numpy() 42 | action_dist = torch.distributions.Normal(loc=mean_value, scale=std * torch.ones_like(mean_value)) 43 | action = action_dist.sample() 44 | if include_log_probs: 45 | log_probs = action_dist.log_prob(action) 46 | return action.detach().numpy(), log_probs 47 | return action.detach().numpy() 48 | 49 | def train(self, num_epochs: int = 1, reporting_freq: int = 100): 50 | learning_losses = [] 51 | learning_rewards = [] 52 | self.proportion_completed = 0.0 53 | for epoch in tqdm(range(num_epochs)): 54 | observations, actions, rewards, log_probs 
= generate_trajectory(self.env, self, include_log_probs=True) 55 | learning_rewards.append(rewards.mean()) 56 | rewards = torch.tensor(rewards) 57 | future_rewards = self._calculate_future_rewards(rewards) 58 | loss = -torch.mean(log_probs * future_rewards) 59 | self.optimizer.zero_grad() 60 | loss.backward() 61 | self.optimizer.step() 62 | if epoch % reporting_freq == 0: 63 | tqdm.write(str(loss.item())) 64 | learning_losses.append(loss.item()) 65 | self.proportion_completed += 1 / (num_epochs - 1) 66 | self.lr_scheduler.step() 67 | return learning_losses, learning_rewards 68 | 69 | @staticmethod 70 | def _calculate_future_rewards(rewards: torch.tensor): 71 | flipped_rewards = torch.flip(rewards, dims=(-1,)) 72 | cumulative_flipped = torch.cumsum(flipped_rewards, dim=-1) 73 | return torch.flip(cumulative_flipped, dims=(-1,)) 74 | -------------------------------------------------------------------------------- /mbt_gym/agents/SbAgent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from mbt_gym.agents.Agent import Agent 4 | 5 | from stable_baselines3.common.base_class import BaseAlgorithm 6 | 7 | 8 | class SbAgent(Agent): 9 | def __init__(self, model: BaseAlgorithm, reduced_training_indices: list = None, num_trajectories: int = None): 10 | self.model = model 11 | self.num_trajectories = num_trajectories or self.model.env.num_trajectories 12 | self.num_actions = self.model.action_space.shape[0] 13 | if reduced_training_indices is not None: 14 | self.reduced_training = True 15 | self.reduced_training_indices = reduced_training_indices 16 | else: 17 | self.reduced_training = False 18 | 19 | def get_action(self, state: np.ndarray) -> np.ndarray: 20 | if self.reduced_training: 21 | state = state[:, self.reduced_training_indices] 22 | # return self.model.predict(state, deterministic=True)[0].reshape(self.num_trajectories, self.num_actions) 23 | return self.model.predict(state, deterministic=True)[0].reshape(state.shape[0], self.num_actions) 24 | 25 | def train(self, total_timesteps: int = 100000): 26 | self.model.learn(total_timesteps=total_timesteps) 27 | -------------------------------------------------------------------------------- /mbt_gym/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/agents/__init__.py -------------------------------------------------------------------------------- /mbt_gym/gym/ModelDynamics.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import gym 3 | from copy import copy 4 | from typing import Optional 5 | 6 | import numpy as np 7 | from numpy.random import default_rng 8 | 9 | 10 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, BID_INDEX, ASK_INDEX 11 | 12 | from mbt_gym.stochastic_processes.arrival_models import ArrivalModel 13 | from mbt_gym.stochastic_processes.fill_probability_models import FillProbabilityModel 14 | from mbt_gym.stochastic_processes.midprice_models import MidpriceModel 15 | from mbt_gym.stochastic_processes.price_impact_models import PriceImpactModel 16 | 17 | 18 | class ModelDynamics(metaclass=abc.ABCMeta): 19 | def __init__( 20 | self, 21 | midprice_model : MidpriceModel = None, 22 | arrival_model : ArrivalModel = None, 23 | fill_probability_model : FillProbabilityModel = None, 24 | price_impact_model : PriceImpactModel = None, 25 | num_trajectories: int = 
1, 26 | seed: int = None, 27 | ): 28 | self.midprice_model = midprice_model 29 | self.arrival_model = arrival_model 30 | self.fill_probability_model = fill_probability_model 31 | self.price_impact_model = price_impact_model 32 | self.num_trajectories = num_trajectories 33 | self.rng = default_rng(seed) 34 | self.seed_ = seed 35 | self.fill_multiplier = self._get_fill_multiplier() 36 | self.round_initial_inventory = False 37 | self.required_processes = self.get_required_stochastic_processes() 38 | self._check_processes_are_not_none(self.required_processes) 39 | self.state = None 40 | 41 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 42 | pass 43 | 44 | def get_fills(self, action: np.ndarray): 45 | pass 46 | 47 | def get_arrivals_and_fills(self, action: np.ndarray): 48 | return None, None 49 | 50 | def _limit_depths(self, action: np.ndarray): 51 | return action[:, 0:2] 52 | 53 | def get_action_space(self) -> gym.spaces.Space: 54 | pass 55 | 56 | def get_required_stochastic_processes(self): 57 | pass 58 | 59 | def _get_max_depth(self) -> Optional[float]: 60 | if self.fill_probability_model is not None: 61 | return self.fill_probability_model.max_depth 62 | else: 63 | return None 64 | 65 | def _get_max_speed(self) -> float: 66 | if self.price_impact_model is not None: 67 | return self.price_impact_model.max_speed 68 | else: 69 | return None 70 | 71 | def _get_fill_multiplier(self): 72 | ones = np.ones((self.num_trajectories, 1)) 73 | return np.append(-ones, ones, axis=1) 74 | 75 | def _check_processes_are_not_none(self, processes): 76 | for process in processes: 77 | self._check_process_is_not_none(process) 78 | 79 | def _check_process_is_not_none(self, process: str): 80 | assert getattr(self, process) is not None, f"This model dynamics cannot have env.{process} to be None." 81 | 82 | @property 83 | def midprice(self): 84 | return self.midprice_model.current_state[:, 0].reshape(-1, 1) 85 | 86 | 87 | class LimitOrderModelDynamics(ModelDynamics): 88 | """ModelDynamics for 'limit'.""" 89 | def __init__( 90 | self, 91 | midprice_model : MidpriceModel = None, 92 | arrival_model : ArrivalModel = None, 93 | fill_probability_model : FillProbabilityModel = None, 94 | num_trajectories: int = 1, 95 | seed: int = None, 96 | max_depth : float = None, 97 | ): 98 | super().__init__(midprice_model = midprice_model, 99 | arrival_model = arrival_model, 100 | fill_probability_model = fill_probability_model, 101 | num_trajectories = num_trajectories, 102 | seed = seed) 103 | self.max_depth = max_depth or self._get_max_depth() 104 | self.required_processes = self.get_required_stochastic_processes() 105 | self._check_processes_are_not_none(self.required_processes) 106 | self.round_initial_inventory = True 107 | 108 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 109 | self.state[:, INVENTORY_INDEX] += np.sum(arrivals * fills * -self.fill_multiplier, axis=1) 110 | self.state[:, CASH_INDEX] += np.sum( 111 | self.fill_multiplier 112 | * arrivals 113 | * fills 114 | * (self.midprice + self._limit_depths(action) * self.fill_multiplier), 115 | axis=1, 116 | ) 117 | 118 | def get_action_space(self) -> gym.spaces.Space: 119 | assert self.max_depth is not None, "For limit orders max_depth cannot be None." 
120 | # agent chooses spread on bid and ask 121 | return gym.spaces.Box(low=np.float32(0.0), high=np.float32(self.max_depth), shape=(2,)) 122 | 123 | def get_required_stochastic_processes(self): 124 | processes = ["arrival_model", "fill_probability_model"] 125 | return processes 126 | 127 | def get_arrivals_and_fills(self, action: np.ndarray): 128 | arrivals = self.arrival_model.get_arrivals() 129 | depths = self._limit_depths(action) 130 | fills = self.fill_probability_model.get_fills(depths) 131 | return arrivals, fills 132 | 133 | 134 | class AtTheTouchModelDynamics(ModelDynamics): 135 | """ModelDynamics for 'touch'.""" 136 | def __init__( 137 | self, 138 | midprice_model : MidpriceModel = None, 139 | arrival_model : ArrivalModel = None, 140 | fill_probability_model : FillProbabilityModel = None, 141 | num_trajectories: int = 1, 142 | fixed_market_half_spread: float = 0.5, 143 | seed: int = None, 144 | ): 145 | super().__init__(midprice_model = midprice_model, 146 | arrival_model = arrival_model, 147 | fill_probability_model = fill_probability_model, 148 | num_trajectories = num_trajectories, 149 | seed = seed) 150 | self.round_initial_inventory = True 151 | self.fixed_market_half_spread = fixed_market_half_spread 152 | 153 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 154 | self.state[:, CASH_INDEX] += np.sum( 155 | self.fill_multiplier 156 | * arrivals 157 | * fills 158 | * (self.midprice + self.fixed_market_half_spread * self.fill_multiplier), 159 | axis=1, 160 | ) 161 | self.state[:, INVENTORY_INDEX] += np.sum(arrivals * fills * -self.fill_multiplier, axis=1) 162 | 163 | def _post_at_touch(self, action: np.ndarray): 164 | return action[:, 0:2] 165 | 166 | def get_action_space(self) -> gym.spaces.Space: 167 | return gym.spaces.MultiBinary(2) 168 | 169 | def get_required_stochastic_processes(self): 170 | processes = ["arrival_model"] 171 | return processes 172 | 173 | def get_arrivals_and_fills(self, action: np.ndarray): 174 | arrivals = self.arrival_model.get_arrivals() 175 | fills = self._post_at_touch(action) 176 | return arrivals, fills 177 | 178 | 179 | class LimitAndMarketOrderModelDynamics(ModelDynamics): 180 | """ModelDynamics for 'limit_and_market'.""" 181 | def __init__( 182 | self, 183 | midprice_model : MidpriceModel = None, 184 | arrival_model : ArrivalModel = None, 185 | fill_probability_model : FillProbabilityModel = None, 186 | num_trajectories: int = 1, 187 | seed: int = None, 188 | max_depth : float = None, 189 | fixed_market_half_spread : float = 0.5, 190 | ): 191 | super().__init__(midprice_model = midprice_model, 192 | arrival_model = arrival_model, 193 | fill_probability_model = fill_probability_model, 194 | num_trajectories = num_trajectories, 195 | seed = seed) 196 | self.max_depth = max_depth or self._get_max_depth() 197 | self.fixed_market_half_spread = fixed_market_half_spread 198 | self.required_processes = self.get_required_stochastic_processes() 199 | self._check_processes_are_not_none(self.required_processes) 200 | self.round_initial_inventory = True 201 | 202 | def _market_order_buy(self, action: np.ndarray): 203 | return action[:, 2 + BID_INDEX] 204 | 205 | def _market_order_sell(self, action: np.ndarray): 206 | return action[:, 2 + ASK_INDEX] 207 | 208 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 209 | mo_buy = np.single(self._market_order_buy(action) > 0.5) 210 | mo_sell = np.single(self._market_order_sell(action) > 0.5) 211 | best_bid = (self.midprice - 
self.fixed_market_half_spread).reshape(-1,) 212 | best_ask = (self.midprice + self.fixed_market_half_spread).reshape(-1,) 213 | self.state[:, CASH_INDEX] += mo_sell * best_bid - mo_buy * best_ask 214 | self.state[:, INVENTORY_INDEX] += mo_buy - mo_sell 215 | self.state[:, INVENTORY_INDEX] += np.sum(arrivals * fills * -self.fill_multiplier, axis=1) 216 | self.state[:, CASH_INDEX] += np.sum( 217 | self.fill_multiplier 218 | * arrivals 219 | * fills 220 | * (self.midprice + self._limit_depths(action) * self.fill_multiplier), 221 | axis=1, 222 | ) 223 | 224 | def get_action_space(self) -> gym.spaces.Space: 225 | assert self.max_depth is not None, "For limit orders max_depth cannot be None." 226 | # agent chooses spread on bid and ask 227 | return gym.spaces.Box( 228 | low=np.zeros(4), 229 | high=np.array([self.max_depth, self.max_depth, 1, 1], dtype=np.float32), 230 | ) 231 | 232 | def get_required_stochastic_processes(self): 233 | processes = ["arrival_model", "fill_probability_model"] 234 | return processes 235 | 236 | def get_arrivals_and_fills(self, action: np.ndarray): 237 | arrivals = self.arrival_model.get_arrivals() 238 | depths = self._limit_depths(action) 239 | fills = self.fill_probability_model.get_fills(depths) 240 | return arrivals, fills 241 | 242 | 243 | class TradinghWithSpeedModelDynamics(ModelDynamics): 244 | """ModelDynamics for 'speed'.""" 245 | def __init__( 246 | self, 247 | midprice_model : MidpriceModel = None, 248 | price_impact_model : PriceImpactModel = None, 249 | num_trajectories: int = 1, 250 | seed: int = None, 251 | max_speed : float = None, 252 | ): 253 | super().__init__(midprice_model = midprice_model, 254 | price_impact_model = price_impact_model, 255 | num_trajectories = num_trajectories, 256 | seed = seed) 257 | self.max_speed = max_speed or self._get_max_speed() 258 | self.required_processes = self.get_required_stochastic_processes() 259 | self._check_processes_are_not_none(self.required_processes) 260 | self.round_initial_inventory = False 261 | 262 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 263 | price_impact = self.price_impact_model.get_impact(action) 264 | execution_price = self.midprice + price_impact 265 | volume = action * self.midprice_model.step_size 266 | self.state[:, CASH_INDEX] -= np.squeeze(volume * execution_price) 267 | self.state[:, INVENTORY_INDEX] += np.squeeze(volume) 268 | 269 | def get_action_space(self) -> gym.spaces.Space: 270 | # agent chooses speed of trading: positive buys, negative sells 271 | return gym.spaces.Box(low=np.float32([-self.max_speed]), high=np.float32([self.max_speed])) 272 | 273 | def get_required_stochastic_processes(self): 274 | processes = ["price_impact_model"] 275 | return processes 276 | -------------------------------------------------------------------------------- /mbt_gym/gym/MultiprocessTradingEnv.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | from collections import OrderedDict 3 | from typing import Any, Callable, List, Optional, Sequence, Tuple, Type, Union, Iterable 4 | 5 | import gym 6 | import numpy as np 7 | from gym import spaces 8 | 9 | from stable_baselines3.common.vec_env.base_vec_env import ( 10 | CloudpickleWrapper, 11 | VecEnv, 12 | VecEnvIndices, 13 | VecEnvObs, 14 | VecEnvStepReturn, 15 | ) 16 | 17 | from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv, _flatten_obs 18 | 19 | STORE_TERMINAL_OBSERVATION_INFO = True 20 | 21 | 22 | def _worker( 23 | 
remote: mp.connection.Connection, parent_remote: mp.connection.Connection, env_fn_wrapper: CloudpickleWrapper 24 | ) -> None: 25 | # Import here to avoid a circular import 26 | from stable_baselines3.common.env_util import is_wrapped 27 | 28 | parent_remote.close() 29 | env = env_fn_wrapper.var() 30 | while True: 31 | try: 32 | cmd, data = remote.recv() 33 | if cmd == "step": 34 | observation, reward, done, infos = env.step(data) 35 | single_done = done[0] if len(done) > 1 else done 36 | if single_done: 37 | if STORE_TERMINAL_OBSERVATION_INFO: 38 | infos = infos.copy() 39 | for count, info in enumerate(infos): 40 | # save final observation where user can get it, then automatically reset (an SB3 convention). 41 | info["terminal_observation"] = observation[count, :] 42 | observation = env.reset() 43 | remote.send((observation, reward, done, infos)) 44 | elif cmd == "seed": 45 | remote.send(env.seed(data)) 46 | elif cmd == "reset": 47 | observation = env.reset() 48 | remote.send(observation) 49 | elif cmd == "render": 50 | remote.send(env.render(data)) 51 | elif cmd == "close": 52 | env.close() 53 | remote.close() 54 | break 55 | elif cmd == "get_spaces": 56 | remote.send((env.observation_space, env.action_space)) 57 | elif cmd == "env_method": 58 | method = getattr(env, data[0]) 59 | remote.send(method(*data[1], **data[2])) 60 | elif cmd == "get_attr": 61 | remote.send(getattr(env, data)) 62 | elif cmd == "set_attr": 63 | remote.send(setattr(env, data[0], data[1])) 64 | elif cmd == "is_wrapped": 65 | remote.send(is_wrapped(env, data)) 66 | else: 67 | raise NotImplementedError(f"`{cmd}` is not implemented in the worker") 68 | except EOFError: 69 | break 70 | 71 | 72 | class MultiprocessTradingEnv(SubprocVecEnv): 73 | """ 74 | This is a slight modification of SubprocVecEnv, the details of which can be found at 75 | https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#subprocvecenv. 76 | 77 | In particular, it modifies SubprocVecEnv so that the inputs are already VecEnvs. This allows the user to choose the 78 | amount of vectorisation that is performed via numpy (in VectorizedTradingEnvironment) and the number of 79 | multiprocessing processes.
80 | """ 81 | 82 | def __init__(self, env_fns: List[Callable[[], gym.Env]], start_method: Optional[str] = None): 83 | super().__init__(self, env_fns, start_method) 84 | 85 | self.remotes[0].send(("get_attr", "num_trajectories")) 86 | num_trajectories_per_env = self.remotes[0].recv() 87 | 88 | self.remotes[0].send(("get_attr", "n_steps")) 89 | n_steps = self.remotes[0].recv() 90 | 91 | self.num_trajectories_per_env = num_trajectories_per_env 92 | self.num_multiprocess_envs = len(self.remotes) 93 | self.n_steps = n_steps 94 | self.num_trajectories = len(env_fns) * num_trajectories_per_env 95 | self.num_envs = self.num_trajectories 96 | 97 | def step_async(self, actions: np.ndarray) -> None: 98 | multi_actions = self.flatten_multi(actions, inverse=True) 99 | for remote, action in zip(self.remotes, multi_actions): 100 | remote.send(("step", action)) 101 | self.waiting = True 102 | 103 | def step_wait(self) -> VecEnvStepReturn: 104 | results = [remote.recv() for remote in self.remotes] 105 | self.waiting = False 106 | obs, rews, dones, infos = zip(*results) 107 | obs = self.flatten_multi(_flatten_obs(obs, self.observation_space)) 108 | rews = self.flatten_multi(np.stack(rews)) 109 | dones = self.flatten_multi(np.stack(dones)) 110 | return obs, rews, dones, list(np.stack(infos).reshape(-1)) 111 | 112 | def flatten_multi(self, array: np.ndarray, inverse=False): 113 | if inverse: 114 | return list(array.reshape(self.num_multiprocess_envs, self.num_trajectories_per_env, -1)) 115 | else: 116 | return array.reshape(self.num_multiprocess_envs * self.num_trajectories_per_env, -1).squeeze() 117 | 118 | def reset(self) -> VecEnvObs: 119 | for remote in self.remotes: 120 | remote.send(("reset", None)) 121 | obs = [remote.recv() for remote in self.remotes] 122 | obs = _flatten_obs(obs, self.observation_space) 123 | return self.flatten_multi(obs) 124 | -------------------------------------------------------------------------------- /mbt_gym/gym/StableBaselinesTradingEnvironment.py: -------------------------------------------------------------------------------- 1 | from typing import List, Any, Type, Optional, Union, Sequence 2 | 3 | import gym 4 | import numpy as np 5 | from stable_baselines3.common.vec_env import VecEnv 6 | from stable_baselines3.common.vec_env.base_vec_env import VecEnvObs, VecEnvStepReturn, VecEnvIndices 7 | 8 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 9 | 10 | 11 | class StableBaselinesTradingEnvironment(VecEnv): 12 | def __init__( 13 | self, 14 | trading_env: TradingEnvironment, 15 | store_terminal_observation_info: bool = True, 16 | ): 17 | self.env = trading_env 18 | self.store_terminal_observation_info = store_terminal_observation_info 19 | self.actions: np.ndarray = self.env.action_space.sample() 20 | super().__init__(self.env.num_trajectories, self.env.observation_space, self.env.action_space) 21 | 22 | def reset(self) -> VecEnvObs: 23 | return self.env.reset() 24 | 25 | def step_async(self, actions: np.ndarray) -> None: 26 | self.actions = actions 27 | 28 | def step_wait(self) -> VecEnvStepReturn: 29 | obs, rewards, dones, infos = self.env.step(self.actions) 30 | if dones.min(): 31 | if self.store_terminal_observation_info: 32 | infos = infos.copy() 33 | for count, info in enumerate(infos): 34 | # save final observation where user can get it, then automatically reset (an SB3 convention). 
35 | info["terminal_observation"] = obs[count, :] 36 | obs = self.env.reset() 37 | return obs, rewards, dones, infos 38 | 39 | def close(self) -> None: 40 | pass 41 | 42 | def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]: 43 | pass 44 | 45 | def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None: 46 | pass 47 | 48 | def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]: 49 | pass 50 | 51 | def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]: 52 | return [False for _ in range(self.env.num_trajectories)] 53 | 54 | def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]: 55 | return self.env.seed(seed) 56 | 57 | def get_images(self) -> Sequence[np.ndarray]: 58 | pass 59 | 60 | @property 61 | def num_trajectories(self): 62 | return self.env.num_trajectories 63 | 64 | @property 65 | def n_steps(self): 66 | return self.env.n_steps 67 | -------------------------------------------------------------------------------- /mbt_gym/gym/TradingEnvironment.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from copy import copy, deepcopy 3 | from typing import Union, Tuple, Callable 4 | 5 | import gym 6 | import numpy as np 7 | 8 | from gym.spaces import Box 9 | 10 | from mbt_gym.agents.Agent import Agent 11 | from mbt_gym.gym.ModelDynamics import ModelDynamics, LimitOrderModelDynamics 12 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 13 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 14 | from mbt_gym.stochastic_processes.arrival_models import ArrivalModel, PoissonArrivalModel 15 | from mbt_gym.stochastic_processes.fill_probability_models import FillProbabilityModel, ExponentialFillFunction 16 | from mbt_gym.stochastic_processes.midprice_models import MidpriceModel, BrownianMotionMidpriceModel 17 | from mbt_gym.stochastic_processes.price_impact_models import PriceImpactModel 18 | from mbt_gym.gym.info_calculators import InfoCalculator 19 | from mbt_gym.rewards.RewardFunctions import RewardFunction, PnL 20 | 21 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, TIME_INDEX 22 | 23 | 24 | class TradingEnvironment(gym.Env): 25 | metadata = {"render.modes": ["human"]} 26 | 27 | def __init__( 28 | self, 29 | terminal_time: float = 1.0, 30 | n_steps: int = 20 * 10, 31 | reward_function: RewardFunction = None, 32 | model_dynamics: ModelDynamics = None, 33 | initial_cash: float = 0.0, 34 | initial_inventory: Union[int, Tuple[float, float]] = 0, # Either a deterministic initial inventory, or a tuple 35 | max_inventory: int = 10_000, # representing the mean and variance of it. 36 | max_cash: float = None, 37 | max_stock_price: float = None, 38 | start_time: Union[float, int, Callable] = 0.0, 39 | info_calculator: InfoCalculator = None, # episode given as a proportion. 
40 | seed: int = None, 41 | num_trajectories: int = 1, 42 | normalise_action_space: bool = True, 43 | normalise_observation_space: bool = True, 44 | normalise_rewards: bool = False, 45 | ): 46 | super(TradingEnvironment, self).__init__() 47 | self.terminal_time = terminal_time 48 | self.n_steps = n_steps 49 | self._step_size = self.terminal_time / self.n_steps 50 | self.reward_function = reward_function or PnL() 51 | self.model_dynamics = model_dynamics or LimitOrderModelDynamics( 52 | midprice_model=BrownianMotionMidpriceModel( 53 | step_size=self._step_size, num_trajectories=num_trajectories, seed=seed 54 | ), 55 | arrival_model=PoissonArrivalModel( 56 | intensity=np.array([100, 100]), step_size=self._step_size, num_trajectories=num_trajectories, seed=seed 57 | ), 58 | fill_probability_model=ExponentialFillFunction( 59 | step_size=self._step_size, num_trajectories=num_trajectories, seed=seed 60 | ), 61 | num_trajectories=num_trajectories, 62 | seed=seed, 63 | ) 64 | self.stochastic_processes = self._get_stochastic_processes() 65 | self.stochastic_process_indices = self._get_stochastic_process_indices() 66 | self.num_trajectories = num_trajectories 67 | self.initial_cash = initial_cash 68 | self.initial_inventory = initial_inventory 69 | self.max_inventory = max_inventory 70 | if seed: 71 | self.seed(seed) 72 | self.rng = np.random.default_rng(seed) 73 | self.start_time = start_time 74 | self.model_dynamics.state = self.initial_state 75 | self.max_stock_price = max_stock_price or self.model_dynamics.midprice_model.max_value[0, 0] 76 | self.max_cash = max_cash or self._get_max_cash() 77 | self.info_calculator = info_calculator 78 | self._empty_infos = self._get_empty_infos() 79 | self.observation_space = self._get_observation_space() 80 | self.action_space = self.model_dynamics.get_action_space() 81 | self.normalise_action_space_ = normalise_action_space 82 | self.normalise_observation_space_ = normalise_observation_space 83 | self.normalise_rewards_ = normalise_rewards 84 | if self.normalise_observation_space_: 85 | self.original_observation_space = copy(self.observation_space) 86 | self.observation_space = self._get_normalised_observation_space() 87 | if self.normalise_action_space_: 88 | self.original_action_space = copy(self.action_space) 89 | self.action_space = self._get_normalised_action_space() 90 | if self.normalise_rewards_: 91 | assert isinstance(self.model_dynamics.arrival_model, PoissonArrivalModel) and isinstance( 92 | self.model_dynamics.fill_probability_model, ExponentialFillFunction 93 | ), "Arrival model must be Poisson and fill probability model must be exponential to scale rewards" 94 | self.reward_scaling = 1 / self._get_inventory_neutral_rewards() 95 | 96 | def reset(self): 97 | for process in self.stochastic_processes.values(): 98 | process.reset() 99 | self.model_dynamics.state = self.initial_state 100 | self.reward_function.reset(self.model_dynamics.state.copy()) 101 | return self.normalise_observation(self.model_dynamics.state.copy()) 102 | 103 | def step(self, action: np.ndarray): 104 | action = self.normalise_action(action, inverse=True) 105 | current_state = self.model_dynamics.state.copy() 106 | next_state = self._update_state(action) 107 | dones = self._get_dones() 108 | rewards = self.reward_function.calculate(current_state, action, next_state, dones[0]) 109 | infos = self._calculate_infos(current_state, action, rewards) 110 | return self.normalise_observation(next_state.copy()), self.normalise_rewards(rewards), dones, infos 111 | 112 | def 
normalise_observation(self, obs: np.ndarray, inverse: bool = False): 113 | if self.normalise_observation_space_ and not inverse: 114 | return (obs - self._intercept_obs_norm) / self._gradient_obs_norm - 1 115 | elif self.normalise_observation_space_ and inverse: 116 | return (obs + 1) * self._gradient_obs_norm + self._intercept_obs_norm 117 | else: 118 | return obs 119 | 120 | def normalise_action(self, action: np.ndarray, inverse: bool = False): 121 | if self.normalise_action_space_ and not inverse: 122 | return (action - self._intercept_action_norm) / self._gradient_action_norm - 1 123 | elif self.normalise_action_space_ and inverse: 124 | return (action + 1) * self._gradient_action_norm + self._intercept_action_norm 125 | else: 126 | return action 127 | 128 | def normalise_rewards(self, rewards: np.ndarray): 129 | return self.reward_scaling * rewards if self.normalise_rewards_ else rewards 130 | 131 | @property 132 | def initial_state(self) -> np.ndarray: 133 | scalar_initial_state = np.array([[self.initial_cash, 0, 0.0]]) 134 | initial_state = np.repeat(scalar_initial_state, self.num_trajectories, axis=0) 135 | start_time = self._get_start_time() 136 | initial_state[:, TIME_INDEX] = start_time * np.ones((self.num_trajectories,)) 137 | initial_state[:, INVENTORY_INDEX] = self._get_initial_inventories() 138 | for process in self.stochastic_processes.values(): 139 | initial_state = np.append(initial_state, process.initial_vector_state, axis=1) 140 | return initial_state 141 | 142 | @property 143 | def state(self): 144 | return self.model_dynamics.state 145 | 146 | @property 147 | def is_at_max_inventory(self): 148 | return self.state[:, INVENTORY_INDEX] >= self.max_inventory 149 | 150 | @property 151 | def is_at_min_inventory(self): 152 | return self.state[:, INVENTORY_INDEX] <= -self.max_inventory 153 | 154 | @property 155 | def step_size(self): 156 | return self._step_size 157 | 158 | @step_size.setter 159 | def step_size(self, step_size: float): 160 | self._step_size = step_size 161 | for process_name, process in self.stochastic_processes.items(): 162 | if process.step_size != step_size: 163 | process.step_size = step_size 164 | if hasattr(self.reward_function, "step_size"): 165 | self.reward_function.step_size = step_size 166 | 167 | @property 168 | def num_trajectories(self): 169 | return self._num_trajectories 170 | 171 | @num_trajectories.setter 172 | def num_trajectories(self, num_trajectories: int): 173 | self._num_trajectories = num_trajectories 174 | for process_name, process in self.stochastic_processes.items(): 175 | if process.num_trajectories != num_trajectories: 176 | process.num_trajectories = num_trajectories 177 | self._empty_infos = self._get_empty_infos() 178 | self.model_dynamics.fill_multiplier = self.model_dynamics._get_fill_multiplier() 179 | 180 | @property 181 | def _intercept_obs_norm(self): 182 | return self.original_observation_space.low 183 | 184 | @property 185 | def _gradient_obs_norm(self): 186 | return (self.original_observation_space.high - self.original_observation_space.low) / 2 187 | 188 | @property 189 | def _intercept_action_norm(self): 190 | return self.original_action_space.low 191 | 192 | @property 193 | def _gradient_action_norm(self): 194 | return (self.original_action_space.high - self.original_action_space.low) / 2 195 | 196 | # state[0]=cash, state[1]=inventory, state[2]=time, state[3] = asset_price, and then remaining states depend on 197 | # the dimensionality of the arrival process, the midprice process and the fill probability process. 
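# For example (illustrative values): with the default LimitOrderModelDynamics built in __init__ above, the
# Poisson arrival model adds no extra state of its own, so if the fill model is likewise stateless a state row
# reduces to [cash, inventory, time, midprice], e.g. np.array([0.0, 0, 0.05, 100.2]).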
198 | def _update_state(self, action: np.ndarray) -> np.ndarray: 199 | arrivals, fills = self.model_dynamics.get_arrivals_and_fills(action) 200 | if fills is not None: 201 | fills = self._remove_max_inventory_fills(fills) 202 | self._update_agent_state(arrivals, fills, action) 203 | self._update_market_state(arrivals, fills, action) 204 | return self.model_dynamics.state 205 | 206 | def _update_market_state(self, arrivals, fills, action): 207 | for process_name, process in self.stochastic_processes.items(): 208 | process.update(arrivals, fills, action, self.model_dynamics.state) 209 | lower_index = self.stochastic_process_indices[process_name][0] 210 | upper_index = self.stochastic_process_indices[process_name][1] 211 | self.model_dynamics.state[:, lower_index:upper_index] = process.current_state 212 | 213 | def _update_agent_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 214 | self.model_dynamics.update_state(arrivals, fills, action) 215 | self._clip_inventory_and_cash() 216 | self.model_dynamics.state[:, TIME_INDEX] += self.step_size 217 | 218 | def _get_dones(self): 219 | done = self.model_dynamics.state[0, TIME_INDEX] >= self.terminal_time - self.step_size / 2 220 | return np.full((self.num_trajectories,), done, dtype=bool) 221 | 222 | def _calculate_infos(self, current_state, action, rewards): 223 | return ( 224 | self.info_calculator.calculate(current_state, action, rewards) 225 | if self.info_calculator is not None 226 | else self._empty_infos 227 | ) 228 | 229 | def _get_max_cash(self) -> float: 230 | return self.n_steps * self.max_stock_price # TODO: make this a tighter bound 231 | 232 | def _get_observation_space(self) -> gym.spaces.Space: 233 | """The observation space consists of a numpy array containg the agent's cash, the agent's inventory and the 234 | current time. It also contains the states of the arrival model, the midprice model and the fill probability 235 | model in that order.""" 236 | low = np.array([-self.max_cash, -self.max_inventory, 0]) 237 | high = np.array([self.max_cash, self.max_inventory, self.terminal_time]) 238 | for process in self.stochastic_processes.values(): 239 | low = np.append(low, process.min_value) 240 | high = np.append(high, process.max_value) 241 | return Box(low=np.float32(low), high=np.float32(high)) 242 | 243 | def _get_normalised_observation_space(self): 244 | # Linear normalisation of the gym.Box space so that the domain of the observation space is [-1,1]. 245 | return gym.spaces.Box( 246 | low=-np.ones_like(self.observation_space.low, dtype=np.float32), 247 | high=np.ones_like(self.observation_space.high, dtype=np.float32), 248 | ) 249 | 250 | def _get_normalised_action_space(self): 251 | # Linear normalisation of the gym.Box space so that the domain of the action space is [-1,1]. 252 | return gym.spaces.Box( 253 | low=-np.ones_like(self.action_space.low, dtype=np.float32), 254 | high=np.ones_like(self.action_space.high, dtype=np.float32), 255 | ) 256 | 257 | def _get_start_time(self): 258 | if isinstance(self.start_time, (float, int)): 259 | random_start = self.start_time 260 | elif isinstance(self.start_time, Callable): 261 | random_start = self.start_time() 262 | else: 263 | raise NotImplementedError 264 | return self._quantise_time_to_step(random_start) 265 | 266 | def _quantise_time_to_step(self, time: float): 267 | assert (time >= 0.0) and (time < self.terminal_time), "Start time is not within (0, env.terminal_time)." 
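# For instance, with the defaults terminal_time = 1.0 and n_steps = 200 (so step_size = 0.005), a requested
# start time of 0.0124 is quantised to 0.01 by the rounding below.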
268 | return np.round(time / self.step_size) * self.step_size 269 | 270 | def _get_initial_inventories(self) -> np.ndarray: 271 | if isinstance(self.initial_inventory, tuple) and len(self.initial_inventory) == 2: 272 | return self.rng.integers(*self.initial_inventory, size=self.num_trajectories) 273 | elif isinstance(self.initial_inventory, int): 274 | return self.initial_inventory * np.ones((self.num_trajectories,)) 275 | elif isinstance(self.initial_inventory, Callable): 276 | initial_inventory = self.initial_inventory() 277 | if self.model_dynamics.round_initial_inventory: 278 | initial_inventory = int(np.round(initial_inventory)) 279 | return initial_inventory 280 | else: 281 | raise Exception("Initial inventory must be a tuple of length 2 or an int.") 282 | 283 | def _clip_inventory_and_cash(self): 284 | self.model_dynamics.state[:, INVENTORY_INDEX] = self._clip( 285 | self.model_dynamics.state[:, INVENTORY_INDEX], -self.max_inventory, self.max_inventory, cash_flag=False 286 | ) 287 | self.model_dynamics.state[:, CASH_INDEX] = self._clip( 288 | self.model_dynamics.state[:, CASH_INDEX], -self.max_cash, self.max_cash, cash_flag=True 289 | ) 290 | 291 | def _clip(self, not_clipped: float, min: float, max: float, cash_flag: bool) -> float: 292 | clipped = np.clip(not_clipped, min, max) 293 | if (not_clipped != clipped).any() and cash_flag: 294 | print(f"Clipping agent's cash from {not_clipped} to {clipped}.") 295 | if (not_clipped != clipped).any() and not cash_flag: 296 | print(f"Clipping agent's inventory from {not_clipped} to {clipped}.") 297 | return clipped 298 | 299 | @staticmethod 300 | def _clamp(probability): 301 | return max(min(probability, 1), 0) 302 | 303 | def _get_stochastic_processes(self): 304 | stochastic_processes = dict() 305 | for process_name in ["midprice_model", "arrival_model", "fill_probability_model", "price_impact_model"]: 306 | process: StochasticProcessModel = getattr(self.model_dynamics, process_name) 307 | if process is not None: 308 | stochastic_processes[process_name] = process 309 | return OrderedDict(stochastic_processes) 310 | 311 | def _get_stochastic_process_indices(self): 312 | process_indices = dict() 313 | count = 3 314 | for process_name, process in self.stochastic_processes.items(): 315 | dimension = int(process.initial_vector_state.shape[1]) 316 | process_indices[process_name] = (count, count + dimension) 317 | count += dimension 318 | return OrderedDict(process_indices) 319 | 320 | def _get_empty_infos(self): 321 | return [{} for _ in range(self.num_trajectories)] if self.num_trajectories > 1 else {} 322 | 323 | def _remove_max_inventory_fills(self, fills: np.ndarray) -> np.ndarray: 324 | fill_multiplier = np.concatenate( 325 | ((1 - self.is_at_max_inventory).reshape(-1, 1), (1 - self.is_at_min_inventory).reshape(-1, 1)), axis=1 326 | ) 327 | return fill_multiplier * fills 328 | 329 | def _get_inventory_neutral_rewards(self, num_total_trajectories=100_000): 330 | fixed_action = 1 / self.model_dynamics.fill_probability_model.fill_exponent 331 | full_trajectory_env = deepcopy(self) 332 | full_trajectory_env.start_time = 0.0 333 | full_trajectory_env.num_trajectories = num_total_trajectories 334 | full_trajectory_env.normalise_rewards_ = False 335 | 336 | class FixedAgent(Agent): 337 | def get_action(self, obs: np.ndarray) -> np.ndarray: 338 | return np.ones((num_total_trajectories, 2)) * fixed_action 339 | 340 | fixed_agent = FixedAgent() 341 | _, _, rewards = generate_trajectory(full_trajectory_env, fixed_agent) 342 | mean_rewards = 
np.mean(rewards) * self.n_steps 343 | return mean_rewards 344 | 345 | def seed(self, seed: int = None): 346 | self.rng = np.random.default_rng(seed) 347 | for i, process in enumerate(self.stochastic_processes.values()): 348 | process.seed(seed + i + 1) 349 | -------------------------------------------------------------------------------- /mbt_gym/gym/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/gym/__init__.py -------------------------------------------------------------------------------- /mbt_gym/gym/backtesting.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import pandas as pd 4 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 5 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, ASSET_PRICE_INDEX 6 | from mbt_gym.agents.Agent import Agent 7 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 8 | import warnings 9 | 10 | 11 | def get_sharpe_ratio(env: gym.Env, agent: Agent, risk_free_rate: float = 0.099): 12 | """ 13 | The Annualized Sharpe Ratio is calculated as: 14 | Sharpe_Ratio = sqrt(num_steps)*(Returns - Risk Free Rate)/(Std of Return) 15 | It measures the reward in relation to risk. 16 | """ 17 | assert env.num_trajectories == 1, "Backtesting is applied on a single trajectory" 18 | obs, _, _ = generate_trajectory(env, agent) 19 | portfolio_values = (obs[:, CASH_INDEX, :] + obs[:, INVENTORY_INDEX, :] * obs[:, ASSET_PRICE_INDEX, :]).squeeze() 20 | if min(np.abs(portfolio_values)) < 1e-6: 21 | warnings.warn("Runtime Warning: Division by Zero") 22 | return_pcts = np.diff(portfolio_values, 1) / portfolio_values[1:] 23 | annualized_std_returns = return_pcts.std() * np.sqrt(env.n_steps) 24 | return_pcts_mean = return_pcts.mean() 25 | if return_pcts_mean < 0: 26 | warnings.warn("Warning: Mean Return % is negative. Sharpe Ratio may not be appropriate.") 27 | return (return_pcts_mean * env.n_steps - risk_free_rate) / annualized_std_returns 28 | 29 | 30 | def get_sortino_ratio(env: gym.Env, agent: Agent, risk_free_rate: float = 0.099): 31 | """ 32 | The Sortino Ratio is the Sharpe Ratio but restricted to only negative returns. 33 | Sortino_Ratio = sqrt(num_steps)*(Returns - Risk Free Rate)/(Std of negative returns) 34 | """ 35 | assert env.num_trajectories == 1, "Backtesting is applied on a single trajectory" 36 | obs, _, _ = generate_trajectory(env, agent) 37 | portfolio_values = (obs[:, CASH_INDEX, :] + obs[:, INVENTORY_INDEX, :] * obs[:, ASSET_PRICE_INDEX, :]).squeeze() 38 | if min(np.abs(portfolio_values)) < 1e-6: 39 | warnings.warn("Runtime Warning: Division by Zero") 40 | return_pcts = np.diff(portfolio_values, 1) / portfolio_values[1:] 41 | loss_pcts = return_pcts[return_pcts < 0] 42 | annualized_std_returns = loss_pcts.std() * np.sqrt(env.n_steps) 43 | return_pcts_mean = return_pcts.mean() 44 | if return_pcts_mean < 0: 45 | warnings.warn("Warning: Mean Return % is negative. Sortino Ratio may not be appropriate.") 46 | return (return_pcts_mean * env.n_steps - risk_free_rate) / annualized_std_returns 47 | 48 | 49 | def get_maximum_drawdown(env: TradingEnvironment, agent: Agent): 50 | """ 51 | The maximum drawdown is the biggest difference between a peak and a trough in portfolio value. 
52 | """ 53 | assert env.num_trajectories == 1, "Backtesting is applied on a single trajectory" 54 | obs, _, _ = generate_trajectory(env, agent) 55 | portfolio_values = (obs[:, CASH_INDEX, :] + obs[:, INVENTORY_INDEX, :] * obs[:, ASSET_PRICE_INDEX, :]).squeeze() 56 | return_pcts = pd.Series(np.diff(portfolio_values, 1) / portfolio_values[1:]) 57 | cum_prods = (return_pcts + 1).cumprod() 58 | peak = cum_prods.expanding(min_periods=1).max() 59 | drawdown = (cum_prods / peak) - 1 60 | return drawdown.min() 61 | -------------------------------------------------------------------------------- /mbt_gym/gym/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/gym/helpers/__init__.py -------------------------------------------------------------------------------- /mbt_gym/gym/helpers/generate_trajectory.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import torch 4 | 5 | from mbt_gym.agents.Agent import Agent 6 | 7 | 8 | def generate_trajectory(env: gym.Env, agent: Agent, seed: int = None, include_log_probs: bool = False): 9 | if seed is not None: 10 | env.seed(seed) 11 | obs_space_dim = env.observation_space.shape[0] 12 | action_space_dim = env.action_space.shape[0] 13 | observations = np.zeros((env.num_trajectories, obs_space_dim, env.n_steps + 1)) 14 | actions = np.zeros((env.num_trajectories, action_space_dim, env.n_steps)) 15 | rewards = np.zeros((env.num_trajectories, 1, env.n_steps)) 16 | if include_log_probs: 17 | log_probs = torch.zeros((env.num_trajectories, env.action_space.shape[0], env.n_steps)) 18 | obs = env.reset() 19 | observations[:, :, 0] = obs 20 | count = 0 21 | while True: 22 | if include_log_probs: 23 | action, log_prob = agent.get_action(obs, include_log_probs=True) 24 | else: 25 | action = agent.get_action(obs) 26 | obs, reward, done, _ = env.step(action) 27 | actions[:, :, count] = action 28 | observations[:, :, count + 1] = obs 29 | rewards[:, :, count] = reward.reshape(-1, 1) 30 | if include_log_probs: 31 | log_probs[:, :, count] = log_prob 32 | if (env.num_trajectories > 1 and done[0]) or (env.num_trajectories == 1 and done): 33 | break 34 | count += 1 35 | if include_log_probs: 36 | return observations, actions, rewards, log_probs 37 | else: 38 | return observations, actions, rewards 39 | -------------------------------------------------------------------------------- /mbt_gym/gym/helpers/plotting.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | 6 | import seaborn as sns 7 | 8 | from mbt_gym.agents.Agent import Agent 9 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 10 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, ASSET_PRICE_INDEX 11 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 12 | 13 | 14 | def plot_trajectory(env: gym.Env, agent: Agent, seed: int = None): 15 | # assert env.num_trajectories == 1, "Plotting a trajectory can only be done when env.num_trajectories == 1." 
16 | timestamps = get_timestamps(env) 17 | observations, actions, rewards = generate_trajectory(env, agent, seed) 18 | action_dim = actions.shape[1] 19 | colors = ["r", "k", "b", "g"] 20 | rewards = np.squeeze(rewards, axis=1) 21 | cum_rewards = np.cumsum(rewards, axis=-1) 22 | cash_holdings = observations[:, CASH_INDEX, :] 23 | inventory = observations[:, INVENTORY_INDEX, :] 24 | asset_prices = observations[:, ASSET_PRICE_INDEX, :] 25 | fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 10)) 26 | ax3a = ax3.twinx() 27 | ax1.title.set_text("cum_rewards") 28 | ax2.title.set_text("asset_prices") 29 | ax3.title.set_text("inventory and cash holdings") 30 | ax4.title.set_text("Actions") 31 | for i in range(env.num_trajectories): 32 | traj_label = f" trajectory {i}" if env.num_trajectories > 1 else "" 33 | ax1.plot(timestamps[1:], cum_rewards[i, :]) 34 | ax2.plot(timestamps, asset_prices[i, :]) 35 | ax3.plot( 36 | timestamps, 37 | inventory[i, :], 38 | label=f"inventory" + traj_label, 39 | color="r", 40 | alpha=(i + 1) / (env.num_trajectories + 1), 41 | ) 42 | ax3a.plot( 43 | timestamps, 44 | cash_holdings[i, :], 45 | label=f"cash holdings" + traj_label, 46 | color="b", 47 | alpha=(i + 1) / (env.num_trajectories + 1), 48 | ) 49 | for j in range(action_dim): 50 | ax4.plot( 51 | timestamps[0:-1], 52 | actions[i, j, :], 53 | label=f"Action {j}" + traj_label, 54 | color=colors[j], 55 | alpha=(i + 1) / (env.num_trajectories + 1), 56 | ) 57 | ax3.legend() 58 | ax4.legend() 59 | plt.show() 60 | 61 | 62 | def plot_stable_baselines_actions(model, env): 63 | timestamps = get_timestamps(env) 64 | inventory_action_dict = {} 65 | price = 100 66 | cash = 100 67 | for inventory in [-3, -2, -1, 0, 1, 2, 3]: 68 | actions = model.predict([price, cash, inventory, 0], deterministic=True)[0].reshape((1, 2)) 69 | for ts in timestamps[1:]: 70 | actions = np.append( 71 | actions, model.predict([price, cash, inventory, ts], deterministic=True)[0].reshape((1, 2)), axis=0 72 | ) 73 | inventory_action_dict[inventory] = actions 74 | for inventory in [-3, -2, -1, 0, 1, 2, 3]: 75 | plt.plot(np.array(inventory_action_dict[inventory]).T[0], label=inventory) 76 | plt.legend() 77 | plt.show() 78 | for inventory in [-3, -2, -1, 0, 1, 2, 3]: 79 | plt.plot(np.array(inventory_action_dict[inventory]).T[1], label=inventory) 80 | plt.legend() 81 | plt.show() 82 | 83 | 84 | def plot_pnl(rewards, symmetric_rewards=None): 85 | fig, ax = plt.subplots(1, 1, figsize=(20, 10)) 86 | if symmetric_rewards is not None: 87 | sns.histplot(symmetric_rewards, label="Rewards of symmetric strategy", stat="density", bins=50, ax=ax) 88 | sns.histplot(rewards, label="Rewards", color="red", stat="density", bins=50, ax=ax) 89 | ax.legend() 90 | plt.close() 91 | return fig 92 | 93 | 94 | def generate_results_table_and_hist(vec_env: TradingEnvironment, agent: Agent, n_episodes: int = 1000): 95 | assert vec_env.num_trajectories > 1, "To generate a results table and hist, vec_env must roll out > 1 trajectory." 
96 | observations, actions, rewards = generate_trajectory(vec_env, agent) 97 | total_rewards = rewards.sum(axis=-1).reshape(-1) 98 | terminal_inventories = observations[:, INVENTORY_INDEX, -1] 99 | half_spreads = actions.mean(axis=(-1, -2)) 100 | 101 | rows = ["Inventory"] 102 | columns = ["Mean spread", "Mean PnL", "Std PnL", "Mean terminal inventory", "Std terminal inventory"] 103 | results = pd.DataFrame(index=rows, columns=columns) 104 | results.loc[:, "Mean spread"] = 2 * np.mean(half_spreads) 105 | results.loc["Inventory", "Mean PnL"] = np.mean(total_rewards) 106 | results.loc["Inventory", "Std PnL"] = np.std(total_rewards) 107 | results.loc["Inventory", "Mean terminal inventory"] = np.mean(terminal_inventories) 108 | results.loc["Inventory", "Std terminal inventory"] = np.std(terminal_inventories) 109 | fig = plot_pnl(total_rewards) 110 | return results, fig, total_rewards 111 | 112 | 113 | def get_timestamps(env): 114 | return np.linspace(0, env.terminal_time, env.n_steps + 1) 115 | -------------------------------------------------------------------------------- /mbt_gym/gym/index_names.py: -------------------------------------------------------------------------------- 1 | CASH_INDEX = 0 2 | INVENTORY_INDEX = 1 3 | TIME_INDEX = 2 4 | ASSET_PRICE_INDEX = 3 5 | 6 | BID_INDEX = 0 7 | ASK_INDEX = 1 8 | -------------------------------------------------------------------------------- /mbt_gym/gym/info_calculators.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union, List 3 | 4 | import gym 5 | import numpy as np 6 | 7 | 8 | class InfoCalculator(metaclass=abc.ABCMeta): 9 | @abc.abstractmethod 10 | def calculate(self, state: np.ndarray, action: np.ndarray, reward: np.ndarray, done: bool) -> dict: 11 | pass 12 | 13 | @abc.abstractmethod 14 | def reset(self, initial_state: np.ndarray): 15 | pass 16 | 17 | 18 | class ActionInfoCalculator(InfoCalculator): 19 | """ActionInfoCalculator records the actions taken throughout the episode and then outputs the mean actions taken at 20 | the terminal step as an info dict. This is the Stable Baselines 3 convention. 
See, for example, the VecMonitor class 21 | of SB3.""" 22 | 23 | def __init__(self, action_space: gym.spaces.Box, n_steps: int = 10 * 10, num_trajectories: int = 1000): 24 | self.action_space = action_space 25 | self.n_steps = n_steps 26 | self.num_trajectories = num_trajectories 27 | self.nan_matrix = np.empty((self.num_trajectories, self.action_space.shape[0], self.n_steps)) 28 | self.nan_matrix[:] = np.nan 29 | self.actions = self.nan_matrix.copy() 30 | self.empty_infos = [{} for _ in range(self.num_trajectories)] if self.num_trajectories > 1 else {} 31 | self.count = 0 32 | 33 | def calculate( 34 | self, state: np.ndarray, action: np.ndarray, reward: np.ndarray, done: bool 35 | ) -> Union[dict, List[dict]]: 36 | if done: 37 | mean_actions = self._calculate_mean_actions() 38 | return [ 39 | {f"action_{j}": mean_actions[i, j] for j in range(mean_actions.shape[1])} 40 | for i in range(mean_actions.shape[0]) 41 | ] 42 | else: 43 | self.actions[:, :, self.count] = action 44 | self.count += 1 45 | return self.empty_infos 46 | 47 | def reset(self, initial_state: np.ndarray): 48 | self.count = 0 49 | self.actions = self.nan_matrix.copy() 50 | 51 | def _calculate_mean_actions(self): 52 | return np.nanmean(self.actions, axis=2) 53 | -------------------------------------------------------------------------------- /mbt_gym/gym/wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import numpy as np 4 | 5 | from mbt_gym.gym.index_names import INVENTORY_INDEX, TIME_INDEX 6 | 7 | from math import sqrt 8 | 9 | 10 | class ReduceStateSizeWrapper(gym.Wrapper): 11 | """ 12 | :param env: (gym.Env) Gym environment that will be wrapped 13 | """ 14 | 15 | def __init__(self, env, list_of_state_indices: list = [INVENTORY_INDEX, TIME_INDEX]): 16 | # Call the parent constructor, so we can access self.env later 17 | super(ReduceStateSizeWrapper, self).__init__(env) 18 | assert type(env.observation_space) == gym.spaces.box.Box 19 | self.observation_space = gym.spaces.box.Box( 20 | low=env.observation_space.low[list_of_state_indices], 21 | high=env.observation_space.high[list_of_state_indices], 22 | dtype=np.float64, 23 | ) 24 | self.list_of_state_indices = list_of_state_indices 25 | 26 | def reset(self): 27 | """ 28 | Reset the environment 29 | """ 30 | obs = self.env.reset() 31 | return obs[:, self.list_of_state_indices] 32 | 33 | def step(self, action): 34 | """ 35 | :param action: ([float] or int) Action taken by the agent 36 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information 37 | """ 38 | obs, reward, done, info = self.env.step(action) 39 | return obs[:, self.list_of_state_indices], reward, done, info 40 | 41 | @property 42 | def spec(self): 43 | return self.env.spec 44 | 45 | 46 | class NormaliseASObservation(gym.Wrapper): 47 | """ 48 | :param env: (gym.Env) Gym environment that will be wrapped 49 | """ 50 | 51 | def __init__(self, env): 52 | # Call the parent constructor, so we can access self.env later 53 | super(NormaliseASObservation, self).__init__(env) 54 | self.normalisation_factor = 2 / (env.observation_space.high - env.observation_space.low) 55 | self.normalisation_offset = (env.observation_space.high + env.observation_space.low) / 2 56 | assert type(env.observation_space) == gym.spaces.box.Box 57 | self.observation_space = gym.spaces.box.Box( 58 | low=-np.ones(env.observation_space.shape), 59 | high=np.ones(env.observation_space.shape), 60 | dtype=np.float64, 61 | ) 62 | 63 | def
reset(self): 64 | """ 65 | Reset the environment 66 | """ 67 | obs = self.env.reset() 68 | return (obs - self.normalisation_offset) * self.normalisation_factor 69 | 70 | def step(self, action): 71 | """ 72 | :param action: ([float] or int) Action taken by the agent 73 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information 74 | """ 75 | obs, reward, done, info = self.env.step(action) 76 | return (obs - self.normalisation_offset) * self.normalisation_factor, reward, done, info 77 | 78 | 79 | class RemoveTerminalRewards(gym.Wrapper): 80 | """ 81 | :param env: (gym.Env) Gym environment that will be wrapped 82 | """ 83 | 84 | def __init__(self, env, num_final_steps: int = 5): 85 | # Call the parent constructor, so we can access self.env later 86 | super(RemoveTerminalRewards, self).__init__(env) 87 | 88 | def reset(self): 89 | """ 90 | Reset the environment 91 | """ 92 | return self.env.reset() 93 | 94 | def step(self, action): 95 | """ 96 | :param action: ([float] or int) Action taken by the agent 97 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information 98 | """ 99 | state, reward, done, _ = self.env.step(action) 100 | if done: 101 | reward *= ( 102 | self.env.reward_function.per_step_inventory_aversion 103 | / self.env.reward_function.terminal_inventory_aversion 104 | ) 105 | return state, reward, done, {} 106 | -------------------------------------------------------------------------------- /mbt_gym/rewards/RewardFunctions.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union 3 | 4 | import numpy as np 5 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, TIME_INDEX, ASSET_PRICE_INDEX 6 | 7 | 8 | class RewardFunction(metaclass=abc.ABCMeta): 9 | @abc.abstractmethod 10 | def calculate( 11 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 12 | ) -> Union[float, np.ndarray]: 13 | pass 14 | 15 | @abc.abstractmethod 16 | def reset(self, initial_state: np.ndarray): 17 | pass 18 | 19 | 20 | class PnL(RewardFunction): 21 | """A simple profit and loss reward function given by the change in the 'mark-to-market' value of the agent's portfolio.""" 22 | 23 | def calculate( 24 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 25 | ) -> float: 26 | assert len(current_state.shape) > 1, "Reward functions must be calculated on state matrices."
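# Worked example (values as in mbt_gym/rewards/tests/testRewardFunctions.py): a current state with cash 120,
# inventory 2 and asset price 100 has mark-to-market value 320; a next state with cash 20, inventory 3 and
# price 100.05 has value 320.15, so the PnL reward for that step is 0.15.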
27 | current_market_value = ( 28 | current_state[:, CASH_INDEX] + current_state[:, INVENTORY_INDEX] * current_state[:, ASSET_PRICE_INDEX] 29 | ) 30 | next_market_value = ( 31 | next_state[:, CASH_INDEX] + next_state[:, INVENTORY_INDEX] * next_state[:, ASSET_PRICE_INDEX] 32 | ) 33 | return next_market_value - current_market_value 34 | 35 | def reset(self, initial_state: np.ndarray): 36 | pass 37 | 38 | 39 | class CjOeCriterion(RewardFunction): 40 | def __init__( 41 | self, 42 | per_step_inventory_aversion: float = 0.01, 43 | terminal_inventory_aversion: float = 0.0, 44 | inventory_exponent: float = 2.0, 45 | terminal_time: float = 1.0, 46 | ): 47 | self.per_step_inventory_aversion = per_step_inventory_aversion 48 | self.terminal_inventory_aversion = terminal_inventory_aversion 49 | self.pnl = PnL() 50 | self.inventory_exponent = inventory_exponent 51 | self.terminal_time = terminal_time 52 | self.initial_inventory = None 53 | self.episode_length = None 54 | 55 | def calculate( 56 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 57 | ) -> float: 58 | dt = next_state[:, TIME_INDEX] - current_state[:, TIME_INDEX] 59 | return ( 60 | self.pnl.calculate(current_state, action, next_state, is_terminal_step) 61 | - dt * self.per_step_inventory_aversion * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 62 | - dt 63 | * self.terminal_inventory_aversion 64 | * ( 65 | self.inventory_exponent 66 | * np.squeeze(action) 67 | * (current_state[:, INVENTORY_INDEX]) ** (self.inventory_exponent - 1) 68 | + self.initial_inventory**self.inventory_exponent * self.episode_length 69 | ) 70 | ) 71 | 72 | def reset(self, initial_state: np.ndarray): 73 | self.initial_inventory = initial_state[:, INVENTORY_INDEX] 74 | self.episode_length = self.terminal_time - initial_state[:, TIME_INDEX] 75 | 76 | 77 | class CjMmCriterion(RewardFunction): 78 | """A version of the Cartea-Jaimungal criterion which uses Ito's lemma for Poisson processes to split the negative 79 | reward attributed to terminal inventory aversion over the trajectory of the inventory.""" 80 | 81 | def __init__( 82 | self, 83 | per_step_inventory_aversion: float = 0.01, 84 | terminal_inventory_aversion: float = 0.0, 85 | inventory_exponent: float = 2.0, 86 | terminal_time: float = 1.0, 87 | ): 88 | self.per_step_inventory_aversion = per_step_inventory_aversion 89 | self.terminal_inventory_aversion = terminal_inventory_aversion 90 | self.pnl = PnL() 91 | self.inventory_exponent = inventory_exponent 92 | self.terminal_time = terminal_time 93 | self.initial_inventory = None 94 | self.episode_length = None 95 | 96 | def calculate( 97 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 98 | ) -> float: 99 | dt = next_state[:, TIME_INDEX] - current_state[:, TIME_INDEX] 100 | return ( 101 | self.pnl.calculate(current_state, action, next_state, is_terminal_step) 102 | - dt * self.per_step_inventory_aversion * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 103 | - self.terminal_inventory_aversion 104 | * ( 105 | next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 106 | - current_state[:, INVENTORY_INDEX] ** self.inventory_exponent 107 | + dt / self.episode_length * self.initial_inventory**self.inventory_exponent 108 | ) 109 | ) 110 | 111 | def reset(self, initial_state: np.ndarray): 112 | self.initial_inventory = initial_state[:, INVENTORY_INDEX] 113 | self.episode_length = self.terminal_time - initial_state[:, TIME_INDEX] 
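# Sanity check of the decomposition above (with inventory_exponent = 2): summed over a full episode, the
# terminal-aversion term telescopes, since sum(q_{t+dt}^2 - q_t^2) = q_T^2 - q_0^2 and sum(dt / episode_length) = 1,
# so the total penalty equals terminal_inventory_aversion * q_T^2, which is exactly the single terminal charge
# applied by RunningInventoryPenalty below. The tests in mbt_gym/rewards/tests/testRewardFunctions.py check this
# agreement.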
114 | 115 | 116 | class RunningInventoryPenalty(RewardFunction): 117 | def __init__( 118 | self, 119 | per_step_inventory_aversion: float = 0.01, 120 | terminal_inventory_aversion: float = 0.0, 121 | inventory_exponent: float = 2.0, 122 | ): 123 | self.per_step_inventory_aversion = per_step_inventory_aversion 124 | self.terminal_inventory_aversion = terminal_inventory_aversion 125 | self.pnl = PnL() 126 | self.inventory_exponent = inventory_exponent 127 | 128 | def calculate( 129 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 130 | ) -> float: 131 | dt = next_state[:, TIME_INDEX] - current_state[:, TIME_INDEX] 132 | return ( 133 | self.pnl.calculate(current_state, action, next_state, is_terminal_step) 134 | - dt * self.per_step_inventory_aversion * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 135 | - self.terminal_inventory_aversion 136 | * int(is_terminal_step) 137 | * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 138 | ) 139 | 140 | def reset(self, initial_state: np.ndarray): 141 | pass 142 | 143 | 144 | # Cartea and Jaimungal criterion is the same as inventory adjusted PnL 145 | 146 | CjCriterion = RunningInventoryPenalty 147 | 148 | 149 | class ExponentialUtility(RewardFunction): 150 | def __init__(self, risk_aversion: float = 0.1): 151 | self.risk_aversion = risk_aversion 152 | 153 | def calculate( 154 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 155 | ) -> float: 156 | return ( 157 | -np.exp( 158 | -self.risk_aversion 159 | * (next_state[:, CASH_INDEX] + next_state[:, INVENTORY_INDEX] * next_state[:, ASSET_PRICE_INDEX]) 160 | ) 161 | if is_terminal_step 162 | else 0 163 | ) 164 | 165 | def reset(self, initial_state: np.ndarray): 166 | pass 167 | -------------------------------------------------------------------------------- /mbt_gym/rewards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/rewards/__init__.py -------------------------------------------------------------------------------- /mbt_gym/rewards/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/rewards/tests/__init__.py -------------------------------------------------------------------------------- /mbt_gym/rewards/tests/testRewardFunctions.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from unittest import TestCase, main 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.rewards.RewardFunctions import RunningInventoryPenalty, PnL, CjMmCriterion 7 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, TIME_INDEX, ASSET_PRICE_INDEX 8 | 9 | STEP_SIZE = 0.2 10 | TEST_CURRENT_STATE = np.array([[120, 2, 0.5, 100]]) 11 | TEST_ACTION = np.array([[1, 1]]) 12 | TEST_NEXT_STATE = np.array([[20, 3, 0.5 + STEP_SIZE, 100.05]]) # Buy order gets filled 13 | TERMINAL_TIME = 1.0 14 | 15 | # CASH, INVENTORY, TIME, ASSET_PRICE 16 | MOCK_OBSERVATIONS = [ 17 | np.array([[100.0, 0, 0.0, 100]]), 18 | np.array([[0.5, 1, STEP_SIZE, 101]]), 19 | np.array([[102.0, 0, 2 * STEP_SIZE, 102]]), 20 | np.array([[103.0, 0, 3 * STEP_SIZE, 103]]), 21 | np.array([[206.5, -1, 4 * STEP_SIZE, 104]]), 22 | np.array([[103.0, 0, 5 * STEP_SIZE, 
103]]), 23 | ] 24 | MOCK_ACTIONS = [ 25 | np.array([[0.5, 0.5]]), 26 | np.array([[0.5, 1]]), 27 | np.array([[0.5, 0.5]]), 28 | np.array([[1, 0.5]]), 29 | np.array([[0.5, 0.5]]), 30 | ] 31 | 32 | 33 | class testPnL(TestCase): 34 | def test_calculate_per_step_reward(self): 35 | current_value = ( 36 | TEST_CURRENT_STATE[:, CASH_INDEX] 37 | + TEST_CURRENT_STATE[:, INVENTORY_INDEX] * TEST_CURRENT_STATE[:, ASSET_PRICE_INDEX] 38 | ) 39 | next_value = ( 40 | TEST_NEXT_STATE[:, CASH_INDEX] + TEST_NEXT_STATE[:, INVENTORY_INDEX] * TEST_NEXT_STATE[:, ASSET_PRICE_INDEX] 41 | ) 42 | expected = next_value - current_value 43 | actual = PnL().calculate(current_state=TEST_CURRENT_STATE, action=TEST_ACTION, next_state=TEST_NEXT_STATE) 44 | self.assertEqual(expected, actual, f"PnL calculation should give {expected}. Instead got {actual}!") 45 | 46 | 47 | PER_STEP_INVENTORY_AVERSION = 0.01 48 | TERMINAL_INVENTORY_AVERSION = 1 49 | 50 | 51 | class testInventoryReward(TestCase): 52 | def test_calculate_per_step_reward(self): 53 | reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 54 | pnl = PnL().calculate(current_state=TEST_CURRENT_STATE, action=TEST_ACTION, next_state=TEST_NEXT_STATE) 55 | inventory_penalty = PER_STEP_INVENTORY_AVERSION * STEP_SIZE * abs(TEST_NEXT_STATE[:, INVENTORY_INDEX]) ** 2 56 | expected = pnl - inventory_penalty 57 | actual = reward_function.calculate(TEST_CURRENT_STATE, TEST_ACTION, TEST_NEXT_STATE) 58 | self.assertAlmostEqual(expected.item(), actual.item(), places=5) 59 | 60 | 61 | class testCjMmCriterion(TestCase): 62 | cj_mm_criterion = CjMmCriterion( 63 | per_step_inventory_aversion=PER_STEP_INVENTORY_AVERSION, 64 | terminal_inventory_aversion=TERMINAL_INVENTORY_AVERSION, 65 | terminal_time=TERMINAL_TIME, 66 | ) 67 | 68 | def test_agreement_with_non_decontructed_version(self): 69 | target_reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 70 | cj_mm_rewards = [] 71 | target_rewards = [] 72 | self.cj_mm_criterion.reset(MOCK_OBSERVATIONS[0]) 73 | for i in range(len(MOCK_ACTIONS)): 74 | is_terminal_step = MOCK_OBSERVATIONS[i + 1][:, TIME_INDEX] == 1 75 | cj_mm_rewards.append( 76 | self.cj_mm_criterion.calculate( 77 | MOCK_OBSERVATIONS[i], MOCK_ACTIONS[i], MOCK_OBSERVATIONS[i + 1], is_terminal_step 78 | ) 79 | ) 80 | target_rewards.append( 81 | target_reward_function.calculate( 82 | MOCK_OBSERVATIONS[i], MOCK_ACTIONS[i], MOCK_OBSERVATIONS[i + 1], is_terminal_step 83 | ) 84 | ) 85 | self.assertAlmostEqual(float(sum(cj_mm_rewards)), float(sum(target_rewards)), places=5) 86 | 87 | def test_agreement_with_non_decontructed_version_nonzero_initial_inventory(self): 88 | target_reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 89 | cj_mm_rewards = [] 90 | target_rewards = [] 91 | mock_observations = deepcopy(MOCK_OBSERVATIONS) 92 | mock_observations[0][:, INVENTORY_INDEX] = 2 93 | mock_observations[0][:, CASH_INDEX] = -100 94 | mock_observations[-1] = deepcopy(mock_observations[-2]) 95 | mock_observations[-1][:, TIME_INDEX] = 1.0 96 | self.cj_mm_criterion.reset(mock_observations[0]) 97 | for i in range(len(MOCK_ACTIONS)): 98 | is_terminal_step = mock_observations[i + 1][:, TIME_INDEX] == 1 99 | cj_mm_rewards.append( 100 | self.cj_mm_criterion.calculate( 101 | mock_observations[i], MOCK_ACTIONS[i], mock_observations[i + 1], is_terminal_step 102 | ) 103 | ) 104 | target_rewards.append( 105 | target_reward_function.calculate( 106 | 
mock_observations[i], MOCK_ACTIONS[i], mock_observations[i + 1], is_terminal_step 107 | ) 108 | ) 109 | self.assertAlmostEqual(float(sum(cj_mm_rewards)), float(sum(target_rewards)), places=5) 110 | 111 | def test_agreement_with_non_decontructed_version_partial_trajectory(self): 112 | target_reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 113 | cj_mm_rewards = [] 114 | target_rewards = [] 115 | START_STEP = 2 116 | self.cj_mm_criterion.reset(MOCK_OBSERVATIONS[START_STEP]) 117 | for i in range(len(MOCK_ACTIONS[START_STEP:])): 118 | is_terminal_step = MOCK_OBSERVATIONS[START_STEP + i + 1][:, TIME_INDEX] == 1 119 | cj_mm_rewards.append( 120 | self.cj_mm_criterion.calculate( 121 | MOCK_OBSERVATIONS[START_STEP + i], 122 | MOCK_ACTIONS[START_STEP + i], 123 | MOCK_OBSERVATIONS[START_STEP + i + 1], 124 | is_terminal_step, 125 | ) 126 | ) 127 | target_rewards.append( 128 | target_reward_function.calculate( 129 | MOCK_OBSERVATIONS[START_STEP + i], 130 | MOCK_ACTIONS[START_STEP + i], 131 | MOCK_OBSERVATIONS[START_STEP + i + 1], 132 | is_terminal_step, 133 | ) 134 | ) 135 | self.assertAlmostEqual(float(sum(cj_mm_rewards)), float(sum(target_rewards)), places=5) 136 | 137 | 138 | if __name__ == "__main__": 139 | main() 140 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/StochasticProcessModel.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from copy import copy 3 | 4 | import numpy as np 5 | from numpy.random import default_rng 6 | 7 | 8 | class StochasticProcessModel(metaclass=abc.ABCMeta): 9 | def __init__( 10 | self, 11 | min_value: np.ndarray, 12 | max_value: np.ndarray, 13 | step_size: float, 14 | terminal_time: float, 15 | initial_state: np.ndarray, 16 | num_trajectories: int = 1, 17 | seed: int = None, 18 | ): 19 | self.min_value = min_value 20 | self.max_value = max_value 21 | self.step_size = step_size 22 | self.terminal_time = terminal_time 23 | self.num_trajectories = num_trajectories 24 | self.initial_state = initial_state 25 | self._check_attribute_shapes() 26 | self.current_state = copy(self.initial_vector_state) 27 | self.rng = default_rng(seed) 28 | self.seed_ = seed 29 | 30 | def reset(self): 31 | self.current_state = self.initial_vector_state 32 | 33 | @abc.abstractmethod 34 | def update(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray, state: np.ndarray = None): 35 | pass 36 | 37 | def seed(self, seed: int = None): 38 | self.rng = default_rng(seed) 39 | self.seed_ = seed 40 | 41 | def _check_attribute_shapes(self): 42 | for name in ["initial_state", "min_value", "max_value"]: 43 | attribute = getattr(self, name) 44 | assert ( 45 | len(attribute.shape) == 2 and attribute.shape[0] == 1 46 | ), f"Attribute {name} must be a vector of shape (1, state_size)." 
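# For example (illustrative): an initial_state of np.array([[100.0]]) with num_trajectories = 3 yields an
# initial_vector_state of shape (3, 1) via the property below, i.e. one copy of the initial state per trajectory.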
47 | 48 | @property 49 | def initial_vector_state(self) -> np.ndarray: 50 | initial_state = self.initial_state 51 | if isinstance(initial_state, list): 52 | initial_state = np.array([self.initial_state]) 53 | return np.repeat(initial_state, self.num_trajectories, axis=0) 54 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/stochastic_processes/__init__.py -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/arrival_models.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Optional 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | 9 | class ArrivalModel(StochasticProcessModel): 10 | """ArrivalModel models the arrival of orders to the order book. The first entry of arrivals represents an arrival 11 | of an exogenous SELL order (arriving on the buy side of the book) and the second entry represents an arrival of an 12 | exogenous BUY order (arriving on the sell side of the book). 13 | """ 14 | 15 | def __init__( 16 | self, 17 | min_value: np.ndarray, 18 | max_value: np.ndarray, 19 | step_size: float, 20 | terminal_time: float, 21 | initial_state: np.ndarray, 22 | num_trajectories: int = 1, 23 | seed: int = None, 24 | ): 25 | super().__init__(min_value, max_value, step_size, terminal_time, initial_state, num_trajectories, seed) 26 | 27 | @abc.abstractmethod 28 | def get_arrivals(self) -> np.ndarray: 29 | pass 30 | 31 | 32 | class PoissonArrivalModel(ArrivalModel): 33 | def __init__( 34 | self, 35 | intensity: np.ndarray = np.array([140.0, 140.0]), 36 | step_size: float = 0.001, 37 | num_trajectories: int = 1, 38 | seed: Optional[int] = None, 39 | ): 40 | self.intensity = np.array(intensity) 41 | super().__init__( 42 | min_value=np.array([[]]), 43 | max_value=np.array([[]]), 44 | step_size=step_size, 45 | terminal_time=0.0, 46 | initial_state=np.array([[]]), 47 | num_trajectories=num_trajectories, 48 | seed=seed, 49 | ) 50 | 51 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 52 | pass 53 | 54 | def get_arrivals(self) -> np.ndarray: 55 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 56 | return unif < self.intensity * self.step_size 57 | 58 | 59 | class PoissonArrivalNonLinearModel(ArrivalModel): 60 | def __init__( 61 | self, 62 | intensity: np.ndarray = np.array([140.0, 140.0]), 63 | step_size: float = 0.001, 64 | num_trajectories: int = 1, 65 | seed: Optional[int] = None, 66 | ): 67 | self.intensity = np.array(intensity) 68 | super().__init__( 69 | min_value=np.array([[]]), 70 | max_value=np.array([[]]), 71 | step_size=step_size, 72 | terminal_time=0.0, 73 | initial_state=np.array([[]]), 74 | num_trajectories=num_trajectories, 75 | seed=seed, 76 | ) 77 | 78 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 79 | pass 80 | 81 | def get_arrivals(self) -> np.ndarray: 82 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 83 | return unif < 1. 
- np.exp(-self.intensity * self.step_size) 84 | 85 | 86 | class HawkesArrivalModel(ArrivalModel): 87 | def __init__( 88 | self, 89 | baseline_arrival_rate: np.ndarray = np.array([[10.0, 10.0]]), 90 | step_size: float = 0.01, 91 | jump_size: float = 40.0, 92 | mean_reversion_speed: float = 60.0, 93 | terminal_time: float = 1, 94 | num_trajectories: int = 1, 95 | seed: Optional[int] = None, 96 | ): 97 | self.baseline_arrival_rate = baseline_arrival_rate 98 | self.jump_size = jump_size # see https://arxiv.org/pdf/1507.02822.pdf, equation (4). 99 | self.mean_reversion_speed = mean_reversion_speed 100 | super().__init__( 101 | min_value=np.array([[0, 0]]), 102 | max_value=np.array([[1, 1]]) * self._get_max_arrival_rate(), 103 | step_size=step_size, 104 | terminal_time=terminal_time, 105 | initial_state=baseline_arrival_rate, 106 | num_trajectories=num_trajectories, 107 | seed=seed, 108 | ) 109 | 110 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 111 | self.current_state = ( 112 | self.current_state 113 | + self.mean_reversion_speed 114 | * (np.ones((self.num_trajectories, 2)) * self.baseline_arrival_rate - self.current_state) 115 | * self.step_size 116 | * np.ones((self.num_trajectories, 2)) 117 | + self.jump_size * arrivals 118 | ) 119 | return self.current_state 120 | 121 | def get_arrivals(self) -> np.ndarray: 122 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 123 | return unif < self.current_state * self.step_size 124 | 125 | def _get_max_arrival_rate(self): 126 | return self.baseline_arrival_rate * 10 127 | 128 | # TODO: Improve this with 4*std 129 | # See: https://math.stackexchange.com/questions/4047342/expectation-of-hawkes-process-with-exponential-kernel 130 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/fill_probability_models.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Optional, Tuple 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | 9 | class FillProbabilityModel(StochasticProcessModel): 10 | def __init__( 11 | self, 12 | min_value: np.ndarray, 13 | max_value: np.ndarray, 14 | step_size: float, 15 | terminal_time: float, 16 | initial_state: np.ndarray, 17 | num_trajectories: int = 1, 18 | seed: int = None, 19 | ): 20 | super().__init__(min_value, max_value, step_size, terminal_time, initial_state, num_trajectories, seed) 21 | 22 | @abc.abstractmethod 23 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 24 | """Note that _get_fill_probabilities can return a 'probability' greater than one. However, this is not an issue 25 | for it is only use is in `get_hypothetical_fills` below.""" 26 | pass 27 | 28 | def get_fills(self, depths: np.ndarray) -> np.ndarray: 29 | assert depths.shape == (self.num_trajectories, 2), ( 30 | "Depths must be a numpy array of shape " 31 | + f"({self.num_trajectories},2). Instead it is a numpy array of shape {depths.shape}." 
32 | ) 33 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 34 | return unif < self._get_fill_probabilities(depths) 35 | 36 | @property 37 | @abc.abstractmethod 38 | def max_depth(self) -> float: 39 | pass 40 | 41 | 42 | class ExponentialFillFunction(FillProbabilityModel): 43 | def __init__( 44 | self, fill_exponent: float = 1.5, step_size: float = 0.1, num_trajectories: int = 1, seed: Optional[int] = None 45 | ): 46 | self.fill_exponent = fill_exponent 47 | super().__init__( 48 | min_value=np.array([[]]), 49 | max_value=np.array([[]]), 50 | step_size=step_size, 51 | terminal_time=0.0, 52 | initial_state=np.array([[]]), 53 | num_trajectories=num_trajectories, 54 | seed=seed, 55 | ) 56 | 57 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 58 | return np.exp(-self.fill_exponent * depths) 59 | 60 | @property 61 | def max_depth(self) -> float: 62 | return -np.log(0.01) / self.fill_exponent 63 | 64 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 65 | pass 66 | 67 | 68 | class TriangularFillFunction(FillProbabilityModel): 69 | def __init__( 70 | self, max_fill_depth: float = 1.0, step_size: float = 0.1, num_trajectories: int = 1, seed: Optional[int] = None 71 | ): 72 | self.max_fill_depth = max_fill_depth 73 | super().__init__( 74 | min_value=np.array([[]]), 75 | max_value=np.array([[]]), 76 | step_size=step_size, 77 | terminal_time=0.0, 78 | initial_state=np.array([[]]), 79 | num_trajectories=num_trajectories, 80 | seed=seed, 81 | ) 82 | 83 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 84 | return np.max(1 - np.max(depths, 0) / self.max_fill_depth, 0) 85 | 86 | @property 87 | def max_depth(self) -> float: 88 | return 1.5 * self.max_fill_depth 89 | 90 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 91 | pass 92 | 93 | 94 | class PowerFillFunction(FillProbabilityModel): 95 | def __init__( 96 | self, 97 | fill_exponent: float = 1.5, 98 | fill_multiplier: float = 1.5, 99 | step_size: float = 0.1, 100 | num_trajectories: int = 1, 101 | seed: Optional[int] = None, 102 | ): 103 | self.fill_exponent = fill_exponent 104 | self.fill_multiplier = fill_multiplier 105 | super().__init__( 106 | min_value=np.array([[]]), 107 | max_value=np.array([[]]), 108 | step_size=step_size, 109 | terminal_time=0.0, 110 | initial_state=np.array([[]]), 111 | num_trajectories=num_trajectories, 112 | seed=seed, 113 | ) 114 | 115 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 116 | return (1 + (self.fill_multiplier * np.max(depths, 0)) ** self.fill_exponent) ** -1 117 | 118 | @property 119 | def max_depth(self) -> float: 120 | return 0.01 ** (-1 / self.fill_exponent) - 1 121 | 122 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 123 | pass 124 | 125 | 126 | class ExogenousMmFillProbabilityModel(FillProbabilityModel): 127 | def __init__( 128 | self, 129 | exogenous_best_depth_processes: Tuple[StochasticProcessModel], 130 | fill_exponent: float = 1.5, 131 | base_fill_probability: float = 1.0, 132 | step_size: float = 0.1, 133 | num_trajectories: int = 1, 134 | seed: Optional[int] = None, 135 | ): 136 | assert len(exogenous_best_depth_processes) == 2, "exogenous_best_depth_processes must be length 2 (bid and ask)" 137 | assert all( 138 | len(process.initial_state) > 0 for process in exogenous_best_depth_processes 139 | ), "Exogenous best depth processes must have a state of 
at least size 1." 140 | self.exogenous_best_depth_processes = exogenous_best_depth_processes 141 | self.fill_exponent = fill_exponent 142 | self.base_fill_probability = base_fill_probability 143 | super().__init__( 144 | min_value=np.concatenate([process.min_value for process in self.exogenous_best_depth_processes], axis=1), 145 | max_value=np.concatenate([process.max_value for process in self.exogenous_best_depth_processes], axis=1), 146 | step_size=step_size, 147 | terminal_time=0.0, 148 | initial_state=np.concatenate( 149 | ( 150 | self.exogenous_best_depth_processes[0].initial_state, 151 | self.exogenous_best_depth_processes[1].initial_state, 152 | ), 153 | axis=1, 154 | ), 155 | num_trajectories=num_trajectories, 156 | seed=seed, 157 | ) 158 | 159 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 160 | return (depths > self.current_state) * self.base_fill_probability * np.exp( 161 | -self.fill_exponent * (depths - self.current_state) 162 | ) + (depths <= self.current_state) 163 | 164 | @property 165 | def max_depth(self) -> float: 166 | return -np.log(0.01) / self.fill_exponent + np.max(self.exogenous_best_depth_processes[0].max_value) 167 | 168 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 169 | for process in self.exogenous_best_depth_processes: 170 | process.update(arrivals, fills, actions) 171 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/midprice_models.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | from typing import Optional 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | MidpriceModel = StochasticProcessModel 9 | 10 | from mbt_gym.gym.index_names import BID_INDEX, ASK_INDEX 11 | 12 | class ConstantMidpriceModel(MidpriceModel): 13 | def __init__( 14 | self, 15 | initial_price: float = 100, 16 | terminal_time: float = 1.0, 17 | step_size: float = 0.01, 18 | num_trajectories: int = 1, 19 | seed: Optional[int] = None, 20 | ): 21 | self.terminal_time = terminal_time 22 | super().__init__( 23 | min_value=np.array([[initial_price]]), 24 | max_value=np.array([[initial_price]]), 25 | step_size=step_size, 26 | terminal_time=terminal_time, 27 | initial_state=np.array([[initial_price]]), 28 | num_trajectories=num_trajectories, 29 | seed=seed, 30 | ) 31 | 32 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 33 | pass 34 | 35 | 36 | class BrownianMotionMidpriceModel(MidpriceModel): 37 | def __init__( 38 | self, 39 | drift: float = 0.0, 40 | volatility: float = 2.0, 41 | initial_price: float = 100, 42 | terminal_time: float = 1.0, 43 | step_size: float = 0.01, 44 | num_trajectories: int = 1, 45 | seed: Optional[int] = None, 46 | ): 47 | self.drift = drift 48 | self.volatility = volatility 49 | self.terminal_time = terminal_time 50 | super().__init__( 51 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 52 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 53 | step_size=step_size, 54 | terminal_time=terminal_time, 55 | initial_state=np.array([[initial_price]]), 56 | num_trajectories=num_trajectories, 57 | seed=seed, 58 | ) 59 | 60 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> 
np.ndarray: 61 | self.current_state = ( 62 | self.current_state 63 | + self.drift * self.step_size * np.ones((self.num_trajectories, 1)) 64 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 65 | ) 66 | 67 | def _get_max_value(self, initial_price, terminal_time): 68 | return initial_price + 4 * self.volatility * np.sqrt(terminal_time) 69 | 70 | 71 | class GeometricBrownianMotionMidpriceModel(MidpriceModel): 72 | def __init__( 73 | self, 74 | drift: float = 0.0, 75 | volatility: float = 0.1, 76 | initial_price: float = 100, 77 | terminal_time: float = 1.0, 78 | step_size: float = 0.01, 79 | num_trajectories: int = 1, 80 | seed: Optional[int] = None, 81 | ): 82 | self.drift = drift 83 | self.volatility = volatility 84 | self.terminal_time = terminal_time 85 | super().__init__( 86 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 87 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 88 | step_size=step_size, 89 | terminal_time=terminal_time, 90 | initial_state=np.array([[initial_price]]), 91 | num_trajectories=num_trajectories, 92 | seed=seed, 93 | ) 94 | 95 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 96 | self.current_state = ( 97 | self.current_state 98 | + self.drift * self.current_state * self.step_size 99 | + self.volatility 100 | * self.current_state 101 | * sqrt(self.step_size) 102 | * self.rng.normal(size=(self.num_trajectories, 1)) 103 | ) 104 | 105 | def _get_max_value(self, initial_price, terminal_time): 106 | stdev = sqrt( 107 | initial_price**2 108 | * np.exp(2 * self.drift * terminal_time) 109 | * (np.exp(self.volatility**2 * terminal_time) - 1) 110 | ) 111 | return initial_price * np.exp(self.drift * terminal_time) + 4 * stdev 112 | 113 | 114 | class OuMidpriceModel(MidpriceModel): 115 | def __init__( 116 | self, 117 | mean_reversion_level: float = 0.0, 118 | mean_reversion_speed: float = 1.0, 119 | volatility: float = 2.0, 120 | initial_price: float = 100.0, 121 | terminal_time: float = 1.0, 122 | step_size: float = 0.01, 123 | num_trajectories: int = 1, 124 | seed: Optional[int] = None, 125 | ): 126 | self.mean_reversion_level = mean_reversion_level 127 | self.mean_reversion_speed = mean_reversion_speed 128 | self.volatility = volatility 129 | self.terminal_time = terminal_time 130 | super().__init__( 131 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 132 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 133 | step_size=step_size, 134 | terminal_time=terminal_time, 135 | initial_state=np.array([[initial_price]]), 136 | num_trajectories=num_trajectories, 137 | seed=seed, 138 | ) 139 | 140 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 141 | self.current_state += -self.mean_reversion_speed * ( 142 | self.current_state - self.mean_reversion_level * np.ones((self.num_trajectories, 1)) 143 | ) + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 144 | 145 | def _get_max_value(self, initial_price, terminal_time): 146 | return initial_price + 4 * self.volatility * terminal_time # TODO: What should this be? 
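# ----------------------------------------------------------------------------------
# Illustration only (not part of mbt_gym): the midprice models in this module share a
# common pattern -- the state is a NumPy array of shape (num_trajectories, state_size),
# and update() advances it by one Euler-Maruyama step of length step_size, drawing
# noise from self.rng. Below is a minimal sketch of a custom model following that
# pattern; the class name and parameters are hypothetical, and mbt_gym is assumed to
# be importable (e.g. via sys.path.append("../") as in the notebooks further down).

import numpy as np
from typing import Optional
from mbt_gym.stochastic_processes.midprice_models import MidpriceModel


class DriftlessBachelierMidpriceModel(MidpriceModel):
    """Hypothetical example: arithmetic Brownian midprice with zero drift."""

    def __init__(
        self,
        volatility: float = 1.0,
        initial_price: float = 100.0,
        terminal_time: float = 1.0,
        step_size: float = 0.01,
        num_trajectories: int = 1,
        seed: Optional[int] = None,
    ):
        self.volatility = volatility
        half_width = 4 * volatility * np.sqrt(terminal_time)  # +/- 4 standard deviations, as in the models above
        super().__init__(
            min_value=np.array([[initial_price - half_width]]),
            max_value=np.array([[initial_price + half_width]]),
            step_size=step_size,
            terminal_time=terminal_time,
            initial_state=np.array([[initial_price]]),
            num_trajectories=num_trajectories,
            seed=seed,
        )

    def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None):
        # One Euler-Maruyama step: S_{t+dt} = S_t + volatility * sqrt(dt) * Z, with Z ~ N(0, 1)
        self.current_state = self.current_state + self.volatility * np.sqrt(self.step_size) * self.rng.normal(
            size=(self.num_trajectories, 1)
        )
# ----------------------------------------------------------------------------------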
147 | 148 | 149 | class ShortTermOuAlphaMidpriceModel(MidpriceModel): 150 | def __init__( 151 | self, 152 | volatility: float = 2.0, 153 | ou_process: OuMidpriceModel = None, 154 | initial_price: float = 100.0, 155 | terminal_time: float = 1.0, 156 | step_size: float = 0.01, 157 | num_trajectories: int = 1, 158 | seed: Optional[int] = None, 159 | ): 160 | self.volatility = volatility 161 | self.ou_process = ou_process or OuMidpriceModel(initial_price=0.0) 162 | self.terminal_time = terminal_time 163 | super().__init__( 164 | min_value=np.array( 165 | [ 166 | [ 167 | initial_price - (self._get_max_asset_price(initial_price, terminal_time) - initial_price), 168 | self.ou_process.min_value, 169 | ] 170 | ] 171 | ), 172 | max_value=np.array([[self._get_max_asset_price(initial_price, terminal_time), self.ou_process.max_value]]), 173 | step_size=step_size, 174 | terminal_time=terminal_time, 175 | initial_state=np.array([[initial_price, self.ou_process.initial_state[0][0]]]), 176 | num_trajectories=num_trajectories, 177 | seed=seed, 178 | ) 179 | 180 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 181 | self.current_state[:, 0] = ( 182 | self.current_state[:, 0] 183 | + self.ou_process.current_state * self.step_size * np.ones((self.num_trajectories, 1)) 184 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 185 | ) 186 | self.ou_process.update(arrivals, fills, actions) 187 | self.current_state[:, 1] = self.ou_process.current_state 188 | 189 | def _get_max_asset_price(self, initial_price, terminal_time): 190 | return initial_price + 4 * self.volatility * terminal_time # TODO: what should this be? 191 | 192 | 193 | class BrownianMotionJumpMidpriceModel(MidpriceModel): 194 | def __init__( 195 | self, 196 | drift: float = 0.0, 197 | volatility: float = 2.0, 198 | jump_size: float = 1.0, 199 | initial_price: float = 100, 200 | terminal_time: float = 1.0, 201 | step_size: float = 0.01, 202 | num_trajectories: int = 1, 203 | seed: Optional[int] = None, 204 | ): 205 | self.drift = drift 206 | self.volatility = volatility 207 | self.jump_size = jump_size 208 | self.terminal_time = terminal_time 209 | super().__init__( 210 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 211 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 212 | step_size=step_size, 213 | terminal_time=terminal_time, 214 | initial_state=np.array([[initial_price]]), 215 | num_trajectories=num_trajectories, 216 | seed=seed, 217 | ) 218 | 219 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 220 | fills_bid = fills[:, BID_INDEX] * arrivals[:, BID_INDEX] 221 | fills_ask = fills[:, ASK_INDEX] * arrivals[:, ASK_INDEX] 222 | self.current_state = ( 223 | self.current_state 224 | + self.drift * self.step_size * np.ones((self.num_trajectories, 1)) 225 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 226 | + (self.jump_size * fills_ask - self.jump_size * fills_bid).reshape(-1,1) 227 | ) 228 | 229 | def _get_max_value(self, initial_price, terminal_time): 230 | return initial_price + 4 * self.volatility * terminal_time 231 | 232 | 233 | class OuJumpMidpriceModel(MidpriceModel): 234 | def __init__( 235 | self, 236 | mean_reversion_level: float = 0.0, 237 | mean_reversion_speed: float = 1.0, 238 | volatility: float = 2.0, 239 | 
jump_size: float = 1.0, 240 | initial_price: float = 100.0, 241 | terminal_time: float = 1.0, 242 | step_size: float = 0.01, 243 | num_trajectories: int = 1, 244 | seed: Optional[int] = None, 245 | ): 246 | self.mean_reversion_level = mean_reversion_level 247 | self.mean_reversion_speed = mean_reversion_speed 248 | self.volatility = volatility 249 | self.jump_size = jump_size 250 | self.terminal_time = terminal_time 251 | super().__init__( 252 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 253 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 254 | step_size=step_size, 255 | terminal_time=terminal_time, 256 | initial_state=np.array([[initial_price]]), 257 | num_trajectories=num_trajectories, 258 | seed=seed, 259 | ) 260 | 261 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 262 | fills_bid = fills[:, BID_INDEX] * arrivals[:, BID_INDEX] 263 | fills_ask = fills[:, ASK_INDEX] * arrivals[:, ASK_INDEX] 264 | self.current_state = ( 265 | self.current_state 266 | - self.mean_reversion_speed 267 | * (self.current_state - self.mean_reversion_level * np.ones((self.num_trajectories, 1))) 268 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 269 | + (self.jump_size * fills_ask - self.jump_size * fills_bid).reshape(-1,1) 270 | ) 271 | 272 | def _get_max_value(self, initial_price, terminal_time): 273 | return initial_price + 4 * self.volatility * terminal_time 274 | 275 | 276 | class ShortTermJumpAlphaMidpriceModel(MidpriceModel): 277 | def __init__( 278 | self, 279 | volatility: float = 2.0, 280 | ou_jump_process: OuJumpMidpriceModel = None, 281 | initial_price: float = 100.0, 282 | terminal_time: float = 1.0, 283 | step_size: float = 0.01, 284 | num_trajectories: int = 1, 285 | seed: Optional[int] = None, 286 | ): 287 | self.volatility = volatility 288 | self.ou_jump_process = ou_jump_process or OuJumpMidpriceModel(initial_price=0.0) 289 | self.terminal_time = terminal_time 290 | super().__init__( 291 | min_value=np.array( 292 | [ 293 | [ 294 | initial_price - (self._get_max_asset_price(initial_price, terminal_time) - initial_price), 295 | self.ou_jump_process.min_value, 296 | ] 297 | ] 298 | ), 299 | max_value=np.array( 300 | [[self._get_max_asset_price(initial_price, terminal_time), self.ou_jump_process.max_value]] 301 | ), 302 | step_size=step_size, 303 | terminal_time=terminal_time, 304 | initial_state=np.array([[initial_price, self.ou_jump_process.initial_state[0][0]]]), 305 | num_trajectories=num_trajectories, 306 | seed=seed, 307 | ) 308 | 309 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 310 | self.current_state[:, 0] = ( 311 | self.current_state[:, 0] 312 | + self.ou_jump_process.current_state * self.step_size * np.ones((self.num_trajectories, 1)) 313 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 314 | ) 315 | self.ou_jump_process.update(arrivals, fills, actions) 316 | self.current_state[:, 1] = self.ou_jump_process.current_state 317 | 318 | def _get_max_asset_price(self, initial_price, terminal_time): 319 | return initial_price + 4 * self.volatility * terminal_time # TODO: what should this be? 
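# ----------------------------------------------------------------------------------
# Illustration only (not part of mbt_gym): the *Jump* midprice models above shift the
# price by jump_size whenever one of the agent's quotes is filled -- an ask-side fill
# pushes the midprice up, a bid-side fill pushes it down. A worked example of that
# jump term with made-up fill/arrival arrays; BID_INDEX == 0 and ASK_INDEX == 1 is an
# assumption here (the actual values live in mbt_gym/gym/index_names.py).

import numpy as np
from mbt_gym.gym.index_names import BID_INDEX, ASK_INDEX

jump_size = 1.0
arrivals = np.array([[1, 1], [1, 0]])  # exogenous orders arriving on the (bid, ask) sides
fills = np.array([[0, 1], [1, 0]])     # which of the agent's (bid, ask) quotes were filled

fills_bid = fills[:, BID_INDEX] * arrivals[:, BID_INDEX]
fills_ask = fills[:, ASK_INDEX] * arrivals[:, ASK_INDEX]
jump = (jump_size * fills_ask - jump_size * fills_bid).reshape(-1, 1)
# Under the assumed index values, jump == [[1.], [-1.]]: in trajectory 0 the agent's ask
# is filled and the midprice jumps up; in trajectory 1 its bid is filled and it jumps down.
# ----------------------------------------------------------------------------------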
320 | 321 | 322 | class HestonMidpriceModel(MidpriceModel): 323 | # Current/Initial State with the Heston model will consist of price AND current variance, not just price 324 | def __init__( 325 | self, 326 | drift: float = 0.05, 327 | volatility_mean_reversion_rate: float = 3, 328 | volatility_mean_reversion_level: float = 0.04, 329 | weiner_correlation: float = -0.8, 330 | volatility_of_volatility: float = 0.6, 331 | initial_price: float = 100, 332 | initial_variance: float = 0.2**2, 333 | terminal_time: float = 1.0, 334 | step_size: float = 0.01, 335 | num_trajectories: int = 1, 336 | seed: Optional[int] = None, 337 | ): 338 | self.drift = drift 339 | self.volatility_mean_reversion_rate = volatility_mean_reversion_rate 340 | self.terminal_time = terminal_time 341 | self.weiner_correlation = weiner_correlation 342 | self.volatility_mean_reversion_level = volatility_mean_reversion_level 343 | self.volatility_of_volatility = volatility_of_volatility 344 | super().__init__( 345 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 346 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 347 | step_size=step_size, 348 | terminal_time=terminal_time, 349 | initial_state=np.array([[initial_price, initial_variance]]), 350 | num_trajectories=num_trajectories, 351 | seed=seed, 352 | ) 353 | 354 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 355 | weiner_means = np.array([0, 0]) 356 | weiner_corr = np.array([[1, self.weiner_correlation], [self.weiner_correlation, 1]]) 357 | weiners = np.random.multivariate_normal(weiner_means, cov=weiner_corr, size=self.num_trajectories) 358 | self.current_state[:, 0] = ( 359 | self.current_state[:, 0] 360 | + self.drift * self.current_state[:, 0] * self.step_size 361 | + np.sqrt(self.current_state[:, 1] * self.step_size) * self.current_state[:, 0] * weiners[:, 0] 362 | ) 363 | self.current_state[:, 1] = np.abs( 364 | self.current_state[:, 1] 365 | + self.volatility_mean_reversion_rate 366 | * (self.volatility_mean_reversion_level - self.current_state[:, 1]) 367 | * self.step_size 368 | + self.volatility_of_volatility * np.sqrt(self.current_state[:, 1] * self.step_size) * weiners[:, 1] 369 | ) 370 | 371 | def _get_max_value(self, initial_price, terminal_time): 372 | return initial_price + 4 * self.volatility_mean_reversion_level * terminal_time 373 | 374 | 375 | class ConstantElasticityOfVarianceMidpriceModel(MidpriceModel): 376 | def __init__( 377 | self, 378 | drift: float = 0.0, 379 | volatility: float = 0.1, 380 | gamma: float = 1, # gamma = 1 is just gbm 381 | initial_price: float = 100, 382 | terminal_time: float = 1.0, 383 | step_size: float = 0.01, 384 | num_trajectories: int = 1, 385 | seed: Optional[int] = None, 386 | ): 387 | self.drift = drift 388 | self.volatility = volatility 389 | self.gamma = gamma 390 | self.terminal_time = terminal_time 391 | super().__init__( 392 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 393 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 394 | step_size=step_size, 395 | terminal_time=terminal_time, 396 | initial_state=np.array([[initial_price]]), 397 | num_trajectories=num_trajectories, 398 | seed=seed, 399 | ) 400 | 401 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 402 | self.current_state = ( 403 | 
self.current_state 404 | + self.current_state * self.drift * self.step_size # *np.ones((self.num_trajectories, 1)) 405 | + self.volatility 406 | * (self.current_state**self.gamma) 407 | * np.sqrt(self.step_size) 408 | * np.random.normal(size=self.num_trajectories) 409 | ) 410 | 411 | def _get_max_value(self, initial_price, terminal_time): 412 | return initial_price + 4 * self.volatility * terminal_time 413 | 414 | 415 | 416 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/price_impact_models.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Optional 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | 9 | class PriceImpactModel(StochasticProcessModel): 10 | """PriceImpactModel models the price impact of orders in the order book.""" 11 | 12 | def __init__( 13 | self, 14 | min_value: np.ndarray, 15 | max_value: np.ndarray, 16 | step_size: float, 17 | terminal_time: float, 18 | initial_state: np.ndarray, 19 | num_trajectories: int = 1, 20 | seed: int = None, 21 | ): 22 | super().__init__(min_value, max_value, step_size, terminal_time, initial_state, num_trajectories, seed) 23 | 24 | @abc.abstractmethod 25 | def get_impact(self, action: np.ndarray) -> np.ndarray: 26 | pass 27 | 28 | @property 29 | @abc.abstractmethod 30 | def max_speed(self) -> float: 31 | pass 32 | 33 | 34 | class TemporaryPowerPriceImpact(PriceImpactModel): 35 | def __init__( 36 | self, 37 | temporary_impact_coefficient: float = 0.01, 38 | temporary_impact_exponent: float = 1.0, 39 | num_trajectories: int = 1, 40 | ): 41 | self.temporary_impact_coefficient = temporary_impact_coefficient 42 | self.temporary_impact_exponent = temporary_impact_exponent 43 | super().__init__( 44 | min_value=np.array([[]]), 45 | max_value=np.array([[]]), 46 | step_size=None, 47 | terminal_time=0.0, 48 | initial_state=np.array([[]]), 49 | num_trajectories=num_trajectories, 50 | seed=None, 51 | ) 52 | 53 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 54 | pass 55 | 56 | def get_impact(self, action) -> np.ndarray: 57 | return self.temporary_impact_coefficient * action**self.temporary_impact_exponent 58 | 59 | @property 60 | def max_speed(self) -> float: 61 | return 100.0 # TODO: link to asset price perhaps? 
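# ----------------------------------------------------------------------------------
# Illustration only (not part of mbt_gym): TemporaryPowerPriceImpact above maps a
# trading speed to an instantaneous impact of
#     impact = temporary_impact_coefficient * speed ** temporary_impact_exponent,
# so with exponent 1.0 it reduces to plain linear temporary impact. A minimal usage
# sketch with made-up trading speeds (assumes mbt_gym is importable):

import numpy as np
from mbt_gym.stochastic_processes.price_impact_models import TemporaryPowerPriceImpact

impact_model = TemporaryPowerPriceImpact(
    temporary_impact_coefficient=0.01, temporary_impact_exponent=1.0, num_trajectories=3
)
speeds = np.array([[1.0], [5.0], [10.0]])  # one trading speed per trajectory
print(impact_model.get_impact(speeds))     # -> [[0.01], [0.05], [0.1]] (up to NumPy print formatting)
# ----------------------------------------------------------------------------------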
62 | 63 | 64 | class TemporaryAndPermanentPriceImpact(PriceImpactModel): 65 | def __init__( 66 | self, 67 | temporary_impact_coefficient: float = 0.01, 68 | permanent_impact_coefficient: float = 0.01, 69 | n_steps: int = 20 * 10, 70 | terminal_time: float = 1.0, 71 | num_trajectories: int = 1, 72 | ): 73 | self.temporary_impact_coefficient = temporary_impact_coefficient 74 | self.permanent_impact_coefficient = permanent_impact_coefficient 75 | self.n_steps = n_steps 76 | self.terminal_time = terminal_time 77 | self.step_size = self.terminal_time / self.n_steps 78 | super().__init__( 79 | min_value=np.array([[-self.max_speed * self.terminal_time * self.permanent_impact_coefficient]]), 80 | max_value=np.array([[self.max_speed * self.terminal_time * self.permanent_impact_coefficient]]), 81 | step_size=self.step_size, 82 | terminal_time=0.0, 83 | initial_state=np.array([[0]]), 84 | num_trajectories=num_trajectories, 85 | seed=None, 86 | ) 87 | 88 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 89 | self.current_state = self.current_state + self.permanent_impact_coefficient * actions * self.step_size 90 | 91 | def get_impact(self, action) -> np.ndarray: 92 | return self.temporary_impact_coefficient * action + self.current_state 93 | 94 | @property 95 | def max_speed(self) -> float: 96 | return 10.0 # TODO: link to asset price perhaps? 97 | 98 | 99 | class TemporaryAndTransientPriceImpact(PriceImpactModel): 100 | def __init__( 101 | self, 102 | temporary_impact_coefficient: float = 0.01, 103 | transient_impact_coefficient: float = 0.01, # kappa in Neuman-Voß (2022) 104 | resilience_coefficient: float = 0.01, # rho in Neuman-Voß (2022) 105 | initial_transient_impact: float = 0.01, # y in Neuman-Voß (2022) 106 | linear_kernel_coefficient: float = 0.01, # gamma in Neuman-Voß (2022) 107 | n_steps: int = 20 * 10, 108 | terminal_time: float = 1.0, 109 | num_trajectories: int = 1, 110 | ): 111 | self.temporary_impact_coefficient = temporary_impact_coefficient 112 | self.transient_impact_coefficient = transient_impact_coefficient 113 | self.resilience_coefficient = resilience_coefficient 114 | self.initial_transient_impact = initial_transient_impact 115 | self.linear_kernel_coefficient = linear_kernel_coefficient 116 | self.n_steps = n_steps 117 | self.terminal_time = terminal_time 118 | self.step_size = self.terminal_time / self.n_steps 119 | super().__init__( 120 | min_value=np.array([[-self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 121 | max_value=np.array([[self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 122 | step_size=self.step_size, 123 | terminal_time=0.0, 124 | initial_state=np.array([[self.initial_transient_impact]]), 125 | num_trajectories=num_trajectories, 126 | seed=None, 127 | ) 128 | 129 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 130 | self.current_state = (self.current_state - self.resilience_coefficient * self.current_state * self.step_size 131 | + self.linear_kernel_coefficient * actions * self.step_size) 132 | 133 | def get_impact(self, action) -> np.ndarray: 134 | return self.temporary_impact_coefficient * action + self.transient_impact_coefficient * self.current_state 135 | 136 | @property 137 | def max_speed(self) -> float: 138 | return 10.0 # TODO: link to asset price 139 | 140 | 141 | 142 | class TransientPriceImpact(PriceImpactModel): 143 | def __init__( 144 | self, 145 | 
transient_impact_coefficient: float = 0.01, # kappa in Neuman-Voß (2022) 146 | resilience_coefficient: float = 0.01, # rho in Neuman-Voß (2022) 147 | initial_transient_impact: float = 0.01, # y in Neuman-Voß (2022) 148 | linear_kernel_coefficient: float = 0.01, # gamma in Neuman-Voß (2022) 149 | n_steps: int = 20 * 10, 150 | terminal_time: float = 1.0, 151 | num_trajectories: int = 1, 152 | ): 153 | self.transient_impact_coefficient = transient_impact_coefficient 154 | self.resilience_coefficient = resilience_coefficient 155 | self.initial_transient_impact = initial_transient_impact 156 | self.linear_kernel_coefficient = linear_kernel_coefficient 157 | self.n_steps = n_steps 158 | self.terminal_time = terminal_time 159 | self.step_size = self.terminal_time / self.n_steps 160 | super().__init__( 161 | min_value=np.array([[-self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 162 | max_value=np.array([[self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 163 | step_size=self.step_size, 164 | terminal_time=0.0, 165 | initial_state=np.array([[self.initial_transient_impact]]), 166 | num_trajectories=num_trajectories, 167 | seed=None, 168 | ) 169 | 170 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 171 | self.current_state = (self.current_state - self.resilience_coefficient * self.current_state * self.step_size 172 | + self.linear_kernel_coefficient * actions * self.step_size) 173 | 174 | def get_impact(self, action) -> np.ndarray: 175 | return self.transient_impact_coefficient * self.current_state 176 | 177 | @property 178 | def max_speed(self) -> float: 179 | return 10.0 # TODO: link to asset price 180 | 181 | 182 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | check_untyped_defs = True 4 | -------------------------------------------------------------------------------- /notebooks/Learning_to_make_a_market_with_mbt_gym_and_Stable_Baselines_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b097fbb8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Learning to make a market with mbt_gym and Stable Baselines 3" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "2b918608", 14 | "metadata": {}, 15 | "source": [ 16 | "### Import external modules" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "56ffdffb", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import gym\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import numpy as np\n", 29 | "\n", 30 | "from stable_baselines3 import PPO\n", 31 | "from stable_baselines3.common.callbacks import EvalCallback\n", 32 | "from stable_baselines3.common.env_util import make_vec_env\n", 33 | "from stable_baselines3.common.vec_env import VecMonitor" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "16d0a2c9", 39 | "metadata": {}, 40 | "source": [ 41 | "### Add mbt-gym to path" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "387934ba", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import sys\n", 52 | "sys.path.append(\"../\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "5cb89dbb", 59 | "metadata": 
{}, 60 | "outputs": [], 61 | "source": [ 62 | "from mbt_gym.agents.BaselineAgents import CarteaJaimungalMmAgent\n", 63 | "from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory\n", 64 | "from mbt_gym.gym.StableBaselinesTradingEnvironment import StableBaselinesTradingEnvironment\n", 65 | "from mbt_gym.gym.TradingEnvironment import TradingEnvironment\n", 66 | "from mbt_gym.gym.wrappers import *\n", 67 | "from mbt_gym.rewards.RewardFunctions import PnL, CjMmCriterion\n", 68 | "from mbt_gym.stochastic_processes.midprice_models import BrownianMotionMidpriceModel\n", 69 | "from mbt_gym.stochastic_processes.arrival_models import PoissonArrivalModel\n", 70 | "from mbt_gym.stochastic_processes.fill_probability_models import ExponentialFillFunction\n", 71 | "from mbt_gym.gym.ModelDynamics import LimitOrderModelDynamics" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "535d65b0", 77 | "metadata": {}, 78 | "source": [ 79 | "### Create market making environment" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "id": "98ab1846", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "terminal_time = 1.0\n", 90 | "arrival_rate = 10.0\n", 91 | "n_steps = int(10 * terminal_time * arrival_rate)\n", 92 | "phi = 0.5\n", 93 | "alpha = 0.001" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "11432746", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def get_cj_env(num_trajectories:int = 1):\n", 104 | " fill_exponent = 1\n", 105 | " sigma = 0.1\n", 106 | " initial_inventory = (-4,5)\n", 107 | " initial_price = 100\n", 108 | " step_size = 1/n_steps\n", 109 | " timestamps = np.linspace(0, terminal_time, n_steps + 1)\n", 110 | " midprice_model = BrownianMotionMidpriceModel(volatility=sigma, step_size=1/n_steps,\n", 111 | " num_trajectories=num_trajectories)\n", 112 | " arrival_model = PoissonArrivalModel(intensity=np.array([arrival_rate, arrival_rate]), \n", 113 | " step_size=1/n_steps, \n", 114 | " num_trajectories=num_trajectories)\n", 115 | " fill_probability_model = ExponentialFillFunction(fill_exponent=fill_exponent, \n", 116 | " step_size=1/n_steps,\n", 117 | " num_trajectories=num_trajectories)\n", 118 | " LOtrader = LimitOrderModelDynamics(midprice_model = midprice_model, arrival_model = arrival_model, \n", 119 | " fill_probability_model = fill_probability_model,\n", 120 | " num_trajectories = num_trajectories)\n", 121 | " reward_function = CjMmCriterion(per_step_inventory_aversion = phi, terminal_inventory_aversion = alpha)\n", 122 | " env_params = dict(terminal_time=terminal_time, \n", 123 | " n_steps=n_steps,\n", 124 | " initial_inventory = initial_inventory,\n", 125 | " model_dynamics = LOtrader,\n", 126 | " max_inventory=n_steps,\n", 127 | " normalise_action_space = False,\n", 128 | " normalise_observation_space = False,\n", 129 | " reward_function = reward_function,\n", 130 | " num_trajectories=num_trajectories)\n", 131 | " return TradingEnvironment(**env_params)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "0d29022e", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "num_trajectories = 1000\n", 142 | "env = ReduceStateSizeWrapper(get_cj_env(num_trajectories))\n", 143 | "sb_env = StableBaselinesTradingEnvironment(trading_env=env)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "3f837dc9", 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | 
"# Monitor sb_env\n", 154 | "sb_env = VecMonitor(sb_env)\n", 155 | "# Add directory for tensorboard logging and best model\n", 156 | "tensorboard_logdir = \"./tensorboard/PPO-learning-CJ/\"\n", 157 | "best_model_path = \"./SB_models/PPO-best-CJ\"" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "3106df91", 163 | "metadata": {}, 164 | "source": [ 165 | "### Define PPO policy" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "b5d0e1c5", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "policy_kwargs = dict(net_arch=[dict(pi=[256, 256], vf=[256, 256])])\n", 176 | "PPO_params = {\"policy\":'MlpPolicy', \"env\": sb_env, \"verbose\":1, \n", 177 | " \"policy_kwargs\":policy_kwargs, \n", 178 | " \"tensorboard_log\":tensorboard_logdir,\n", 179 | " \"n_epochs\":3,\n", 180 | " \"batch_size\": int(n_steps * num_trajectories / 10), \n", 181 | " \"n_steps\": int(n_steps)}\n", 182 | "callback_params = dict(eval_env=sb_env, n_eval_episodes = 2048, #200 before (n_eval_episodes)\n", 183 | " best_model_save_path = best_model_path, \n", 184 | " deterministic=True)\n", 185 | "\n", 186 | "callback = EvalCallback(**callback_params)\n", 187 | "model = PPO(**PPO_params, device=\"cpu\")" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "01707612", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "model.learn(total_timesteps = 10_000_000) # Increase number of training timesteps according to computing resources" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "id": "6d74b6cd", 203 | "metadata": {}, 204 | "source": [ 205 | "## Comparing the learnt policy to the optimal policy" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "id": "bcc4d5d9", 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from mbt_gym.agents.SbAgent import SbAgent" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "id": "f80b78c0", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "ppo_agent = SbAgent(model)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "3dcb28e9", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "inventories = np.arange(-3,4,1)\n", 236 | "bid_actions = []\n", 237 | "ask_actions = []\n", 238 | "for inventory in inventories:\n", 239 | " bid_action, ask_action = ppo_agent.get_action(np.array([[inventory,0.5]])).reshape(-1)\n", 240 | " bid_actions.append(bid_action)\n", 241 | " ask_actions.append(ask_action) " 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "de3df21d", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "#ppo_agent.get_action(np.array([[inventory,0.5]]))" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "fb6bb5cc", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "cj_agent = CarteaJaimungalMmAgent(env=get_cj_env())" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "id": "a6a344c6", 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "# Get the Cartea Jaimungal action\n", 272 | "cj_bid_actions = []\n", 273 | "cj_ask_actions = []\n", 274 | "for inventory in inventories:\n", 275 | " bid_action, ask_action = cj_agent.get_action(np.array([[0,inventory,0.5]])).reshape(-1)\n", 276 | " 
cj_bid_actions.append(bid_action)\n", 277 | " cj_ask_actions.append(ask_action)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "615c8e8f", 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "plt.plot(inventories, bid_actions, label = \"bid\", color = \"k\")\n", 288 | "plt.plot(inventories, ask_actions, label = \"ask\", color = \"r\")\n", 289 | "plt.plot(inventories, cj_bid_actions, label = \"bid cj\", color = \"k\", linestyle = \"--\")\n", 290 | "plt.plot(inventories, cj_ask_actions, label = \"ask cj\", color = \"r\", linestyle = \"--\")\n", 291 | "plt.legend()" 292 | ] 293 | } 294 | ], 295 | "metadata": { 296 | "hide_input": false, 297 | "kernelspec": { 298 | "display_name": "Python 3 (ipykernel)", 299 | "language": "python", 300 | "name": "python3" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 3 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython3", 312 | "version": "3.10.10" 313 | }, 314 | "toc": { 315 | "base_numbering": 1, 316 | "nav_menu": {}, 317 | "number_sections": true, 318 | "sideBar": true, 319 | "skip_h1_title": false, 320 | "title_cell": "Table of Contents", 321 | "title_sidebar": "Contents", 322 | "toc_cell": false, 323 | "toc_position": {}, 324 | "toc_section_display": true, 325 | "toc_window_display": false 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 5 330 | } 331 | -------------------------------------------------------------------------------- /notebooks/Test_2_-_replicate_CJP_2015_-_closed-form_solution_for_value_function.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "08005fcb", 6 | "metadata": {}, 7 | "source": [ 8 | "# Cartea Jaimungal Penalva 2015" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "96ce4fb2", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import sys\n", 19 | "sys.path.append(\"../\") # This version of the notebook is in the subfolder \"notebooks\" of the repo\n", 20 | "\n", 21 | "import gym\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "import seaborn as sns\n", 26 | "import scipy\n", 27 | "\n", 28 | "from copy import deepcopy\n", 29 | "\n", 30 | "\n", 31 | "from mbt_gym.agents.BaselineAgents import *\n", 32 | "from mbt_gym.gym.TradingEnvironment import TradingEnvironment\n", 33 | "from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory\n", 34 | "from mbt_gym.gym.helpers.plotting import *\n", 35 | "from mbt_gym.stochastic_processes.midprice_models import *\n", 36 | "from mbt_gym.stochastic_processes.arrival_models import *\n", 37 | "from mbt_gym.stochastic_processes.fill_probability_models import *\n", 38 | "import torch\n", 39 | "#print(torch.cuda.is_available())\n", 40 | "#print(torch.cuda.get_device_name())" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "41b5cd51", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from mbt_gym.gym.ModelDynamics import LimitOrderModelDynamics\n", 51 | "from mbt_gym.rewards.RewardFunctions import CjMmCriterion\n", 52 | "seed = 410\n", 53 | "max_inventory = 100" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "00dc33ca", 60 | 
"metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def get_env(num_trajectories:int = 1,\n", 64 | " initial_price = 100,\n", 65 | " terminal_time = 1.0,\n", 66 | " sigma = 2.0,\n", 67 | " n_steps = 1000,\n", 68 | " initial_inventory = 0,\n", 69 | " arrival_rate = 140,\n", 70 | " fill_exponent = 1.5,\n", 71 | " per_step_inventory_aversion = 0.01,\n", 72 | " terminal_inventory_aversion = 0.001): \n", 73 | " midprice_model = BrownianMotionMidpriceModel(initial_price = initial_price, \n", 74 | " volatility=sigma, step_size=terminal_time/n_steps,\n", 75 | " terminal_time = terminal_time,\n", 76 | " num_trajectories=num_trajectories)\n", 77 | " arrival_model = PoissonArrivalModel(intensity=np.array([arrival_rate, arrival_rate]), \n", 78 | " step_size=terminal_time/n_steps, \n", 79 | " num_trajectories=num_trajectories)\n", 80 | " fill_probability_model = ExponentialFillFunction(fill_exponent=fill_exponent, \n", 81 | " step_size=terminal_time/n_steps,\n", 82 | " num_trajectories=num_trajectories)\n", 83 | " LOtrader = LimitOrderModelDynamics(midprice_model = midprice_model, arrival_model = arrival_model, \n", 84 | " fill_probability_model = fill_probability_model,\n", 85 | " num_trajectories = num_trajectories)\n", 86 | " reward = CjMmCriterion(per_step_inventory_aversion = per_step_inventory_aversion,\n", 87 | " terminal_inventory_aversion = terminal_inventory_aversion,\n", 88 | " terminal_time = terminal_time)\n", 89 | " env_params = dict(terminal_time=terminal_time, \n", 90 | " n_steps=n_steps,\n", 91 | " seed = seed,\n", 92 | " initial_inventory = initial_inventory,\n", 93 | " model_dynamics = LOtrader,\n", 94 | " reward_function = reward,\n", 95 | " max_inventory=max_inventory,\n", 96 | " normalise_action_space = False,\n", 97 | " normalise_observation_space = False,\n", 98 | " num_trajectories=num_trajectories)\n", 99 | " return TradingEnvironment(**env_params)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "d0c171a6", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "env = get_env()\n", 110 | "agent = CarteaJaimungalMmAgent(env = env, max_inventory = max_inventory)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "7dd86124", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "plot_trajectory(env, agent, seed = seed)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "ed76675c", 126 | "metadata": {}, 127 | "source": [ 128 | "### Comparing the value function to the simulated optimal agent " 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "b4b4a440", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "num_trajectories = 1_000\n", 139 | "vec_env = get_env(num_trajectories)\n", 140 | "vec_agent = CarteaJaimungalMmAgent(env = vec_env, max_inventory = max_inventory)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "9ea588e8", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "observations, actions, rewards = generate_trajectory(vec_env, vec_agent)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "97b71326", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "results, fig, total_rewards = generate_results_table_and_hist(vec_env=vec_env,agent=vec_agent)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "id": "f71e7174", 166 | "metadata": {}, 167 
| "source": [ 168 | "# Value function versus total rewards" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "db4aa92d", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "vec_env.reset()\n", 179 | "agent = CarteaJaimungalMmAgent(env = vec_env, max_inventory = max_inventory)\n", 180 | "agent.calculate_true_value_function(vec_env.state[0].reshape(1,-1))" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "id": "0a766459", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "np.mean(total_rewards), np.std(total_rewards)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "id": "fe0ed865", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "true_mean = agent.calculate_true_value_function(vec_env.state[0].reshape(1,-1))[0,0]\n", 201 | "sample_mean = np.mean(total_rewards)\n", 202 | "N = len(total_rewards)\n", 203 | "sample_variance = np.var(total_rewards) * N/(N-1)\n", 204 | "T = (sample_mean - true_mean)/ (np.sqrt(sample_variance) / np.sqrt(N))\n", 205 | "q_l, q_u = scipy.stats.t(df=(N-1)).ppf((0.1, 0.9))\n", 206 | "if T>q_l and Tq_l and Tq_l and Tq_l and T=1.13.1 16 | -------------------------------------------------------------------------------- /requirements_no_versions.txt: -------------------------------------------------------------------------------- 1 | # specific version of setuptools needed for gym 0.21 2 | # https://github.com/openai/gym/issues/3176 3 | # (it is not clear why it goes for gym 0.21 not e.g., 0.26.1) 4 | setuptools==65.5.0 5 | black 6 | flake8 7 | invoke 8 | jupyter 9 | jupyter_nbextensions_configurator 10 | matplotlib 11 | mypy 12 | numpy 13 | pandas 14 | seaborn 15 | stable_baselines3 16 | stochastic 17 | tensorboard 18 | torch 19 | gym 20 | -------------------------------------------------------------------------------- /roadmap.md: -------------------------------------------------------------------------------- 1 | # Roadmap for mbt_gym 2 | 3 | ## General additions 4 | 5 | - [Stochastic and local volatility models](https://en.wikipedia.org/wiki/Stochastic_volatility) for the midprice 6 | process. For example, the Heston or CEV model. 7 | - Hawkes processes with more general kernels for the arrival rate process. 8 | - Robust adversarial reinforcement learning. See, for example, 9 | [the paper by Spooner and Savani](https://arxiv.org/abs/2003.01820). 10 | 11 | ## Market making 12 | 13 | - Market making with multiple assets. 14 | 15 | ## Optimal execution 16 | 17 | - More general price impact processes. 
18 | - Optimal execution with multiple assets 19 | 20 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import task 2 | 3 | 4 | MODULES_TO_CHECK = ["mbt_gym", "*.py"] 5 | MODULES_TO_CHECK_STR = " ".join(MODULES_TO_CHECK) 6 | BLACK_PATHS_TO_IGNORE = [] 7 | BLACK_PATHS_TO_IGNORE_STR = " ".join(BLACK_PATHS_TO_IGNORE) 8 | MYPY_PATHS_TO_IGNORE = [] 9 | MYPY_EXCLUSION_STR = "" 10 | for path in MYPY_PATHS_TO_IGNORE: 11 | MYPY_EXCLUSION_STR += " --exclude " + path 12 | FLAKE_ERROR_CODE_IGNORE_STR = "mbt_gym/*.py:E203" 13 | 14 | 15 | @task 16 | def black_reformat(c): 17 | if len(BLACK_PATHS_TO_IGNORE) > 0: 18 | c.run(f"black --line-length 120 {MODULES_TO_CHECK_STR} --exclude {BLACK_PATHS_TO_IGNORE_STR}") 19 | else: 20 | c.run(f"black --line-length 120 {MODULES_TO_CHECK_STR}") 21 | 22 | 23 | @task 24 | def check_python(c): 25 | if len(BLACK_PATHS_TO_IGNORE) > 0: 26 | c.run(f"black --check --line-length 120 {MODULES_TO_CHECK_STR} --exclude {BLACK_PATHS_TO_IGNORE_STR}") 27 | else: 28 | c.run(f"black --check --line-length 120 {MODULES_TO_CHECK_STR}") 29 | print("Running flake8...") 30 | c.run(f"flake8 --max-line-length 120 {MODULES_TO_CHECK_STR} --per-file-ignores={FLAKE_ERROR_CODE_IGNORE_STR}") 31 | print("No flake8 errors") 32 | print("Running mypy...") 33 | c.run(f"mypy -p {MODULES_TO_CHECK[0]}" + MYPY_EXCLUSION_STR) 34 | print("No mypy errors") 35 | c.run("python check_init_files.py") 36 | c.run("python check_all_py_imports.py") 37 | --------------------------------------------------------------------------------
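# ----------------------------------------------------------------------------------
# Illustration only: the tasks defined in tasks.py above are intended to be run with
# the `invoke` CLI from the repository root. By default invoke exposes underscore task
# names with hyphens, so the expected commands would be along the lines of:
#
#   invoke black-reformat   # reformat mbt_gym and the top-level *.py files with black (line length 120)
#   invoke check-python     # black --check, flake8 and mypy, then the two check_*.py scripts
# ----------------------------------------------------------------------------------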