├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── __init__.py ├── check_all_py_imports.py ├── check_init_files.py ├── docker ├── Dockerfile ├── build_image.sh ├── exec_mbt_gym.sh ├── launcher.sh ├── requirements.txt └── start_container.sh ├── experiments ├── __init__.py ├── helpers.py └── learning-pnl-with-different-arrival-rates.py ├── mbt_gym ├── __init__.py ├── agents │ ├── Agent.py │ ├── BaselineAgents.py │ ├── PolicyGradientAgent.py │ ├── SbAgent.py │ └── __init__.py ├── gym │ ├── ModelDynamics.py │ ├── MultiprocessTradingEnv.py │ ├── StableBaselinesTradingEnvironment.py │ ├── TradingEnvironment.py │ ├── __init__.py │ ├── backtesting.py │ ├── helpers │ │ ├── __init__.py │ │ ├── generate_trajectory.py │ │ └── plotting.py │ ├── index_names.py │ ├── info_calculators.py │ └── wrappers.py ├── rewards │ ├── RewardFunctions.py │ ├── __init__.py │ └── tests │ │ ├── __init__.py │ │ └── testRewardFunctions.py └── stochastic_processes │ ├── StochasticProcessModel.py │ ├── __init__.py │ ├── arrival_models.py │ ├── fill_probability_models.py │ ├── midprice_models.py │ └── price_impact_models.py ├── mypy.ini ├── notebooks ├── Baseline_Agents.ipynb ├── Learning_to_make_a_market_with_mbt_gym_and_Stable_Baselines_3.ipynb ├── Test_1_-_replicate_AS_original_results.html ├── Test_1_-_replicate_AS_original_results.ipynb ├── Test_2_-_replicate_CJP_2015_-_closed-form_solution_for_value_function.html └── Test_2_-_replicate_CJP_2015_-_closed-form_solution_for_value_function.ipynb ├── requirements.txt ├── requirements_no_versions.txt ├── roadmap.md └── tasks.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Update notebooks manually, if necessary 4 | notebooks 5 | TMP_NOTEBOOKS 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 166 | #.idea/ 167 | *.iml 168 | *.xml 169 | .DS_Store 170 | .DS_Store 171 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | j.jerome@liverpool.ac.uk. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. 
This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome to the mbt_gym contributing guide 2 | 3 | Thank you for considering contributing to `mbt_gym`! 4 | 5 | Please read our [Code of Conduct](./CODE_OF_CONDUCT.md) first to help us maintain a friendly and helpful community. 6 | 7 | ## Pull requests (PRs) 8 | 9 | Please feel free to open a Pull Request for any minor changes to the repository. For larger changes, please open an 10 | issue first to discuss with other users and maintainers of `mbt_gym`. If you are not familiar with creating a Pull 11 | Request, here are some guides: 12 | - http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request 13 | - https://help.github.com/articles/creating-a-pull-request/ 14 | 15 | In particular, **please see the [roadmap.md](./roadmap.md) file** for a list of desired additions that will be accepted. 16 | Any appropriate tests will also always be accepted. 17 | 18 | ## Codestyle 19 | 20 | We use [mypy](https://mypy-lang.org/) as a static type checker, [Flake8](https://flake8.pycqa.org/en/latest/) to enforce PEP8, and [Black](https://black.readthedocs.io/en/stable/) to enforce consistent styling. 21 | 22 | - Code will be automatically reformatted with: `invoke black-reformat` 23 | - Styling and type checking tests can be run locally with: `invoke check-python` 24 | 25 | ## Tests 26 | 27 | When adding new code to the `mbt_gym` code-base, please add test coverage wherever possible. 28 | We use [unittest](https://docs.python.org/3/library/unittest.html) for unit testing. All unit tests can be run by 29 | calling `nose2` from the root directory.
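For example (a minimal sketch only; the module and class names below are hypothetical and not part of the existing test suite), a new test file following the pattern of `mbt_gym/rewards/tests/testRewardFunctions.py` could look like:

```python
import unittest

import numpy as np


class TestMyNewFeature(unittest.TestCase):
    """Illustrative placeholder: replace with assertions about the code being contributed."""

    def test_shapes_are_preserved(self):
        # A trivial assertion; real tests should exercise the new behaviour.
        result = np.zeros((2, 3))
        self.assertEqual(result.shape, (2, 3))


if __name__ == "__main__":
    unittest.main()
```

Saving such a module with a `test*`-style filename inside a package that contains an `__init__.py` should allow `nose2` to discover and run it automatically.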
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Joseph Jerome 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mbt_gym 2 | `mbt_gym` is a module that provides a suite of gym environments for training reinforcement learning (RL) agents to solve model-based high-frequency trading problems such as market-making and optimal execution. The module is set up in an extensible way to allow different aspects of different models to be combined. It supports highly efficient implementations of vectorized environments to allow faster training of RL agents. 3 | 4 | It includes gym environments for popular analytically tractable market-making models, as well as more complex models that prove difficult to solve analytically. 5 | 6 | The associated paper can be found at https://dl.acm.org/doi/pdf/10.1145/3604237.3626873 and https://arxiv.org/abs/2209.07823. 7 | 8 | ## Contributions are welcome! 9 | If you wish to contribute to this repository, please read the details of how to do so in the 10 | [CONTRIBUTING.md](./CONTRIBUTING.md) file in the root directory of the repository. For ideas on code that you could 11 | contribute, please look at the [roadmap](./roadmap.md). 12 | 13 | ## Using mbt_gym with Docker 14 | 15 | To use the `mbt_gym` package from within a docker container (see [instructions on how to install docker](https://docs.docker.com/engine/install/ubuntu/)), 16 | first change directory into the 17 | docker subdirectory using `cd docker` and then follow the instructions below.
18 | 19 | ### Building 20 | 21 | To build the container: 22 | 23 | ``` 24 | sh build_image.sh 25 | ``` 26 | 27 | ### Running 28 | 29 | Run the start container script (mounting ../, therefore mounting `mbt_gym`), and specify a port for jupyter notebook: 30 | 31 | ``` 32 | sh start_container.sh 8877 33 | ``` 34 | 35 | (Note: if you wish to add gpus to container, just add ```--gpus device=0``` to ```start_container.sh``` to use one gpu 36 | or ```--gpus all``` to add all gpus available.) 37 | 38 | To work in the container via shell: 39 | 40 | ``` 41 | sh exec_mbt_gym.sh 42 | ``` 43 | 44 | ## Citing mbt_gym 45 | 46 | When using `mbt_gym`, please cite our [ACM ICAIF 2023 paper](https://arxiv.org/abs/2209.07823) by using the following 47 | BibTeX entry: 48 | ``` 49 | @inproceedings{JeromeSSH23, 50 | author = {Joseph Jerome and 51 | Leandro S{\'{a}}nchez{-}Betancourt and 52 | Rahul Savani and 53 | Martin Herdegen}, 54 | title = {Mbt-gym: Reinforcement learning for model-based limit order book trading}, 55 | booktitle = {4th {ACM} International Conference on {AI} in Finance, {ICAIF} 2023, 56 | Brooklyn, NY, USA, November 27-29, 2023}, 57 | pages = {619--627}, 58 | publisher = {{ACM}}, 59 | year = {2023}, 60 | url = {https://doi.org/10.1145/3604237.3626873}, 61 | doi = {10.1145/3604237.3626873}, 62 | note = {arXiv preprint arXiv:2209.07823} 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/__init__.py -------------------------------------------------------------------------------- /check_all_py_imports.py: -------------------------------------------------------------------------------- 1 | from typing import Set 2 | 3 | import os 4 | 5 | from glob import glob 6 | import importlib 7 | 8 | 9 | def import_everything() -> None: 10 | paths = _get_all_py_file_paths() 11 | failed_paths: Set[str] = set() 12 | for path in paths: 13 | try: 14 | importlib.__import__(_convert_file_path_to_module_path(path), fromlist=[""]) 15 | except ImportError: 16 | failed_paths.add(path) 17 | if failed_paths: 18 | print("Could not import the following files:\n" + "\n".join(failed_paths)) 19 | exit(1) 20 | else: 21 | print(f"Successfully imported {len(paths)} .py files.") 22 | exit(0) 23 | 24 | 25 | def _get_all_py_file_paths() -> Set[str]: 26 | all_paths = set(glob(os.path.join("mbt_gym", "**", "*.py"), recursive=True)) 27 | return all_paths 28 | 29 | 30 | def _convert_file_path_to_module_path(path: str) -> str: 31 | parts = os.path.normpath(path).split(os.sep) 32 | mod_path, basename = parts[:-1], parts[-1] 33 | basename = basename[:-3] 34 | return ".".join(mod_path + [basename]) 35 | 36 | 37 | if __name__ == "__main__": 38 | import_everything() 39 | -------------------------------------------------------------------------------- /check_init_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | if __name__ == "__main__": 5 | """We want to check for missing init files as they may cause tests and mypy to not run""" 6 | result = [os.path.join(dp, f) for dp, dn, file_names in os.walk(".") for f in file_names] 7 | result = [r for r in result if r.endswith(".py")] 8 | 9 | dirs = set([os.path.dirname(f) for f in result]) 10 | missing_init_files = [] 11 | for _dir in dirs: 12 | init_file = os.path.sep.join([_dir, "__init__.py"]) 13 | 
if not os.path.isfile(init_file): 14 | missing_init_files.append(init_file) 15 | 16 | if missing_init_files: 17 | print("Missing init files: ") 18 | print("\n".join(missing_init_files)) 19 | exit(1) 20 | else: 21 | print("No init files seem to be missing") 22 | exit(0) 23 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 AS MBT_GYM 2 | USER root 3 | 4 | RUN apt-get -y update \ 5 | && apt-get -y upgrade \ 6 | && apt-get -y install \ 7 | fish \ 8 | tmux \ 9 | python3-pip 10 | 11 | WORKDIR /home 12 | COPY requirements.txt requirements.txt 13 | RUN pip install --no-cache-dir -r requirements.txt 14 | COPY launcher.sh launcher.sh 15 | RUN chmod +x launcher.sh 16 | -------------------------------------------------------------------------------- /docker/build_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker build . -t mbt_gym --target MBT_GYM -------------------------------------------------------------------------------- /docker/exec_mbt_gym.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker exec -it mbt_gym /bin/fish -------------------------------------------------------------------------------- /docker/launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | /etc/init.d/postgresql start 3 | jupyter notebook --no-browser --port $1 --ip=0.0.0.0 --allow-root -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | black==22.8.0 2 | flake8==5.0.4 3 | gym==0.26.2 4 | invoke==1.6.0 5 | jupyter 6 | jupyter_nbextensions_configurator 7 | matplotlib==3.4.3 8 | mypy==0.971 9 | numpy==1.22.3 10 | pandas==1.3.5 11 | seaborn==0.11.2 12 | stable_baselines3==1.6.2 13 | stochastic==0.6.0 14 | tensorboard 15 | torch>=1.13.1 16 | -------------------------------------------------------------------------------- /docker/start_container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | tag='latest' 3 | # To add a single gpu, add the flag --gpus device=0, or to add all gpus add --gpus all 4 | docker run --rm --gpus all --shm-size=10.24gb -v ${PWD}/../:/home/mbt_gym/ -p $1:$1 --name mbt_gym --user root -dit mbt_gym:$tag ./launcher.sh $1 5 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/helpers.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import gym 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | from stable_baselines3 import PPO 7 | from stable_baselines3.common.callbacks import EvalCallback 8 | from stable_baselines3.common.vec_env import VecMonitor 9 | 10 | from mbt_gym.agents.BaselineAgents import CarteaJaimungalMmAgent 11 | from mbt_gym.agents.SbAgent import SbAgent 12 | from mbt_gym.gym.StableBaselinesTradingEnvironment import 
StableBaselinesTradingEnvironment 13 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 14 | from mbt_gym.gym.wrappers import ReduceStateSizeWrapper 15 | from mbt_gym.rewards.RewardFunctions import CjMmCriterion, PnL 16 | from mbt_gym.stochastic_processes.arrival_models import PoissonArrivalModel 17 | from mbt_gym.stochastic_processes.fill_probability_models import ExponentialFillFunction 18 | from mbt_gym.stochastic_processes.midprice_models import BrownianMotionMidpriceModel 19 | from mbt_gym.gym.ModelDynamics import LimitAndMarketOrderModelDynamics 20 | 21 | def get_cj_env( 22 | num_trajectories: int = 1, 23 | terminal_time: float = 1.0, 24 | arrival_rate: float = 10.0, 25 | fill_exponent: float = 0.1, 26 | phi: float = 0.5, 27 | alpha: float = 0.001, 28 | sigma: float = 0.1, 29 | initial_inventory=(-5, 6), 30 | random_start: tuple = None, 31 | ): 32 | initial_price = 100 33 | n_steps = int(10 * terminal_time * arrival_rate) 34 | step_size = terminal_time / n_steps 35 | reward_function = CjMmCriterion(phi, alpha) if phi > 0 or alpha > 0 else PnL() 36 | midprice_model=BrownianMotionMidpriceModel( 37 | volatility=sigma, 38 | terminal_time=terminal_time, 39 | step_size=step_size, 40 | initial_price=initial_price, 41 | num_trajectories=num_trajectories, 42 | ) 43 | arrival_model=PoissonArrivalModel( 44 | intensity=np.array([arrival_rate, arrival_rate]), step_size=step_size, num_trajectories=num_trajectories 45 | ) 46 | fill_probability_model=ExponentialFillFunction( 47 | fill_exponent=fill_exponent, step_size=step_size, num_trajectories=num_trajectories 48 | ) 49 | env_params = dict( 50 | terminal_time=terminal_time, 51 | n_steps=n_steps, 52 | model_dynamics = LimitAndMarketOrderModelDynamics(midprice_model = midprice_model, arrival_model= arrival_model, fill_probability_model = fill_probability_model, 53 | num_trajectories = num_trajectories), 54 | initial_inventory=initial_inventory, 55 | reward_function=reward_function, 56 | max_inventory=n_steps, 57 | num_trajectories=num_trajectories, 58 | random_start=random_start, 59 | ) 60 | return TradingEnvironment(**env_params) 61 | 62 | 63 | def wrap_env(env: TradingEnvironment): 64 | env = StableBaselinesTradingEnvironment(trading_env=ReduceStateSizeWrapper(env)) 65 | return VecMonitor(env) 66 | 67 | 68 | def get_ppo_learner_and_callback( 69 | env: TradingEnvironment, tensorboard_base_logdir: str = "./tensorboard/", best_model_path: str = "./best_models" 70 | ): 71 | policy_kwargs = dict(net_arch=[dict(pi=[256, 256], vf=[256, 256])]) 72 | experiment_string = get_experiment_string(env) 73 | tensorboard_logdir = tensorboard_base_logdir + "/" + experiment_string 74 | PPO_params = { 75 | "policy": "MlpPolicy", 76 | "env": wrap_env(env), 77 | "verbose": 1, 78 | "policy_kwargs": policy_kwargs, 79 | "tensorboard_log": tensorboard_logdir, 80 | "n_epochs": 10, 81 | "batch_size": int(env.n_steps * env.num_trajectories / 4), 82 | "normalize_advantage": True, 83 | "n_steps": int(env.n_steps), 84 | "gae_lambda": 0.95, 85 | "gamma": 1, 86 | } 87 | callback_params = dict( 88 | eval_env=wrap_env(env), 89 | n_eval_episodes=10, 90 | best_model_save_path=best_model_path + "/" + experiment_string, 91 | deterministic=True, 92 | eval_freq=env.n_steps * env.num_trajectories * 10, 93 | ) 94 | callback = EvalCallback(**callback_params) 95 | model = PPO(**PPO_params) 96 | return model, callback 97 | 98 | 99 | def get_experiment_string(env): 100 | phi = env.reward_function.phi if isinstance(env.reward_function, CjMmCriterion) else 0 101 | alpha = 
env.reward_function.alpha if isinstance(env.reward_function, CjMmCriterion) else 0 102 | return ( 103 | f"n_traj_{env.num_trajectories}__" 104 | + f"arrival_rate_{env.arrival_model.intensity}__" 105 | + f"fill_exponent_{env.fill_probability_model.fill_exponent}__" 106 | + f"phi_{phi}__" 107 | + f"alpha_{alpha}__" 108 | + f"initial_inventory_{env.initial_inventory}__" 109 | + f"random_start_{env.start_time}" 110 | ) 111 | 112 | 113 | def create_inventory_plot( 114 | model: PPO, 115 | env: TradingEnvironment, 116 | min_inventory: int = -3, 117 | max_inventory: int = 3, 118 | reduced_training_indices: list = None, 119 | model_uses_normalisation: bool = True, 120 | time_of_action: float = 0.5, 121 | save_figure: bool = False, 122 | path_to_figures: str = "./figures", 123 | ): 124 | if model_uses_normalisation: 125 | normalised_env = StableBaselinesTradingEnvironment(ReduceStateSizeWrapper(env, reduced_training_indices)) 126 | assert env.num_trajectories == 1, "Plotting actions must be done with a single trajectory env" 127 | ppo_agent = SbAgent(model) 128 | cj_agent = CarteaJaimungalMmAgent(env=env) 129 | inventories = np.arange(min_inventory, max_inventory + 1, 1) 130 | bid_actions, ask_actions, cj_bid_actions, cj_ask_actions = [], [], [], [] 131 | for inventory in inventories: 132 | state = np.array([[0, inventory, time_of_action, 100]]) 133 | reduced_state = state[:, reduced_training_indices] if reduced_training_indices is not None else state 134 | if model_uses_normalisation: 135 | reduced_state = normalised_env.normalise_observation(reduced_state) 136 | action = ppo_agent.get_action(reduced_state) 137 | if model_uses_normalisation: 138 | action = normalised_env.normalise_action(action, inverse=True) 139 | bid_action, ask_action = action 140 | cj_bid_action, cj_ask_action = cj_agent.get_action(state).reshape(-1) 141 | 142 | if inventory == min_inventory: 143 | ask_action = np.NaN 144 | cj_ask_action = np.NaN 145 | if inventory == max_inventory: 146 | bid_action = np.NaN 147 | cj_bid_action = np.NaN 148 | 149 | bid_actions.append(bid_action) 150 | ask_actions.append(ask_action) 151 | cj_bid_actions.append(cj_bid_action) 152 | cj_ask_actions.append(cj_ask_action) 153 | 154 | plt.plot(inventories, bid_actions, label="bid", color="k") 155 | plt.plot(inventories, ask_actions, label="ask", color="r") 156 | plt.plot(inventories, cj_bid_actions, label="bid cj", color="k", linestyle="--") 157 | plt.plot(inventories, cj_ask_actions, label="ask cj", color="r", linestyle="--") 158 | plt.legend() 159 | if save_figure: 160 | plt.title(get_experiment_string(env)) 161 | plt.savefig(path_to_figures + "/inventory_plots/" + get_experiment_string(env) + ".pdf") 162 | else: 163 | plt.show() 164 | 165 | 166 | def create_time_plot( 167 | model: PPO, 168 | env: TradingEnvironment, 169 | min_inventory: int = -3, 170 | max_inventory: int = 3, 171 | reduced_training_indices: list = None, 172 | model_uses_normalisation: bool = True, 173 | save_figure: bool = False, 174 | path_to_figures: str = "./figures", 175 | ): 176 | if model_uses_normalisation: 177 | normalised_env = StableBaselinesTradingEnvironment(ReduceStateSizeWrapper(env, reduced_training_indices)) 178 | assert env.num_trajectories == 1, "Plotting actions must be done with a single trajectory env" 179 | ppo_agent = SbAgent(model) 180 | cj_agent = CarteaJaimungalMmAgent(env=env) 181 | inventories = np.arange(min_inventory, max_inventory + 1, 1) 182 | times = np.arange(0, env.terminal_time + 0.01, 0.01) 183 | inventory_dict = {inventory: [] for inventory in 
inventories} 184 | action_dict = { 185 | "rl bid actions": deepcopy(inventory_dict), 186 | "cj bid actions": deepcopy(inventory_dict), 187 | "rl ask actions": deepcopy(inventory_dict), 188 | "cj ask actions": deepcopy(inventory_dict), 189 | } 190 | for inventory in inventories: 191 | for time in times: 192 | state = np.array([[0, inventory, time, 100]]) 193 | reduced_state = state[:, reduced_training_indices] if reduced_training_indices is not None else state 194 | if model_uses_normalisation: 195 | reduced_state = normalised_env.normalise_observation(reduced_state) 196 | action = ppo_agent.get_action(reduced_state) 197 | if model_uses_normalisation: 198 | action = normalised_env.normalise_action(action, inverse=True) 199 | bid_action, ask_action = action 200 | 201 | cj_actions = cj_agent.get_action(state) 202 | cj_bid_action = cj_actions[0, 0] 203 | cj_ask_action = cj_actions[0, 1] 204 | 205 | if inventory == min_inventory: 206 | ask_action = np.NaN 207 | cj_ask_action = np.NaN 208 | if inventory == max_inventory: 209 | bid_action = np.NaN 210 | cj_bid_action = np.NaN 211 | 212 | action_dict["rl bid actions"][inventory].append(bid_action) 213 | action_dict["rl ask actions"][inventory].append(ask_action) 214 | action_dict["cj bid actions"][inventory].append(cj_bid_action) 215 | action_dict["cj ask actions"][inventory].append(cj_ask_action) 216 | fig, axs = plt.subplots(2, 2, sharey=True, figsize=(15, 10)) 217 | for count, (name, actions) in enumerate(action_dict.items()): 218 | axs[count // 2, count % 2].set_title(name, fontsize=20) 219 | for inventory in inventories: 220 | axs[count // 2, count % 2].plot(times, actions[inventory], label=f"inventory = {inventory}") 221 | axs[count // 2, count % 2].legend() 222 | fig.tight_layout() 223 | if save_figure: 224 | plt.savefig(path_to_figures + "/time_plots/" + get_experiment_string(env) + ".pdf") 225 | else: 226 | plt.show() 227 | -------------------------------------------------------------------------------- /experiments/learning-pnl-with-different-arrival-rates.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | 5 | from experiments.helpers import ( 6 | get_cj_env, 7 | get_ppo_learner_and_callback, 8 | get_experiment_string, 9 | create_time_plot, 10 | create_inventory_plot, 11 | ) 12 | 13 | num_trajectories = 1000 14 | terminal_time = 1.0 15 | phi = 0 16 | alpha = 0 17 | sigma = 0.0 18 | initial_inventory = (-5, 6) 19 | random_start = None 20 | 21 | final_model_path = "./final_models" 22 | 23 | arrival_rates = [1.0, 10.0, 100.0] 24 | fill_exponents = [0.1, 1, 10] 25 | 26 | for arrival_rate in arrival_rates: 27 | for fill_exponent in fill_exponents: 28 | n_steps = int(10 * terminal_time * arrival_rate) 29 | env = get_cj_env( 30 | num_trajectories=num_trajectories, 31 | terminal_time=terminal_time, 32 | arrival_rate=arrival_rate, 33 | fill_exponent=fill_exponent, 34 | phi=phi, 35 | alpha=alpha, 36 | sigma=sigma, 37 | initial_inventory=initial_inventory, 38 | ) 39 | model, callback = get_ppo_learner_and_callback(env) 40 | model.learn(total_timesteps=300_000_000, callback=callback) 41 | model.save(final_model_path + "/" + get_experiment_string(env)) 42 | create_inventory_plot(model=model, env=env, save_figure=True) 43 | create_time_plot(model=model, env=env, save_figure=True) 44 | -------------------------------------------------------------------------------- /mbt_gym/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/__init__.py -------------------------------------------------------------------------------- /mbt_gym/agents/Agent.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | 6 | class Agent(metaclass=abc.ABCMeta): 7 | @abc.abstractmethod 8 | def get_action(self, state: np.ndarray) -> np.ndarray: 9 | pass 10 | 11 | def get_expected_action(self, state: np.ndarray, n_samples: int = 1000) -> np.ndarray: 12 | return np.array([self.get_action(state) for _ in range(n_samples)]).mean(axis=0) 13 | -------------------------------------------------------------------------------- /mbt_gym/agents/BaselineAgents.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import gym 4 | import numpy as np 5 | import warnings 6 | from scipy.linalg import expm 7 | 8 | from mbt_gym.agents.Agent import Agent 9 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 10 | from mbt_gym.gym.index_names import INVENTORY_INDEX, TIME_INDEX, ASSET_PRICE_INDEX, CASH_INDEX, BID_INDEX, ASK_INDEX 11 | from mbt_gym.rewards.RewardFunctions import CjMmCriterion, PnL 12 | from mbt_gym.stochastic_processes.price_impact_models import PriceImpactModel, TemporaryAndPermanentPriceImpact 13 | from mbt_gym.gym.ModelDynamics import LimitOrderModelDynamics, TradinghWithSpeedModelDynamics 14 | 15 | class RandomAgent(Agent): 16 | def __init__(self, env: gym.Env, seed: int = None): 17 | self.action_space = deepcopy(env.action_space) 18 | self.action_space.seed(seed) 19 | self.num_trajectories = env.num_trajectories 20 | 21 | def get_action(self, state: np.ndarray) -> np.ndarray: 22 | return np.repeat(self.action_space.sample().reshape(1, -1), self.num_trajectories, axis=0) 23 | 24 | 25 | class FixedActionAgent(Agent): 26 | def __init__(self, fixed_action: np.ndarray, env: gym.Env): 27 | self.fixed_action = fixed_action 28 | self.env = env 29 | 30 | def get_action(self, state: np.ndarray) -> np.ndarray: 31 | return np.repeat(self.fixed_action.reshape(1, -1), self.env.num_trajectories, axis=0) 32 | 33 | 34 | class FixedSpreadAgent(Agent): 35 | def __init__(self, env: gym.Env, half_spread: float = 1.0, offset: float = 0.0): 36 | self.half_spread = half_spread 37 | self.offset = offset 38 | self.env = env 39 | 40 | def get_action(self, state: np.ndarray) -> np.ndarray: 41 | action = np.array([[self.half_spread - self.offset, self.half_spread + self.offset]]) 42 | return np.repeat(action, self.env.num_trajectories, axis=0) 43 | 44 | 45 | class HumanAgent(Agent): 46 | def get_action(self, state: np.ndarray): 47 | bid = float(input(f"Current state is {state}. How large do you want to set midprice-bid half spread? ")) 48 | ask = float(input(f"Current state is {state}. How large do you want to set ask-midprice half spread? 
")) 49 | return np.array([bid, ask]) 50 | 51 | 52 | class AvellanedaStoikovAgent(Agent): 53 | def __init__(self, risk_aversion: float = 0.1, env: TradingEnvironment = None): 54 | self.risk_aversion = risk_aversion 55 | self.env = env or TradingEnvironment() 56 | assert isinstance(self.env, TradingEnvironment) 57 | self.terminal_time = self.env.terminal_time 58 | self.volatility = self.env.model_dynamics.midprice_model.volatility 59 | self.rate_of_arrival = self.env.model_dynamics.arrival_model.intensity 60 | self.fill_exponent = self.env.model_dynamics.fill_probability_model.fill_exponent 61 | 62 | def get_action(self, state: np.ndarray): 63 | inventory = state[:, INVENTORY_INDEX] 64 | time = state[:, TIME_INDEX] 65 | action = self._get_action(inventory, time) 66 | if action.min() < 0: 67 | warnings.warn("Avellaneda-Stoikov agent is quoting a negative spread") 68 | return action 69 | 70 | def _get_price_adjustment(self, inventory: int, time: float) -> float: 71 | return inventory * self.risk_aversion * self.volatility**2 * (self.terminal_time - time) 72 | 73 | def _get_spread(self, time: float) -> float: 74 | if self.risk_aversion == 0: 75 | return 2 / self.fill_exponent # Limit as risk aversion -> 0 76 | volatility_aversion_component = self.risk_aversion * self.volatility**2 * (self.terminal_time - time) 77 | fill_exponent_component = 2 / self.risk_aversion * np.log(1 + self.risk_aversion / self.fill_exponent) 78 | return volatility_aversion_component + fill_exponent_component 79 | 80 | def _get_action(self, inventory: int, time: float): 81 | bid_half_spread = (self._get_price_adjustment(inventory, time) + self._get_spread(time) / 2).reshape(-1, 1) 82 | ask_half_spread = (-self._get_price_adjustment(inventory, time) + self._get_spread(time) / 2).reshape(-1, 1) 83 | return np.append(bid_half_spread, ask_half_spread, axis=1) 84 | 85 | 86 | class CarteaJaimungalMmAgent(Agent): 87 | def __init__( 88 | self, 89 | env: TradingEnvironment = None, 90 | ): 91 | self.env = env or TradingEnvironment() 92 | assert isinstance(self.env.model_dynamics, LimitOrderModelDynamics), "Trader must be type LimitOrderTrader" 93 | assert isinstance(self.env.reward_function, (CjMmCriterion, PnL)), "Reward function for CjMmAgent is incorrect." 94 | self.kappa = self.env.model_dynamics.fill_probability_model.fill_exponent 95 | self.num_trajectories = self.env.num_trajectories 96 | if isinstance(self.env.reward_function, PnL): 97 | self.inventory_neutral = True 98 | self.risk_neutral_action = 1 / self.kappa * np.ones((env.num_trajectories, env.action_space.shape[0])) 99 | else: 100 | self.inventory_neutral = False 101 | self.phi = env.reward_function.per_step_inventory_aversion 102 | self.alpha = env.reward_function.terminal_inventory_aversion 103 | assert self.env.reward_function.inventory_exponent == 2.0, "Inventory exponent must be = 2." 104 | self.terminal_time = self.env.terminal_time 105 | self.lambdas = self.env.model_dynamics.arrival_model.intensity 106 | self.max_inventory = env.max_inventory 107 | self.a_matrix, self.z_vector = self._calculate_a_and_z() 108 | self.large_depth = 10_000 109 | 110 | def get_action(self, state: np.ndarray): 111 | if self.inventory_neutral: 112 | return self.risk_neutral_action 113 | else: 114 | assert ( 115 | state[0, TIME_INDEX] == state[-1, TIME_INDEX] 116 | ), "CarteaJaimungalMmAgent needs to be called on a tensor with a uniform time stamp." 
117 | current_time = state[0, TIME_INDEX] 118 | inventories = state[:, INVENTORY_INDEX] 119 | return self._calculate_deltas(inventories=inventories, current_time=current_time) 120 | 121 | def _calculate_deltas(self, current_time: float, inventories: np.ndarray): 122 | deltas = np.zeros(shape=(self.num_trajectories, 2)) 123 | h_t = self._calculate_ht(current_time) 124 | # If the inventory goes above the max level, we quote a large depth to bring it back and quote on the opposite 125 | # side as if we had an inventory equal to sign(inventory) * self.max_inventory. 126 | indices = np.clip(self.max_inventory + inventories, 0, 2 * self.max_inventory) 127 | indices = indices.astype(int) 128 | indices_minus_one = np.clip(indices - 1, 0, 2 * self.max_inventory) 129 | indices_plus_one = np.clip(indices + 1, 0, 2 * self.max_inventory) 130 | h_0 = h_t[indices] 131 | h_plus_one = h_t[indices_plus_one] 132 | h_minus_one = h_t[indices_minus_one] 133 | max_inventory_bid = h_plus_one == h_0 134 | max_inventory_ask = h_minus_one == h_0 135 | deltas[:, BID_INDEX] = (1 / self.kappa - h_plus_one + h_0 + self.large_depth * max_inventory_bid).reshape(-1) 136 | deltas[:, ASK_INDEX] = (1 / self.kappa - h_minus_one + h_0 + self.large_depth * max_inventory_ask).reshape(-1) 137 | return deltas 138 | 139 | def _calculate_ht(self, current_time: float) -> float: 140 | omega_function = self._calculate_omega(current_time) 141 | return 1 / self.kappa * np.log(omega_function) 142 | 143 | def _calculate_omega(self, current_time: float): 144 | """This is Equation (10.11) from [CJP15].""" 145 | return np.matmul(expm(self.a_matrix * (self.terminal_time - current_time)), self.z_vector) 146 | 147 | def _calculate_a_and_z(self): 148 | matrix_size = 2 * self.max_inventory + 1 149 | Amatrix = np.zeros(shape=(matrix_size, matrix_size)) 150 | z_vector = np.zeros(shape=(matrix_size, 1)) 151 | for i in range(matrix_size): 152 | inventory = self.max_inventory - i 153 | Amatrix[i, i] = -self.phi * self.kappa * inventory**2 154 | z_vector[i, 0] = np.exp(-self.alpha * self.kappa * inventory**2) 155 | if i + 1 < matrix_size: 156 | Amatrix[i, i + 1] = self.lambdas[BID_INDEX] * np.exp(-1) 157 | if i > 0: 158 | Amatrix[i, i - 1] = self.lambdas[ASK_INDEX] * np.exp(-1) 159 | return Amatrix, z_vector 160 | 161 | def calculate_true_value_function(self, state: np.ndarray): 162 | current_time = state[0, TIME_INDEX] 163 | inventories = state[:, INVENTORY_INDEX] 164 | value_fct = np.zeros(shape=(self.num_trajectories, 1)) 165 | h_t = self._calculate_ht(current_time) 166 | indices = np.clip(self.max_inventory + inventories, 0, 2 * self.max_inventory) 167 | indices = indices.astype(int) 168 | h_0 = h_t[indices] 169 | value_fct = h_0 + state[:, CASH_INDEX] + state[:, INVENTORY_INDEX] * state[:, ASSET_PRICE_INDEX] 170 | return value_fct 171 | 172 | 173 | class CarteaJaimungalOeAgent(Agent): 174 | def __init__( 175 | self, 176 | phi: float = 2 * 10 ** (-4), 177 | alpha: float = 0.0001, 178 | env: TradingEnvironment = None, 179 | ): 180 | self.phi = phi 181 | self.alpha = alpha 182 | self.env = env or TradingEnvironment() 183 | self.price_impact_model = env.model_dynamics.price_impact_model 184 | assert isinstance(self.env.model_dynamics, TradinghWithSpeedModelDynamics), "Trader must be type TradinghWithSpeedTrader" 185 | self.terminal_time = self.env.terminal_time 186 | self.temporary_price_impact = self.price_impact_model.temporary_impact_coefficient 187 | self.permanent_price_impact = self.price_impact_model.permanent_impact_coefficient 188 | 
self.num_trajectories = self.env.num_trajectories 189 | 190 | def get_action(self, state: np.ndarray): 191 | action = np.zeros(shape=(self.num_trajectories, 1)) 192 | # The formulae below is in page 147 of Cartea, Jaimungal, Penalva (2015) 193 | # Algorithmic and High-Frequency Trading 194 | # Cambridge University Press 195 | gamma = np.sqrt(self.phi / self.temporary_price_impact) 196 | zeta = (self.alpha - 0.5 * self.permanent_price_impact + np.sqrt(self.temporary_price_impact * self.phi)) / ( 197 | self.alpha - 0.5 * self.permanent_price_impact - np.sqrt(self.temporary_price_impact * self.phi) 198 | ) 199 | initial_inventory = self.env.initial_inventory 200 | 201 | time_left = self.terminal_time - state[0, TIME_INDEX] 202 | action[:, :] = ( 203 | gamma 204 | * initial_inventory 205 | * ( 206 | (zeta * np.exp(gamma * time_left) + np.exp(-gamma * time_left)) 207 | / (zeta * np.exp(gamma * self.terminal_time) - np.exp(-gamma * self.terminal_time)) 208 | ) 209 | ) 210 | return -np.sign(initial_inventory) * action 211 | -------------------------------------------------------------------------------- /mbt_gym/agents/PolicyGradientAgent.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Callable, Tuple 2 | 3 | import gym 4 | import numpy as np 5 | import torch 6 | from torch.optim.lr_scheduler import StepLR, _LRScheduler 7 | from tqdm import tqdm 8 | 9 | from mbt_gym.agents.Agent import Agent 10 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 11 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 12 | 13 | 14 | class PolicyGradientAgent(Agent): 15 | def __init__( 16 | self, 17 | policy: torch.nn.Module, 18 | action_std: Union[float, Callable] = 0.01, 19 | optimizer: torch.optim.Optimizer = None, 20 | env: gym.Env = None, 21 | lr_scheduler: _LRScheduler = None, 22 | ): 23 | self.env = env or TradingEnvironment() 24 | self.input_size = env.observation_space.shape[0] 25 | self.action_size = env.action_space.shape[0] 26 | assert self.input_size == policy[0].in_features 27 | self.policy_net = policy 28 | self.action_std = action_std 29 | self.optimizer = optimizer or torch.optim.SGD(self.policy_net.parameters(), lr=1e-1) 30 | self.lr_scheduler = lr_scheduler or StepLR(self.optimizer, step_size=1, gamma=0.995) 31 | self.noise_dist = torch.distributions.Normal 32 | self.proportion_completed: float = 0.0 33 | 34 | def get_action( 35 | self, state: np.ndarray, deterministic: bool = False, include_log_probs: bool = False 36 | ) -> Union[np.ndarray, Tuple[np.ndarray, torch.tensor]]: 37 | assert not (deterministic and include_log_probs), "include_log_probs is only an option for deterministic output" 38 | mean_value = self.policy_net(torch.tensor(state, dtype=torch.float, requires_grad=False)) 39 | std = self.action_std(self.proportion_completed) if isinstance(self.action_std, Callable) else self.action_std 40 | if deterministic: 41 | return mean_value.detach().numpy() 42 | action_dist = torch.distributions.Normal(loc=mean_value, scale=std * torch.ones_like(mean_value)) 43 | action = action_dist.sample() 44 | if include_log_probs: 45 | log_probs = action_dist.log_prob(action) 46 | return action.detach().numpy(), log_probs 47 | return action.detach().numpy() 48 | 49 | def train(self, num_epochs: int = 1, reporting_freq: int = 100): 50 | learning_losses = [] 51 | learning_rewards = [] 52 | self.proportion_completed = 0.0 53 | for epoch in tqdm(range(num_epochs)): 54 | observations, actions, rewards, log_probs 
= generate_trajectory(self.env, self, include_log_probs=True) 55 | learning_rewards.append(rewards.mean()) 56 | rewards = torch.tensor(rewards) 57 | future_rewards = self._calculate_future_rewards(rewards) 58 | loss = -torch.mean(log_probs * future_rewards) 59 | self.optimizer.zero_grad() 60 | loss.backward() 61 | self.optimizer.step() 62 | if epoch % reporting_freq == 0: 63 | tqdm.write(str(loss.item())) 64 | learning_losses.append(loss.item()) 65 | self.proportion_completed += 1 / (num_epochs - 1) 66 | self.lr_scheduler.step() 67 | return learning_losses, learning_rewards 68 | 69 | @staticmethod 70 | def _calculate_future_rewards(rewards: torch.tensor): 71 | flipped_rewards = torch.flip(rewards, dims=(-1,)) 72 | cumulative_flipped = torch.cumsum(flipped_rewards, dim=-1) 73 | return torch.flip(cumulative_flipped, dims=(-1,)) 74 | -------------------------------------------------------------------------------- /mbt_gym/agents/SbAgent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from mbt_gym.agents.Agent import Agent 4 | 5 | from stable_baselines3.common.base_class import BaseAlgorithm 6 | 7 | 8 | class SbAgent(Agent): 9 | def __init__(self, model: BaseAlgorithm, reduced_training_indices: list = None, num_trajectories: int = None): 10 | self.model = model 11 | self.num_trajectories = num_trajectories or self.model.env.num_trajectories 12 | self.num_actions = self.model.action_space.shape[0] 13 | if reduced_training_indices is not None: 14 | self.reduced_training = True 15 | self.reduced_training_indices = reduced_training_indices 16 | else: 17 | self.reduced_training = False 18 | 19 | def get_action(self, state: np.ndarray) -> np.ndarray: 20 | if self.reduced_training: 21 | state = state[:, self.reduced_training_indices] 22 | # return self.model.predict(state, deterministic=True)[0].reshape(self.num_trajectories, self.num_actions) 23 | return self.model.predict(state, deterministic=True)[0].reshape(state.shape[0], self.num_actions) 24 | 25 | def train(self, total_timesteps: int = 100000): 26 | self.model.learn(total_timesteps=total_timesteps) 27 | -------------------------------------------------------------------------------- /mbt_gym/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/agents/__init__.py -------------------------------------------------------------------------------- /mbt_gym/gym/ModelDynamics.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import gym 3 | from copy import copy 4 | from typing import Optional 5 | 6 | import numpy as np 7 | from numpy.random import default_rng 8 | 9 | 10 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, BID_INDEX, ASK_INDEX 11 | 12 | from mbt_gym.stochastic_processes.arrival_models import ArrivalModel 13 | from mbt_gym.stochastic_processes.fill_probability_models import FillProbabilityModel 14 | from mbt_gym.stochastic_processes.midprice_models import MidpriceModel 15 | from mbt_gym.stochastic_processes.price_impact_models import PriceImpactModel 16 | 17 | 18 | class ModelDynamics(metaclass=abc.ABCMeta): 19 | def __init__( 20 | self, 21 | midprice_model : MidpriceModel = None, 22 | arrival_model : ArrivalModel = None, 23 | fill_probability_model : FillProbabilityModel = None, 24 | price_impact_model : PriceImpactModel = None, 25 | num_trajectories: int = 
1, 26 | seed: int = None, 27 | ): 28 | self.midprice_model = midprice_model 29 | self.arrival_model = arrival_model 30 | self.fill_probability_model = fill_probability_model 31 | self.price_impact_model = price_impact_model 32 | self.num_trajectories = num_trajectories 33 | self.rng = default_rng(seed) 34 | self.seed_ = seed 35 | self.fill_multiplier = self._get_fill_multiplier() 36 | self.round_initial_inventory = False 37 | self.required_processes = self.get_required_stochastic_processes() 38 | self._check_processes_are_not_none(self.required_processes) 39 | self.state = None 40 | 41 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 42 | pass 43 | 44 | def get_fills(self, action: np.ndarray): 45 | pass 46 | 47 | def get_arrivals_and_fills(self, action: np.ndarray): 48 | return None, None 49 | 50 | def _limit_depths(self, action: np.ndarray): 51 | return action[:, 0:2] 52 | 53 | def get_action_space(self) -> gym.spaces.Space: 54 | pass 55 | 56 | def get_required_stochastic_processes(self): 57 | pass 58 | 59 | def _get_max_depth(self) -> Optional[float]: 60 | if self.fill_probability_model is not None: 61 | return self.fill_probability_model.max_depth 62 | else: 63 | return None 64 | 65 | def _get_max_speed(self) -> float: 66 | if self.price_impact_model is not None: 67 | return self.price_impact_model.max_speed 68 | else: 69 | return None 70 | 71 | def _get_fill_multiplier(self): 72 | ones = np.ones((self.num_trajectories, 1)) 73 | return np.append(-ones, ones, axis=1) 74 | 75 | def _check_processes_are_not_none(self, processes): 76 | for process in processes: 77 | self._check_process_is_not_none(process) 78 | 79 | def _check_process_is_not_none(self, process: str): 80 | assert getattr(self, process) is not None, f"This model dynamics cannot have env.{process} to be None." 81 | 82 | @property 83 | def midprice(self): 84 | return self.midprice_model.current_state[:, 0].reshape(-1, 1) 85 | 86 | 87 | class LimitOrderModelDynamics(ModelDynamics): 88 | """ModelDynamics for 'limit'.""" 89 | def __init__( 90 | self, 91 | midprice_model : MidpriceModel = None, 92 | arrival_model : ArrivalModel = None, 93 | fill_probability_model : FillProbabilityModel = None, 94 | num_trajectories: int = 1, 95 | seed: int = None, 96 | max_depth : float = None, 97 | ): 98 | super().__init__(midprice_model = midprice_model, 99 | arrival_model = arrival_model, 100 | fill_probability_model = fill_probability_model, 101 | num_trajectories = num_trajectories, 102 | seed = seed) 103 | self.max_depth = max_depth or self._get_max_depth() 104 | self.required_processes = self.get_required_stochastic_processes() 105 | self._check_processes_are_not_none(self.required_processes) 106 | self.round_initial_inventory = True 107 | 108 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 109 | self.state[:, INVENTORY_INDEX] += np.sum(arrivals * fills * -self.fill_multiplier, axis=1) 110 | self.state[:, CASH_INDEX] += np.sum( 111 | self.fill_multiplier 112 | * arrivals 113 | * fills 114 | * (self.midprice + self._limit_depths(action) * self.fill_multiplier), 115 | axis=1, 116 | ) 117 | 118 | def get_action_space(self) -> gym.spaces.Space: 119 | assert self.max_depth is not None, "For limit orders max_depth cannot be None." 
120 | # agent chooses spread on bid and ask 121 | return gym.spaces.Box(low=np.float32(0.0), high=np.float32(self.max_depth), shape=(2,)) 122 | 123 | def get_required_stochastic_processes(self): 124 | processes = ["arrival_model", "fill_probability_model"] 125 | return processes 126 | 127 | def get_arrivals_and_fills(self, action: np.ndarray): 128 | arrivals = self.arrival_model.get_arrivals() 129 | depths = self._limit_depths(action) 130 | fills = self.fill_probability_model.get_fills(depths) 131 | return arrivals, fills 132 | 133 | 134 | class AtTheTouchModelDynamics(ModelDynamics): 135 | """ModelDynamics for 'touch'.""" 136 | def __init__( 137 | self, 138 | midprice_model : MidpriceModel = None, 139 | arrival_model : ArrivalModel = None, 140 | fill_probability_model : FillProbabilityModel = None, 141 | num_trajectories: int = 1, 142 | fixed_market_half_spread: float = 0.5, 143 | seed: int = None, 144 | ): 145 | super().__init__(midprice_model = midprice_model, 146 | arrival_model = arrival_model, 147 | fill_probability_model = fill_probability_model, 148 | num_trajectories = num_trajectories, 149 | seed = seed) 150 | self.round_initial_inventory = True 151 | self.fixed_market_half_spread = fixed_market_half_spread 152 | 153 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 154 | self.state[:, CASH_INDEX] += np.sum( 155 | self.fill_multiplier 156 | * arrivals 157 | * fills 158 | * (self.midprice + self.fixed_market_half_spread * self.fill_multiplier), 159 | axis=1, 160 | ) 161 | self.state[:, INVENTORY_INDEX] += np.sum(arrivals * fills * -self.fill_multiplier, axis=1) 162 | 163 | def _post_at_touch(self, action: np.ndarray): 164 | return action[:, 0:2] 165 | 166 | def get_action_space(self) -> gym.spaces.Space: 167 | return gym.spaces.MultiBinary(2) 168 | 169 | def get_required_stochastic_processes(self): 170 | processes = ["arrival_model"] 171 | return processes 172 | 173 | def get_arrivals_and_fills(self, action: np.ndarray): 174 | arrivals = self.arrival_model.get_arrivals() 175 | fills = self._post_at_touch(action) 176 | return arrivals, fills 177 | 178 | 179 | class LimitAndMarketOrderModelDynamics(ModelDynamics): 180 | """ModelDynamics for 'limit_and_market'.""" 181 | def __init__( 182 | self, 183 | midprice_model : MidpriceModel = None, 184 | arrival_model : ArrivalModel = None, 185 | fill_probability_model : FillProbabilityModel = None, 186 | num_trajectories: int = 1, 187 | seed: int = None, 188 | max_depth : float = None, 189 | fixed_market_half_spread : float = 0.5, 190 | ): 191 | super().__init__(midprice_model = midprice_model, 192 | arrival_model = arrival_model, 193 | fill_probability_model = fill_probability_model, 194 | num_trajectories = num_trajectories, 195 | seed = seed) 196 | self.max_depth = max_depth or self._get_max_depth() 197 | self.fixed_market_half_spread = fixed_market_half_spread 198 | self.required_processes = self.get_required_stochastic_processes() 199 | self._check_processes_are_not_none(self.required_processes) 200 | self.round_initial_inventory = True 201 | 202 | def _market_order_buy(self, action: np.ndarray): 203 | return action[:, 2 + BID_INDEX] 204 | 205 | def _market_order_sell(self, action: np.ndarray): 206 | return action[:, 2 + ASK_INDEX] 207 | 208 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 209 | mo_buy = np.single(self._market_order_buy(action) > 0.5) 210 | mo_sell = np.single(self._market_order_sell(action) > 0.5) 211 | best_bid = (self.midprice - 
self.fixed_market_half_spread).reshape(-1,) 212 | best_ask = (self.midprice + self.fixed_market_half_spread).reshape(-1,) 213 | self.state[:, CASH_INDEX] += mo_sell * best_bid - mo_buy * best_ask 214 | self.state[:, INVENTORY_INDEX] += mo_buy - mo_sell 215 | self.state[:, INVENTORY_INDEX] += np.sum(arrivals * fills * -self.fill_multiplier, axis=1) 216 | self.state[:, CASH_INDEX] += np.sum( 217 | self.fill_multiplier 218 | * arrivals 219 | * fills 220 | * (self.midprice + self._limit_depths(action) * self.fill_multiplier), 221 | axis=1, 222 | ) 223 | 224 | def get_action_space(self) -> gym.spaces.Space: 225 | assert self.max_depth is not None, "For limit orders max_depth cannot be None." 226 | # agent chooses spread on bid and ask 227 | return gym.spaces.Box( 228 | low=np.zeros(4), 229 | high=np.array([self.max_depth, self.max_depth, 1, 1], dtype=np.float32), 230 | ) 231 | 232 | def get_required_stochastic_processes(self): 233 | processes = ["arrival_model", "fill_probability_model"] 234 | return processes 235 | 236 | def get_arrivals_and_fills(self, action: np.ndarray): 237 | arrivals = self.arrival_model.get_arrivals() 238 | depths = self._limit_depths(action) 239 | fills = self.fill_probability_model.get_fills(depths) 240 | return arrivals, fills 241 | 242 | 243 | class TradinghWithSpeedModelDynamics(ModelDynamics): 244 | """ModelDynamics for 'speed'.""" 245 | def __init__( 246 | self, 247 | midprice_model : MidpriceModel = None, 248 | price_impact_model : PriceImpactModel = None, 249 | num_trajectories: int = 1, 250 | seed: int = None, 251 | max_speed : float = None, 252 | ): 253 | super().__init__(midprice_model = midprice_model, 254 | price_impact_model = price_impact_model, 255 | num_trajectories = num_trajectories, 256 | seed = seed) 257 | self.max_speed = max_speed or self._get_max_speed() 258 | self.required_processes = self.get_required_stochastic_processes() 259 | self._check_processes_are_not_none(self.required_processes) 260 | self.round_initial_inventory = False 261 | 262 | def update_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 263 | price_impact = self.price_impact_model.get_impact(action) 264 | execution_price = self.midprice + price_impact 265 | volume = action * self.midprice_model.step_size 266 | self.state[:, CASH_INDEX] -= np.squeeze(volume * execution_price) 267 | self.state[:, INVENTORY_INDEX] += np.squeeze(volume) 268 | 269 | def get_action_space(self) -> gym.spaces.Space: 270 | # agent chooses speed of trading: positive buys, negative sells 271 | return gym.spaces.Box(low=np.float32([-self.max_speed]), high=np.float32([self.max_speed])) 272 | 273 | def get_required_stochastic_processes(self): 274 | processes = ["price_impact_model"] 275 | return processes 276 | -------------------------------------------------------------------------------- /mbt_gym/gym/MultiprocessTradingEnv.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | from collections import OrderedDict 3 | from typing import Any, Callable, List, Optional, Sequence, Tuple, Type, Union, Iterable 4 | 5 | import gym 6 | import numpy as np 7 | from gym import spaces 8 | 9 | from stable_baselines3.common.vec_env.base_vec_env import ( 10 | CloudpickleWrapper, 11 | VecEnv, 12 | VecEnvIndices, 13 | VecEnvObs, 14 | VecEnvStepReturn, 15 | ) 16 | 17 | from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv, _flatten_obs 18 | 19 | STORE_TERMINAL_OBSERVATION_INFO = True 20 | 21 | 22 | def _worker( 23 | 
remote: mp.connection.Connection, parent_remote: mp.connection.Connection, env_fn_wrapper: CloudpickleWrapper 24 | ) -> None: 25 | # Import here to avoid a circular import 26 | from stable_baselines3.common.env_util import is_wrapped 27 | 28 | parent_remote.close() 29 | env = env_fn_wrapper.var() 30 | while True: 31 | try: 32 | cmd, data = remote.recv() 33 | if cmd == "step": 34 | observation, reward, done, infos = env.step(data) 35 | single_done = done[0] if len(done) > 1 else done 36 | if single_done: 37 | if STORE_TERMINAL_OBSERVATION_INFO: 38 | infos = infos.copy() 39 | for count, info in enumerate(infos): 40 | # save final observation where user can get it, then automatically reset (an SB3 convention). 41 | info["terminal_observation"] = observation[count, :] 42 | observation = env.reset() 43 | remote.send((observation, reward, done, infos)) 44 | elif cmd == "seed": 45 | remote.send(env.seed(data)) 46 | elif cmd == "reset": 47 | observation = env.reset() 48 | remote.send(observation) 49 | elif cmd == "render": 50 | remote.send(env.render(data)) 51 | elif cmd == "close": 52 | env.close() 53 | remote.close() 54 | break 55 | elif cmd == "get_spaces": 56 | remote.send((env.observation_space, env.action_space)) 57 | elif cmd == "env_method": 58 | method = getattr(env, data[0]) 59 | remote.send(method(*data[1], **data[2])) 60 | elif cmd == "get_attr": 61 | remote.send(getattr(env, data)) 62 | elif cmd == "set_attr": 63 | remote.send(setattr(env, data[0], data[1])) 64 | elif cmd == "is_wrapped": 65 | remote.send(is_wrapped(env, data)) 66 | else: 67 | raise NotImplementedError(f"`{cmd}` is not implemented in the worker") 68 | except EOFError: 69 | break 70 | 71 | 72 | class MultiprocessTradingEnv(SubprocVecEnv): 73 | """ 74 | This is a slight modification of SubprocVecEnv, the details of which can be found at 75 | https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#subprocvecenv. 76 | 77 | In particular, it modifies SubprocVecEnv so that the inputs are already VecEnvs. This allows the user to choose the 78 | amount of vectorisation that is performed via numpy (in VectorizedTradingEnvironment) and the number of 79 | multiprocessing processes.
80 | """ 81 | 82 | def __init__(self, env_fns: List[Callable[[], gym.Env]], start_method: Optional[str] = None): 83 | super().__init__(self, env_fns, start_method) 84 | 85 | self.remotes[0].send(("get_attr", "num_trajectories")) 86 | num_trajectories_per_env = self.remotes[0].recv() 87 | 88 | self.remotes[0].send(("get_attr", "n_steps")) 89 | n_steps = self.remotes[0].recv() 90 | 91 | self.num_trajectories_per_env = num_trajectories_per_env 92 | self.num_multiprocess_envs = len(self.remotes) 93 | self.n_steps = n_steps 94 | self.num_trajectories = len(env_fns) * num_trajectories_per_env 95 | self.num_envs = self.num_trajectories 96 | 97 | def step_async(self, actions: np.ndarray) -> None: 98 | multi_actions = self.flatten_multi(actions, inverse=True) 99 | for remote, action in zip(self.remotes, multi_actions): 100 | remote.send(("step", action)) 101 | self.waiting = True 102 | 103 | def step_wait(self) -> VecEnvStepReturn: 104 | results = [remote.recv() for remote in self.remotes] 105 | self.waiting = False 106 | obs, rews, dones, infos = zip(*results) 107 | obs = self.flatten_multi(_flatten_obs(obs, self.observation_space)) 108 | rews = self.flatten_multi(np.stack(rews)) 109 | dones = self.flatten_multi(np.stack(dones)) 110 | return obs, rews, dones, list(np.stack(infos).reshape(-1)) 111 | 112 | def flatten_multi(self, array: np.ndarray, inverse=False): 113 | if inverse: 114 | return list(array.reshape(self.num_multiprocess_envs, self.num_trajectories_per_env, -1)) 115 | else: 116 | return array.reshape(self.num_multiprocess_envs * self.num_trajectories_per_env, -1).squeeze() 117 | 118 | def reset(self) -> VecEnvObs: 119 | for remote in self.remotes: 120 | remote.send(("reset", None)) 121 | obs = [remote.recv() for remote in self.remotes] 122 | obs = _flatten_obs(obs, self.observation_space) 123 | return self.flatten_multi(obs) 124 | -------------------------------------------------------------------------------- /mbt_gym/gym/StableBaselinesTradingEnvironment.py: -------------------------------------------------------------------------------- 1 | from typing import List, Any, Type, Optional, Union, Sequence 2 | 3 | import gym 4 | import numpy as np 5 | from stable_baselines3.common.vec_env import VecEnv 6 | from stable_baselines3.common.vec_env.base_vec_env import VecEnvObs, VecEnvStepReturn, VecEnvIndices 7 | 8 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 9 | 10 | 11 | class StableBaselinesTradingEnvironment(VecEnv): 12 | def __init__( 13 | self, 14 | trading_env: TradingEnvironment, 15 | store_terminal_observation_info: bool = True, 16 | ): 17 | self.env = trading_env 18 | self.store_terminal_observation_info = store_terminal_observation_info 19 | self.actions: np.ndarray = self.env.action_space.sample() 20 | super().__init__(self.env.num_trajectories, self.env.observation_space, self.env.action_space) 21 | 22 | def reset(self) -> VecEnvObs: 23 | return self.env.reset() 24 | 25 | def step_async(self, actions: np.ndarray) -> None: 26 | self.actions = actions 27 | 28 | def step_wait(self) -> VecEnvStepReturn: 29 | obs, rewards, dones, infos = self.env.step(self.actions) 30 | if dones.min(): 31 | if self.store_terminal_observation_info: 32 | infos = infos.copy() 33 | for count, info in enumerate(infos): 34 | # save final observation where user can get it, then automatically reset (an SB3 convention). 
35 | info["terminal_observation"] = obs[count, :] 36 | obs = self.env.reset() 37 | return obs, rewards, dones, infos 38 | 39 | def close(self) -> None: 40 | pass 41 | 42 | def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]: 43 | pass 44 | 45 | def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None: 46 | pass 47 | 48 | def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]: 49 | pass 50 | 51 | def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]: 52 | return [False for _ in range(self.env.num_trajectories)] 53 | 54 | def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]: 55 | return self.env.seed(seed) 56 | 57 | def get_images(self) -> Sequence[np.ndarray]: 58 | pass 59 | 60 | @property 61 | def num_trajectories(self): 62 | return self.env.num_trajectories 63 | 64 | @property 65 | def n_steps(self): 66 | return self.env.n_steps 67 | -------------------------------------------------------------------------------- /mbt_gym/gym/TradingEnvironment.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from copy import copy, deepcopy 3 | from typing import Union, Tuple, Callable 4 | 5 | import gym 6 | import numpy as np 7 | 8 | from gym.spaces import Box 9 | 10 | from mbt_gym.agents.Agent import Agent 11 | from mbt_gym.gym.ModelDynamics import ModelDynamics, LimitOrderModelDynamics 12 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 13 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 14 | from mbt_gym.stochastic_processes.arrival_models import ArrivalModel, PoissonArrivalModel 15 | from mbt_gym.stochastic_processes.fill_probability_models import FillProbabilityModel, ExponentialFillFunction 16 | from mbt_gym.stochastic_processes.midprice_models import MidpriceModel, BrownianMotionMidpriceModel 17 | from mbt_gym.stochastic_processes.price_impact_models import PriceImpactModel 18 | from mbt_gym.gym.info_calculators import InfoCalculator 19 | from mbt_gym.rewards.RewardFunctions import RewardFunction, PnL 20 | 21 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, TIME_INDEX 22 | 23 | 24 | class TradingEnvironment(gym.Env): 25 | metadata = {"render.modes": ["human"]} 26 | 27 | def __init__( 28 | self, 29 | terminal_time: float = 1.0, 30 | n_steps: int = 20 * 10, 31 | reward_function: RewardFunction = None, 32 | model_dynamics: ModelDynamics = None, 33 | initial_cash: float = 0.0, 34 | initial_inventory: Union[int, Tuple[float, float]] = 0, # Either a deterministic initial inventory, or a tuple 35 | max_inventory: int = 10_000, # representing the mean and variance of it. 36 | max_cash: float = None, 37 | max_stock_price: float = None, 38 | start_time: Union[float, int, Callable] = 0.0, 39 | info_calculator: InfoCalculator = None, # episode given as a proportion. 
40 | seed: int = None, 41 | num_trajectories: int = 1, 42 | normalise_action_space: bool = True, 43 | normalise_observation_space: bool = True, 44 | normalise_rewards: bool = False, 45 | ): 46 | super(TradingEnvironment, self).__init__() 47 | self.terminal_time = terminal_time 48 | self.n_steps = n_steps 49 | self._step_size = self.terminal_time / self.n_steps 50 | self.reward_function = reward_function or PnL() 51 | self.model_dynamics = model_dynamics or LimitOrderModelDynamics( 52 | midprice_model=BrownianMotionMidpriceModel( 53 | step_size=self._step_size, num_trajectories=num_trajectories, seed=seed 54 | ), 55 | arrival_model=PoissonArrivalModel( 56 | intensity=np.array([100, 100]), step_size=self._step_size, num_trajectories=num_trajectories, seed=seed 57 | ), 58 | fill_probability_model=ExponentialFillFunction( 59 | step_size=self._step_size, num_trajectories=num_trajectories, seed=seed 60 | ), 61 | num_trajectories=num_trajectories, 62 | seed=seed, 63 | ) 64 | self.stochastic_processes = self._get_stochastic_processes() 65 | self.stochastic_process_indices = self._get_stochastic_process_indices() 66 | self.num_trajectories = num_trajectories 67 | self.initial_cash = initial_cash 68 | self.initial_inventory = initial_inventory 69 | self.max_inventory = max_inventory 70 | if seed: 71 | self.seed(seed) 72 | self.rng = np.random.default_rng(seed) 73 | self.start_time = start_time 74 | self.model_dynamics.state = self.initial_state 75 | self.max_stock_price = max_stock_price or self.model_dynamics.midprice_model.max_value[0, 0] 76 | self.max_cash = max_cash or self._get_max_cash() 77 | self.info_calculator = info_calculator 78 | self._empty_infos = self._get_empty_infos() 79 | self.observation_space = self._get_observation_space() 80 | self.action_space = self.model_dynamics.get_action_space() 81 | self.normalise_action_space_ = normalise_action_space 82 | self.normalise_observation_space_ = normalise_observation_space 83 | self.normalise_rewards_ = normalise_rewards 84 | if self.normalise_observation_space_: 85 | self.original_observation_space = copy(self.observation_space) 86 | self.observation_space = self._get_normalised_observation_space() 87 | if self.normalise_action_space_: 88 | self.original_action_space = copy(self.action_space) 89 | self.action_space = self._get_normalised_action_space() 90 | if self.normalise_rewards_: 91 | assert isinstance(self.model_dynamics.arrival_model, PoissonArrivalModel) and isinstance( 92 | self.model_dynamics.fill_probability_model, ExponentialFillFunction 93 | ), "Arrival model must be Poisson and fill probability model must be exponential to scale rewards" 94 | self.reward_scaling = 1 / self._get_inventory_neutral_rewards() 95 | 96 | def reset(self): 97 | for process in self.stochastic_processes.values(): 98 | process.reset() 99 | self.model_dynamics.state = self.initial_state 100 | self.reward_function.reset(self.model_dynamics.state.copy()) 101 | return self.normalise_observation(self.model_dynamics.state.copy()) 102 | 103 | def step(self, action: np.ndarray): 104 | action = self.normalise_action(action, inverse=True) 105 | current_state = self.model_dynamics.state.copy() 106 | next_state = self._update_state(action) 107 | dones = self._get_dones() 108 | rewards = self.reward_function.calculate(current_state, action, next_state, dones[0]) 109 | infos = self._calculate_infos(current_state, action, rewards) 110 | return self.normalise_observation(next_state.copy()), self.normalise_rewards(rewards), dones, infos 111 | 112 | def 
normalise_observation(self, obs: np.ndarray, inverse: bool = False): 113 | if self.normalise_observation_space_ and not inverse: 114 | return (obs - self._intercept_obs_norm) / self._gradient_obs_norm - 1 115 | elif self.normalise_observation_space_ and inverse: 116 | return (obs + 1) * self._gradient_obs_norm + self._intercept_obs_norm 117 | else: 118 | return obs 119 | 120 | def normalise_action(self, action: np.ndarray, inverse: bool = False): 121 | if self.normalise_action_space_ and not inverse: 122 | return (action - self._intercept_action_norm) / self._gradient_action_norm - 1 123 | elif self.normalise_action_space_ and inverse: 124 | return (action + 1) * self._gradient_action_norm + self._intercept_action_norm 125 | else: 126 | return action 127 | 128 | def normalise_rewards(self, rewards: np.ndarray): 129 | return self.reward_scaling * rewards if self.normalise_rewards_ else rewards 130 | 131 | @property 132 | def initial_state(self) -> np.ndarray: 133 | scalar_initial_state = np.array([[self.initial_cash, 0, 0.0]]) 134 | initial_state = np.repeat(scalar_initial_state, self.num_trajectories, axis=0) 135 | start_time = self._get_start_time() 136 | initial_state[:, TIME_INDEX] = start_time * np.ones((self.num_trajectories,)) 137 | initial_state[:, INVENTORY_INDEX] = self._get_initial_inventories() 138 | for process in self.stochastic_processes.values(): 139 | initial_state = np.append(initial_state, process.initial_vector_state, axis=1) 140 | return initial_state 141 | 142 | @property 143 | def state(self): 144 | return self.model_dynamics.state 145 | 146 | @property 147 | def is_at_max_inventory(self): 148 | return self.state[:, INVENTORY_INDEX] >= self.max_inventory 149 | 150 | @property 151 | def is_at_min_inventory(self): 152 | return self.state[:, INVENTORY_INDEX] <= -self.max_inventory 153 | 154 | @property 155 | def step_size(self): 156 | return self._step_size 157 | 158 | @step_size.setter 159 | def step_size(self, step_size: float): 160 | self._step_size = step_size 161 | for process_name, process in self.stochastic_processes.items(): 162 | if process.step_size != step_size: 163 | process.step_size = step_size 164 | if hasattr(self.reward_function, "step_size"): 165 | self.reward_function.step_size = step_size 166 | 167 | @property 168 | def num_trajectories(self): 169 | return self._num_trajectories 170 | 171 | @num_trajectories.setter 172 | def num_trajectories(self, num_trajectories: int): 173 | self._num_trajectories = num_trajectories 174 | for process_name, process in self.stochastic_processes.items(): 175 | if process.num_trajectories != num_trajectories: 176 | process.num_trajectories = num_trajectories 177 | self._empty_infos = self._get_empty_infos() 178 | self.model_dynamics.fill_multiplier = self.model_dynamics._get_fill_multiplier() 179 | 180 | @property 181 | def _intercept_obs_norm(self): 182 | return self.original_observation_space.low 183 | 184 | @property 185 | def _gradient_obs_norm(self): 186 | return (self.original_observation_space.high - self.original_observation_space.low) / 2 187 | 188 | @property 189 | def _intercept_action_norm(self): 190 | return self.original_action_space.low 191 | 192 | @property 193 | def _gradient_action_norm(self): 194 | return (self.original_action_space.high - self.original_action_space.low) / 2 195 | 196 | # state[0]=cash, state[1]=inventory, state[2]=time, state[3] = asset_price, and then remaining states depend on 197 | # the dimensionality of the arrival process, the midprice process and the fill probability process. 
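# For example (illustrative values): with the default LimitOrderModelDynamics built in __init__ above, the
# Poisson arrival model adds no extra state of its own, so if the fill model is likewise stateless a state row
# reduces to [cash, inventory, time, midprice], e.g. np.array([0.0, 0, 0.05, 100.2]).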
198 | def _update_state(self, action: np.ndarray) -> np.ndarray: 199 | arrivals, fills = self.model_dynamics.get_arrivals_and_fills(action) 200 | if fills is not None: 201 | fills = self._remove_max_inventory_fills(fills) 202 | self._update_agent_state(arrivals, fills, action) 203 | self._update_market_state(arrivals, fills, action) 204 | return self.model_dynamics.state 205 | 206 | def _update_market_state(self, arrivals, fills, action): 207 | for process_name, process in self.stochastic_processes.items(): 208 | process.update(arrivals, fills, action, self.model_dynamics.state) 209 | lower_index = self.stochastic_process_indices[process_name][0] 210 | upper_index = self.stochastic_process_indices[process_name][1] 211 | self.model_dynamics.state[:, lower_index:upper_index] = process.current_state 212 | 213 | def _update_agent_state(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray): 214 | self.model_dynamics.update_state(arrivals, fills, action) 215 | self._clip_inventory_and_cash() 216 | self.model_dynamics.state[:, TIME_INDEX] += self.step_size 217 | 218 | def _get_dones(self): 219 | done = self.model_dynamics.state[0, TIME_INDEX] >= self.terminal_time - self.step_size / 2 220 | return np.full((self.num_trajectories,), done, dtype=bool) 221 | 222 | def _calculate_infos(self, current_state, action, rewards): 223 | return ( 224 | self.info_calculator.calculate(current_state, action, rewards) 225 | if self.info_calculator is not None 226 | else self._empty_infos 227 | ) 228 | 229 | def _get_max_cash(self) -> float: 230 | return self.n_steps * self.max_stock_price # TODO: make this a tighter bound 231 | 232 | def _get_observation_space(self) -> gym.spaces.Space: 233 | """The observation space consists of a numpy array containg the agent's cash, the agent's inventory and the 234 | current time. It also contains the states of the arrival model, the midprice model and the fill probability 235 | model in that order.""" 236 | low = np.array([-self.max_cash, -self.max_inventory, 0]) 237 | high = np.array([self.max_cash, self.max_inventory, self.terminal_time]) 238 | for process in self.stochastic_processes.values(): 239 | low = np.append(low, process.min_value) 240 | high = np.append(high, process.max_value) 241 | return Box(low=np.float32(low), high=np.float32(high)) 242 | 243 | def _get_normalised_observation_space(self): 244 | # Linear normalisation of the gym.Box space so that the domain of the observation space is [-1,1]. 245 | return gym.spaces.Box( 246 | low=-np.ones_like(self.observation_space.low, dtype=np.float32), 247 | high=np.ones_like(self.observation_space.high, dtype=np.float32), 248 | ) 249 | 250 | def _get_normalised_action_space(self): 251 | # Linear normalisation of the gym.Box space so that the domain of the action space is [-1,1]. 252 | return gym.spaces.Box( 253 | low=-np.ones_like(self.action_space.low, dtype=np.float32), 254 | high=np.ones_like(self.action_space.high, dtype=np.float32), 255 | ) 256 | 257 | def _get_start_time(self): 258 | if isinstance(self.start_time, (float, int)): 259 | random_start = self.start_time 260 | elif isinstance(self.start_time, Callable): 261 | random_start = self.start_time() 262 | else: 263 | raise NotImplementedError 264 | return self._quantise_time_to_step(random_start) 265 | 266 | def _quantise_time_to_step(self, time: float): 267 | assert (time >= 0.0) and (time < self.terminal_time), "Start time is not within (0, env.terminal_time)." 
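# For instance, with the defaults terminal_time = 1.0 and n_steps = 200 (so step_size = 0.005), a requested
# start time of 0.0124 is quantised to 0.01 by the rounding below.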
268 | return np.round(time / self.step_size) * self.step_size 269 | 270 | def _get_initial_inventories(self) -> np.ndarray: 271 | if isinstance(self.initial_inventory, tuple) and len(self.initial_inventory) == 2: 272 | return self.rng.integers(*self.initial_inventory, size=self.num_trajectories) 273 | elif isinstance(self.initial_inventory, int): 274 | return self.initial_inventory * np.ones((self.num_trajectories,)) 275 | elif isinstance(self.initial_inventory, Callable): 276 | initial_inventory = self.initial_inventory() 277 | if self.model_dynamics.round_initial_inventory: 278 | initial_inventory = int(np.round(initial_inventory)) 279 | return initial_inventory 280 | else: 281 | raise Exception("Initial inventory must be a tuple of length 2 or an int.") 282 | 283 | def _clip_inventory_and_cash(self): 284 | self.model_dynamics.state[:, INVENTORY_INDEX] = self._clip( 285 | self.model_dynamics.state[:, INVENTORY_INDEX], -self.max_inventory, self.max_inventory, cash_flag=False 286 | ) 287 | self.model_dynamics.state[:, CASH_INDEX] = self._clip( 288 | self.model_dynamics.state[:, CASH_INDEX], -self.max_cash, self.max_cash, cash_flag=True 289 | ) 290 | 291 | def _clip(self, not_clipped: float, min: float, max: float, cash_flag: bool) -> float: 292 | clipped = np.clip(not_clipped, min, max) 293 | if (not_clipped != clipped).any() and cash_flag: 294 | print(f"Clipping agent's cash from {not_clipped} to {clipped}.") 295 | if (not_clipped != clipped).any() and not cash_flag: 296 | print(f"Clipping agent's inventory from {not_clipped} to {clipped}.") 297 | return clipped 298 | 299 | @staticmethod 300 | def _clamp(probability): 301 | return max(min(probability, 1), 0) 302 | 303 | def _get_stochastic_processes(self): 304 | stochastic_processes = dict() 305 | for process_name in ["midprice_model", "arrival_model", "fill_probability_model", "price_impact_model"]: 306 | process: StochasticProcessModel = getattr(self.model_dynamics, process_name) 307 | if process is not None: 308 | stochastic_processes[process_name] = process 309 | return OrderedDict(stochastic_processes) 310 | 311 | def _get_stochastic_process_indices(self): 312 | process_indices = dict() 313 | count = 3 314 | for process_name, process in self.stochastic_processes.items(): 315 | dimension = int(process.initial_vector_state.shape[1]) 316 | process_indices[process_name] = (count, count + dimension) 317 | count += dimension 318 | return OrderedDict(process_indices) 319 | 320 | def _get_empty_infos(self): 321 | return [{} for _ in range(self.num_trajectories)] if self.num_trajectories > 1 else {} 322 | 323 | def _remove_max_inventory_fills(self, fills: np.ndarray) -> np.ndarray: 324 | fill_multiplier = np.concatenate( 325 | ((1 - self.is_at_max_inventory).reshape(-1, 1), (1 - self.is_at_min_inventory).reshape(-1, 1)), axis=1 326 | ) 327 | return fill_multiplier * fills 328 | 329 | def _get_inventory_neutral_rewards(self, num_total_trajectories=100_000): 330 | fixed_action = 1 / self.model_dynamics.fill_probability_model.fill_exponent 331 | full_trajectory_env = deepcopy(self) 332 | full_trajectory_env.start_time = 0.0 333 | full_trajectory_env.num_trajectories = num_total_trajectories 334 | full_trajectory_env.normalise_rewards_ = False 335 | 336 | class FixedAgent(Agent): 337 | def get_action(self, obs: np.ndarray) -> np.ndarray: 338 | return np.ones((num_total_trajectories, 2)) * fixed_action 339 | 340 | fixed_agent = FixedAgent() 341 | _, _, rewards = generate_trajectory(full_trajectory_env, fixed_agent) 342 | mean_rewards = 
np.mean(rewards) * self.n_steps 343 | return mean_rewards 344 | 345 | def seed(self, seed: int = None): 346 | self.rng = np.random.default_rng(seed) 347 | for i, process in enumerate(self.stochastic_processes.values()): 348 | process.seed(seed + i + 1) 349 | -------------------------------------------------------------------------------- /mbt_gym/gym/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/gym/__init__.py -------------------------------------------------------------------------------- /mbt_gym/gym/backtesting.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import pandas as pd 4 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 5 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, ASSET_PRICE_INDEX 6 | from mbt_gym.agents.Agent import Agent 7 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 8 | import warnings 9 | 10 | 11 | def get_sharpe_ratio(env: gym.Env, agent: Agent, risk_free_rate: float = 0.099): 12 | """ 13 | The Annualized Sharpe Ratio is calculated as: 14 | Sharpe_Ratio = sqrt(num_steps)*(Returns - Risk Free Rate)/(Std of Return) 15 | It measures the reward in relation to risk. 16 | """ 17 | assert env.num_trajectories == 1, "Backtesting is applied on a single trajectory" 18 | obs, _, _ = generate_trajectory(env, agent) 19 | portfolio_values = (obs[:, CASH_INDEX, :] + obs[:, INVENTORY_INDEX, :] * obs[:, ASSET_PRICE_INDEX, :]).squeeze() 20 | if min(np.abs(portfolio_values)) < 1e-6: 21 | warnings.warn("Runtime Warning: Division by Zero") 22 | return_pcts = np.diff(portfolio_values, 1) / portfolio_values[1:] 23 | annualized_std_returns = return_pcts.std() * np.sqrt(env.n_steps) 24 | return_pcts_mean = return_pcts.mean() 25 | if return_pcts_mean < 0: 26 | warnings.warn("Warning: Mean Return % is negative. Sharpe Ratio may not be appropriate.") 27 | return (return_pcts_mean * env.n_steps - risk_free_rate) / annualized_std_returns 28 | 29 | 30 | def get_sortino_ratio(env: gym.Env, agent: Agent, risk_free_rate: float = 0.099): 31 | """ 32 | The Sortino Ratio is the Sharpe Ratio but restricted to only negative returns. 33 | Sortino_Ratio = sqrt(num_steps)*(Returns - Risk Free Rate)/(Std of negative returns) 34 | """ 35 | assert env.num_trajectories == 1, "Backtesting is applied on a single trajectory" 36 | obs, _, _ = generate_trajectory(env, agent) 37 | portfolio_values = (obs[:, CASH_INDEX, :] + obs[:, INVENTORY_INDEX, :] * obs[:, ASSET_PRICE_INDEX, :]).squeeze() 38 | if min(np.abs(portfolio_values)) < 1e-6: 39 | warnings.warn("Runtime Warning: Division by Zero") 40 | return_pcts = np.diff(portfolio_values, 1) / portfolio_values[1:] 41 | loss_pcts = return_pcts[return_pcts < 0] 42 | annualized_std_returns = loss_pcts.std() * np.sqrt(env.n_steps) 43 | return_pcts_mean = return_pcts.mean() 44 | if return_pcts_mean < 0: 45 | warnings.warn("Warning: Mean Return % is negative. Sortino Ratio may not be appropriate.") 46 | return (return_pcts_mean * env.n_steps - risk_free_rate) / annualized_std_returns 47 | 48 | 49 | def get_maximum_drawdown(env: TradingEnvironment, agent: Agent): 50 | """ 51 | The maximum drawdown is the biggest difference between a peak and a trough in portfolio value. 
52 | """ 53 | assert env.num_trajectories == 1, "Backtesting is applied on a single trajectory" 54 | obs, _, _ = generate_trajectory(env, agent) 55 | portfolio_values = (obs[:, CASH_INDEX, :] + obs[:, INVENTORY_INDEX, :] * obs[:, ASSET_PRICE_INDEX, :]).squeeze() 56 | return_pcts = pd.Series(np.diff(portfolio_values, 1) / portfolio_values[1:]) 57 | cum_prods = (return_pcts + 1).cumprod() 58 | peak = cum_prods.expanding(min_periods=1).max() 59 | drawdown = (cum_prods / peak) - 1 60 | return drawdown.min() 61 | -------------------------------------------------------------------------------- /mbt_gym/gym/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/gym/helpers/__init__.py -------------------------------------------------------------------------------- /mbt_gym/gym/helpers/generate_trajectory.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import torch 4 | 5 | from mbt_gym.agents.Agent import Agent 6 | 7 | 8 | def generate_trajectory(env: gym.Env, agent: Agent, seed: int = None, include_log_probs: bool = False): 9 | if seed is not None: 10 | env.seed(seed) 11 | obs_space_dim = env.observation_space.shape[0] 12 | action_space_dim = env.action_space.shape[0] 13 | observations = np.zeros((env.num_trajectories, obs_space_dim, env.n_steps + 1)) 14 | actions = np.zeros((env.num_trajectories, action_space_dim, env.n_steps)) 15 | rewards = np.zeros((env.num_trajectories, 1, env.n_steps)) 16 | if include_log_probs: 17 | log_probs = torch.zeros((env.num_trajectories, env.action_space.shape[0], env.n_steps)) 18 | obs = env.reset() 19 | observations[:, :, 0] = obs 20 | count = 0 21 | while True: 22 | if include_log_probs: 23 | action, log_prob = agent.get_action(obs, include_log_probs=True) 24 | else: 25 | action = agent.get_action(obs) 26 | obs, reward, done, _ = env.step(action) 27 | actions[:, :, count] = action 28 | observations[:, :, count + 1] = obs 29 | rewards[:, :, count] = reward.reshape(-1, 1) 30 | if include_log_probs: 31 | log_probs[:, :, count] = log_prob 32 | if (env.num_trajectories > 1 and done[0]) or (env.num_trajectories == 1 and done): 33 | break 34 | count += 1 35 | if include_log_probs: 36 | return observations, actions, rewards, log_probs 37 | else: 38 | return observations, actions, rewards 39 | -------------------------------------------------------------------------------- /mbt_gym/gym/helpers/plotting.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | 6 | import seaborn as sns 7 | 8 | from mbt_gym.agents.Agent import Agent 9 | from mbt_gym.gym.TradingEnvironment import TradingEnvironment 10 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, ASSET_PRICE_INDEX 11 | from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory 12 | 13 | 14 | def plot_trajectory(env: gym.Env, agent: Agent, seed: int = None): 15 | # assert env.num_trajectories == 1, "Plotting a trajectory can only be done when env.num_trajectories == 1." 
16 | timestamps = get_timestamps(env) 17 | observations, actions, rewards = generate_trajectory(env, agent, seed) 18 | action_dim = actions.shape[1] 19 | colors = ["r", "k", "b", "g"] 20 | rewards = np.squeeze(rewards, axis=1) 21 | cum_rewards = np.cumsum(rewards, axis=-1) 22 | cash_holdings = observations[:, CASH_INDEX, :] 23 | inventory = observations[:, INVENTORY_INDEX, :] 24 | asset_prices = observations[:, ASSET_PRICE_INDEX, :] 25 | fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 10)) 26 | ax3a = ax3.twinx() 27 | ax1.title.set_text("cum_rewards") 28 | ax2.title.set_text("asset_prices") 29 | ax3.title.set_text("inventory and cash holdings") 30 | ax4.title.set_text("Actions") 31 | for i in range(env.num_trajectories): 32 | traj_label = f" trajectory {i}" if env.num_trajectories > 1 else "" 33 | ax1.plot(timestamps[1:], cum_rewards[i, :]) 34 | ax2.plot(timestamps, asset_prices[i, :]) 35 | ax3.plot( 36 | timestamps, 37 | inventory[i, :], 38 | label=f"inventory" + traj_label, 39 | color="r", 40 | alpha=(i + 1) / (env.num_trajectories + 1), 41 | ) 42 | ax3a.plot( 43 | timestamps, 44 | cash_holdings[i, :], 45 | label=f"cash holdings" + traj_label, 46 | color="b", 47 | alpha=(i + 1) / (env.num_trajectories + 1), 48 | ) 49 | for j in range(action_dim): 50 | ax4.plot( 51 | timestamps[0:-1], 52 | actions[i, j, :], 53 | label=f"Action {j}" + traj_label, 54 | color=colors[j], 55 | alpha=(i + 1) / (env.num_trajectories + 1), 56 | ) 57 | ax3.legend() 58 | ax4.legend() 59 | plt.show() 60 | 61 | 62 | def plot_stable_baselines_actions(model, env): 63 | timestamps = get_timestamps(env) 64 | inventory_action_dict = {} 65 | price = 100 66 | cash = 100 67 | for inventory in [-3, -2, -1, 0, 1, 2, 3]: 68 | actions = model.predict([price, cash, inventory, 0], deterministic=True)[0].reshape((1, 2)) 69 | for ts in timestamps[1:]: 70 | actions = np.append( 71 | actions, model.predict([price, cash, inventory, ts], deterministic=True)[0].reshape((1, 2)), axis=0 72 | ) 73 | inventory_action_dict[inventory] = actions 74 | for inventory in [-3, -2, -1, 0, 1, 2, 3]: 75 | plt.plot(np.array(inventory_action_dict[inventory]).T[0], label=inventory) 76 | plt.legend() 77 | plt.show() 78 | for inventory in [-3, -2, -1, 0, 1, 2, 3]: 79 | plt.plot(np.array(inventory_action_dict[inventory]).T[1], label=inventory) 80 | plt.legend() 81 | plt.show() 82 | 83 | 84 | def plot_pnl(rewards, symmetric_rewards=None): 85 | fig, ax = plt.subplots(1, 1, figsize=(20, 10)) 86 | if symmetric_rewards is not None: 87 | sns.histplot(symmetric_rewards, label="Rewards of symmetric strategy", stat="density", bins=50, ax=ax) 88 | sns.histplot(rewards, label="Rewards", color="red", stat="density", bins=50, ax=ax) 89 | ax.legend() 90 | plt.close() 91 | return fig 92 | 93 | 94 | def generate_results_table_and_hist(vec_env: TradingEnvironment, agent: Agent, n_episodes: int = 1000): 95 | assert vec_env.num_trajectories > 1, "To generate a results table and hist, vec_env must roll out > 1 trajectory." 
96 | observations, actions, rewards = generate_trajectory(vec_env, agent) 97 | total_rewards = rewards.sum(axis=-1).reshape(-1) 98 | terminal_inventories = observations[:, INVENTORY_INDEX, -1] 99 | half_spreads = actions.mean(axis=(-1, -2)) 100 | 101 | rows = ["Inventory"] 102 | columns = ["Mean spread", "Mean PnL", "Std PnL", "Mean terminal inventory", "Std terminal inventory"] 103 | results = pd.DataFrame(index=rows, columns=columns) 104 | results.loc[:, "Mean spread"] = 2 * np.mean(half_spreads) 105 | results.loc["Inventory", "Mean PnL"] = np.mean(total_rewards) 106 | results.loc["Inventory", "Std PnL"] = np.std(total_rewards) 107 | results.loc["Inventory", "Mean terminal inventory"] = np.mean(terminal_inventories) 108 | results.loc["Inventory", "Std terminal inventory"] = np.std(terminal_inventories) 109 | fig = plot_pnl(total_rewards) 110 | return results, fig, total_rewards 111 | 112 | 113 | def get_timestamps(env): 114 | return np.linspace(0, env.terminal_time, env.n_steps + 1) 115 | -------------------------------------------------------------------------------- /mbt_gym/gym/index_names.py: -------------------------------------------------------------------------------- 1 | CASH_INDEX = 0 2 | INVENTORY_INDEX = 1 3 | TIME_INDEX = 2 4 | ASSET_PRICE_INDEX = 3 5 | 6 | BID_INDEX = 0 7 | ASK_INDEX = 1 8 | -------------------------------------------------------------------------------- /mbt_gym/gym/info_calculators.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union, List 3 | 4 | import gym 5 | import numpy as np 6 | 7 | 8 | class InfoCalculator(metaclass=abc.ABCMeta): 9 | @abc.abstractmethod 10 | def calculate(self, state: np.ndarray, action: np.ndarray, reward: np.ndarray, done: bool) -> dict: 11 | pass 12 | 13 | @abc.abstractmethod 14 | def reset(self, initial_state: np.ndarray): 15 | pass 16 | 17 | 18 | class ActionInfoCalculator(InfoCalculator): 19 | """ActionInfoCalculator records the actions taken throughout the episode and then outputs the mean actions taken at 20 | the terminal step as an info dict. This is the Stable Baselines 3 convention. 
See, for example, the VecMonitor class 21 | of SB3.""" 22 | 23 | def __init__(self, action_space: gym.spaces.Box, n_steps: int = 10 * 10, num_trajectories: int = 1000): 24 | self.action_space = action_space 25 | self.n_steps = n_steps 26 | self.num_trajectories = num_trajectories 27 | self.nan_matrix = np.empty((self.num_trajectories, self.action_space.shape[0], self.n_steps)) 28 | self.nan_matrix[:] = np.nan 29 | self.actions = self.nan_matrix.copy() 30 | self.empty_infos = [{} for _ in range(self.num_trajectories)] if self.num_trajectories > 1 else {} 31 | self.count = 0 32 | 33 | def calculate( 34 | self, state: np.ndarray, action: np.ndarray, reward: np.ndarray, done: bool 35 | ) -> Union[dict, List[dict]]: 36 | if done: 37 | mean_actions = self._calculate_mean_actions() 38 | return [ 39 | {f"action_{j}": mean_actions[i, j] for j in range(mean_actions.shape[1])} 40 | for i in range(mean_actions.shape[0]) 41 | ] 42 | else: 43 | self.actions[:, :, self.count] = action 44 | self.count += 1 45 | return self.empty_infos 46 | 47 | def reset(self, initial_state: np.ndarray): 48 | self.count = 0 49 | self.actions = self.nan_matrix.copy() 50 | 51 | def _calculate_mean_actions(self): 52 | return np.nanmean(self.actions, axis=2) 53 | -------------------------------------------------------------------------------- /mbt_gym/gym/wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import numpy as np 4 | 5 | from mbt_gym.gym.index_names import INVENTORY_INDEX, TIME_INDEX 6 | 7 | from math import sqrt 8 | 9 | 10 | class ReduceStateSizeWrapper(gym.Wrapper): 11 | """ 12 | :param env: (gym.Env) Gym environment that will be wrapped 13 | """ 14 | 15 | def __init__(self, env, list_of_state_indices: list = [INVENTORY_INDEX, TIME_INDEX]): 16 | # Call the parent constructor, so we can access self.env later 17 | super(ReduceStateSizeWrapper, self).__init__(env) 18 | assert type(env.observation_space) == gym.spaces.box.Box 19 | self.observation_space = gym.spaces.box.Box( 20 | low=env.observation_space.low[list_of_state_indices], 21 | high=env.observation_space.high[list_of_state_indices], 22 | dtype=np.float64, 23 | ) 24 | self.list_of_state_indices = list_of_state_indices 25 | 26 | def reset(self): 27 | """ 28 | Reset the environment 29 | """ 30 | obs = self.env.reset() 31 | return obs[:, self.list_of_state_indices] 32 | 33 | def step(self, action): 34 | """ 35 | :param action: ([float] or int) Action taken by the agent 36 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information 37 | """ 38 | obs, reward, done, info = self.env.step(action) 39 | return obs[:, self.list_of_state_indices], reward, done, info 40 | 41 | @property 42 | def spec(self): 43 | return self.env.spec 44 | 45 | 46 | class NormaliseASObservation(gym.Wrapper): 47 | """ 48 | :param env: (gym.Env) Gym environment that will be wrapped 49 | """ 50 | 51 | def __init__(self, env): 52 | # Call the parent constructor, so we can access self.env later 53 | super(NormaliseASObservation, self).__init__(env) 54 | self.normalisation_factor = 2 / (env.observation_space.high - env.observation_space.low) 55 | self.normalisation_offset = (env.observation_space.high + env.observation_space.low) / 2 56 | assert type(env.observation_space) == gym.spaces.box.Box 57 | self.observation_space = gym.spaces.box.Box( 58 | low=-np.ones(env.observation_space.shape), 59 | high=np.ones(env.observation_space.shape), 60 | dtype=np.float64, 61 | ) 62 | 63 | def
reset(self): 64 | """ 65 | Reset the environment 66 | """ 67 | obs = self.env.reset() 68 | return (obs - self.normalisation_offset) * self.normalisation_factor 69 | 70 | def step(self, action): 71 | """ 72 | :param action: ([float] or int) Action taken by the agent 73 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information 74 | """ 75 | obs, reward, done, info = self.env.step(action) 76 | return (obs - self.normalisation_offset) * self.normalisation_factor, reward, done, info 77 | 78 | 79 | class RemoveTerminalRewards(gym.Wrapper): 80 | """ 81 | :param env: (gym.Env) Gym environment that will be wrapped 82 | """ 83 | 84 | def __init__(self, env, num_final_steps: int = 5): 85 | # Call the parent constructor, so we can access self.env later 86 | super(RemoveTerminalRewards, self).__init__(env) 87 | 88 | def reset(self): 89 | """ 90 | Reset the environment 91 | """ 92 | return self.env.reset() 93 | 94 | def step(self, action): 95 | """ 96 | :param action: ([float] or int) Action taken by the agent 97 | :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional information 98 | """ 99 | state, reward, done, _ = self.env.step(action) 100 | if done: 101 | reward *= ( 102 | self.env.reward_function.per_step_inventory_aversion 103 | / self.env.reward_function.terminal_inventory_aversion 104 | ) 105 | return state, reward, done, {} 106 | -------------------------------------------------------------------------------- /mbt_gym/rewards/RewardFunctions.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Union 3 | 4 | import numpy as np 5 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, TIME_INDEX, ASSET_PRICE_INDEX 6 | 7 | 8 | class RewardFunction(metaclass=abc.ABCMeta): 9 | @abc.abstractmethod 10 | def calculate( 11 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 12 | ) -> Union[float, np.ndarray]: 13 | pass 14 | 15 | @abc.abstractmethod 16 | def reset(self, initial_state: np.ndarray): 17 | pass 18 | 19 | 20 | class PnL(RewardFunction): 21 | """A simple profit and loss reward function given by the change in the 'mark-to-market' value of the agent's portfolio.""" 22 | 23 | def calculate( 24 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 25 | ) -> float: 26 | assert len(current_state.shape) > 1, "Reward functions must be calculated on state matrices."
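# Worked example (values as in mbt_gym/rewards/tests/testRewardFunctions.py): a current state with cash 120,
# inventory 2 and asset price 100 has mark-to-market value 320; a next state with cash 20, inventory 3 and
# price 100.05 has value 320.15, so the PnL reward for that step is 0.15.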
27 | current_market_value = ( 28 | current_state[:, CASH_INDEX] + current_state[:, INVENTORY_INDEX] * current_state[:, ASSET_PRICE_INDEX] 29 | ) 30 | next_market_value = ( 31 | next_state[:, CASH_INDEX] + next_state[:, INVENTORY_INDEX] * next_state[:, ASSET_PRICE_INDEX] 32 | ) 33 | return next_market_value - current_market_value 34 | 35 | def reset(self, initial_state: np.ndarray): 36 | pass 37 | 38 | 39 | class CjOeCriterion(RewardFunction): 40 | def __init__( 41 | self, 42 | per_step_inventory_aversion: float = 0.01, 43 | terminal_inventory_aversion: float = 0.0, 44 | inventory_exponent: float = 2.0, 45 | terminal_time: float = 1.0, 46 | ): 47 | self.per_step_inventory_aversion = per_step_inventory_aversion 48 | self.terminal_inventory_aversion = terminal_inventory_aversion 49 | self.pnl = PnL() 50 | self.inventory_exponent = inventory_exponent 51 | self.terminal_time = terminal_time 52 | self.initial_inventory = None 53 | self.episode_length = None 54 | 55 | def calculate( 56 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 57 | ) -> float: 58 | dt = next_state[:, TIME_INDEX] - current_state[:, TIME_INDEX] 59 | return ( 60 | self.pnl.calculate(current_state, action, next_state, is_terminal_step) 61 | - dt * self.per_step_inventory_aversion * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 62 | - dt 63 | * self.terminal_inventory_aversion 64 | * ( 65 | self.inventory_exponent 66 | * np.squeeze(action) 67 | * (current_state[:, INVENTORY_INDEX]) ** (self.inventory_exponent - 1) 68 | + self.initial_inventory**self.inventory_exponent * self.episode_length 69 | ) 70 | ) 71 | 72 | def reset(self, initial_state: np.ndarray): 73 | self.initial_inventory = initial_state[:, INVENTORY_INDEX] 74 | self.episode_length = self.terminal_time - initial_state[:, TIME_INDEX] 75 | 76 | 77 | class CjMmCriterion(RewardFunction): 78 | """A version of the Cartea-Jaimungal criterion which uses Ito's lemma for Poisson processes to split the negative 79 | reward attributed to terminal inventory aversion over the trajectory of the inventory.""" 80 | 81 | def __init__( 82 | self, 83 | per_step_inventory_aversion: float = 0.01, 84 | terminal_inventory_aversion: float = 0.0, 85 | inventory_exponent: float = 2.0, 86 | terminal_time: float = 1.0, 87 | ): 88 | self.per_step_inventory_aversion = per_step_inventory_aversion 89 | self.terminal_inventory_aversion = terminal_inventory_aversion 90 | self.pnl = PnL() 91 | self.inventory_exponent = inventory_exponent 92 | self.terminal_time = terminal_time 93 | self.initial_inventory = None 94 | self.episode_length = None 95 | 96 | def calculate( 97 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 98 | ) -> float: 99 | dt = next_state[:, TIME_INDEX] - current_state[:, TIME_INDEX] 100 | return ( 101 | self.pnl.calculate(current_state, action, next_state, is_terminal_step) 102 | - dt * self.per_step_inventory_aversion * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 103 | - self.terminal_inventory_aversion 104 | * ( 105 | next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 106 | - current_state[:, INVENTORY_INDEX] ** self.inventory_exponent 107 | + dt / self.episode_length * self.initial_inventory**self.inventory_exponent 108 | ) 109 | ) 110 | 111 | def reset(self, initial_state: np.ndarray): 112 | self.initial_inventory = initial_state[:, INVENTORY_INDEX] 113 | self.episode_length = self.terminal_time - initial_state[:, TIME_INDEX] 
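# Sanity check of the decomposition above (with inventory_exponent = 2): summed over a full episode, the
# terminal-aversion term telescopes, since sum(q_{t+dt}^2 - q_t^2) = q_T^2 - q_0^2 and sum(dt / episode_length) = 1,
# so the total penalty equals terminal_inventory_aversion * q_T^2, which is exactly the single terminal charge
# applied by RunningInventoryPenalty below. The tests in mbt_gym/rewards/tests/testRewardFunctions.py check this
# agreement.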
114 | 115 | 116 | class RunningInventoryPenalty(RewardFunction): 117 | def __init__( 118 | self, 119 | per_step_inventory_aversion: float = 0.01, 120 | terminal_inventory_aversion: float = 0.0, 121 | inventory_exponent: float = 2.0, 122 | ): 123 | self.per_step_inventory_aversion = per_step_inventory_aversion 124 | self.terminal_inventory_aversion = terminal_inventory_aversion 125 | self.pnl = PnL() 126 | self.inventory_exponent = inventory_exponent 127 | 128 | def calculate( 129 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 130 | ) -> float: 131 | dt = next_state[:, TIME_INDEX] - current_state[:, TIME_INDEX] 132 | return ( 133 | self.pnl.calculate(current_state, action, next_state, is_terminal_step) 134 | - dt * self.per_step_inventory_aversion * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 135 | - self.terminal_inventory_aversion 136 | * int(is_terminal_step) 137 | * next_state[:, INVENTORY_INDEX] ** self.inventory_exponent 138 | ) 139 | 140 | def reset(self, initial_state: np.ndarray): 141 | pass 142 | 143 | 144 | # Cartea and Jaimungal criterion is the same as inventory adjusted PnL 145 | 146 | CjCriterion = RunningInventoryPenalty 147 | 148 | 149 | class ExponentialUtility(RewardFunction): 150 | def __init__(self, risk_aversion: float = 0.1): 151 | self.risk_aversion = risk_aversion 152 | 153 | def calculate( 154 | self, current_state: np.ndarray, action: np.ndarray, next_state: np.ndarray, is_terminal_step: bool = False 155 | ) -> float: 156 | return ( 157 | -np.exp( 158 | -self.risk_aversion 159 | * (next_state[:, CASH_INDEX] + next_state[:, INVENTORY_INDEX] * next_state[:, ASSET_PRICE_INDEX]) 160 | ) 161 | if is_terminal_step 162 | else 0 163 | ) 164 | 165 | def reset(self, initial_state: np.ndarray): 166 | pass 167 | -------------------------------------------------------------------------------- /mbt_gym/rewards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/rewards/__init__.py -------------------------------------------------------------------------------- /mbt_gym/rewards/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/rewards/tests/__init__.py -------------------------------------------------------------------------------- /mbt_gym/rewards/tests/testRewardFunctions.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from unittest import TestCase, main 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.rewards.RewardFunctions import RunningInventoryPenalty, PnL, CjMmCriterion 7 | from mbt_gym.gym.index_names import CASH_INDEX, INVENTORY_INDEX, TIME_INDEX, ASSET_PRICE_INDEX 8 | 9 | STEP_SIZE = 0.2 10 | TEST_CURRENT_STATE = np.array([[120, 2, 0.5, 100]]) 11 | TEST_ACTION = np.array([[1, 1]]) 12 | TEST_NEXT_STATE = np.array([[20, 3, 0.5 + STEP_SIZE, 100.05]]) # Buy order gets filled 13 | TERMINAL_TIME = 1.0 14 | 15 | # CASH, INVENTORY, TIME, ASSET_PRICE 16 | MOCK_OBSERVATIONS = [ 17 | np.array([[100.0, 0, 0.0, 100]]), 18 | np.array([[0.5, 1, STEP_SIZE, 101]]), 19 | np.array([[102.0, 0, 2 * STEP_SIZE, 102]]), 20 | np.array([[103.0, 0, 3 * STEP_SIZE, 103]]), 21 | np.array([[206.5, -1, 4 * STEP_SIZE, 104]]), 22 | np.array([[103.0, 0, 5 * STEP_SIZE, 
103]]), 23 | ] 24 | MOCK_ACTIONS = [ 25 | np.array([[0.5, 0.5]]), 26 | np.array([[0.5, 1]]), 27 | np.array([[0.5, 0.5]]), 28 | np.array([[1, 0.5]]), 29 | np.array([[0.5, 0.5]]), 30 | ] 31 | 32 | 33 | class testPnL(TestCase): 34 | def test_calculate_per_step_reward(self): 35 | current_value = ( 36 | TEST_CURRENT_STATE[:, CASH_INDEX] 37 | + TEST_CURRENT_STATE[:, INVENTORY_INDEX] * TEST_CURRENT_STATE[:, ASSET_PRICE_INDEX] 38 | ) 39 | next_value = ( 40 | TEST_NEXT_STATE[:, CASH_INDEX] + TEST_NEXT_STATE[:, INVENTORY_INDEX] * TEST_NEXT_STATE[:, ASSET_PRICE_INDEX] 41 | ) 42 | expected = next_value - current_value 43 | actual = PnL().calculate(current_state=TEST_CURRENT_STATE, action=TEST_ACTION, next_state=TEST_NEXT_STATE) 44 | self.assertEqual(expected, actual, f"PnL calculation should give {expected}. Instead got {actual}!") 45 | 46 | 47 | PER_STEP_INVENTORY_AVERSION = 0.01 48 | TERMINAL_INVENTORY_AVERSION = 1 49 | 50 | 51 | class testInventoryReward(TestCase): 52 | def test_calculate_per_step_reward(self): 53 | reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 54 | pnl = PnL().calculate(current_state=TEST_CURRENT_STATE, action=TEST_ACTION, next_state=TEST_NEXT_STATE) 55 | inventory_penalty = PER_STEP_INVENTORY_AVERSION * STEP_SIZE * abs(TEST_NEXT_STATE[:, INVENTORY_INDEX]) ** 2 56 | expected = pnl - inventory_penalty 57 | actual = reward_function.calculate(TEST_CURRENT_STATE, TEST_ACTION, TEST_NEXT_STATE) 58 | self.assertAlmostEqual(expected.item(), actual.item(), places=5) 59 | 60 | 61 | class testCjMmCriterion(TestCase): 62 | cj_mm_criterion = CjMmCriterion( 63 | per_step_inventory_aversion=PER_STEP_INVENTORY_AVERSION, 64 | terminal_inventory_aversion=TERMINAL_INVENTORY_AVERSION, 65 | terminal_time=TERMINAL_TIME, 66 | ) 67 | 68 | def test_agreement_with_non_decontructed_version(self): 69 | target_reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 70 | cj_mm_rewards = [] 71 | target_rewards = [] 72 | self.cj_mm_criterion.reset(MOCK_OBSERVATIONS[0]) 73 | for i in range(len(MOCK_ACTIONS)): 74 | is_terminal_step = MOCK_OBSERVATIONS[i + 1][:, TIME_INDEX] == 1 75 | cj_mm_rewards.append( 76 | self.cj_mm_criterion.calculate( 77 | MOCK_OBSERVATIONS[i], MOCK_ACTIONS[i], MOCK_OBSERVATIONS[i + 1], is_terminal_step 78 | ) 79 | ) 80 | target_rewards.append( 81 | target_reward_function.calculate( 82 | MOCK_OBSERVATIONS[i], MOCK_ACTIONS[i], MOCK_OBSERVATIONS[i + 1], is_terminal_step 83 | ) 84 | ) 85 | self.assertAlmostEqual(float(sum(cj_mm_rewards)), float(sum(target_rewards)), places=5) 86 | 87 | def test_agreement_with_non_decontructed_version_nonzero_initial_inventory(self): 88 | target_reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 89 | cj_mm_rewards = [] 90 | target_rewards = [] 91 | mock_observations = deepcopy(MOCK_OBSERVATIONS) 92 | mock_observations[0][:, INVENTORY_INDEX] = 2 93 | mock_observations[0][:, CASH_INDEX] = -100 94 | mock_observations[-1] = deepcopy(mock_observations[-2]) 95 | mock_observations[-1][:, TIME_INDEX] = 1.0 96 | self.cj_mm_criterion.reset(mock_observations[0]) 97 | for i in range(len(MOCK_ACTIONS)): 98 | is_terminal_step = mock_observations[i + 1][:, TIME_INDEX] == 1 99 | cj_mm_rewards.append( 100 | self.cj_mm_criterion.calculate( 101 | mock_observations[i], MOCK_ACTIONS[i], mock_observations[i + 1], is_terminal_step 102 | ) 103 | ) 104 | target_rewards.append( 105 | target_reward_function.calculate( 106 | 
mock_observations[i], MOCK_ACTIONS[i], mock_observations[i + 1], is_terminal_step 107 | ) 108 | ) 109 | self.assertAlmostEqual(float(sum(cj_mm_rewards)), float(sum(target_rewards)), places=5) 110 | 111 | def test_agreement_with_non_decontructed_version_partial_trajectory(self): 112 | target_reward_function = RunningInventoryPenalty(PER_STEP_INVENTORY_AVERSION, TERMINAL_INVENTORY_AVERSION) 113 | cj_mm_rewards = [] 114 | target_rewards = [] 115 | START_STEP = 2 116 | self.cj_mm_criterion.reset(MOCK_OBSERVATIONS[START_STEP]) 117 | for i in range(len(MOCK_ACTIONS[START_STEP:])): 118 | is_terminal_step = MOCK_OBSERVATIONS[START_STEP + i + 1][:, TIME_INDEX] == 1 119 | cj_mm_rewards.append( 120 | self.cj_mm_criterion.calculate( 121 | MOCK_OBSERVATIONS[START_STEP + i], 122 | MOCK_ACTIONS[START_STEP + i], 123 | MOCK_OBSERVATIONS[START_STEP + i + 1], 124 | is_terminal_step, 125 | ) 126 | ) 127 | target_rewards.append( 128 | target_reward_function.calculate( 129 | MOCK_OBSERVATIONS[START_STEP + i], 130 | MOCK_ACTIONS[START_STEP + i], 131 | MOCK_OBSERVATIONS[START_STEP + i + 1], 132 | is_terminal_step, 133 | ) 134 | ) 135 | self.assertAlmostEqual(float(sum(cj_mm_rewards)), float(sum(target_rewards)), places=5) 136 | 137 | 138 | if __name__ == "__main__": 139 | main() 140 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/StochasticProcessModel.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from copy import copy 3 | 4 | import numpy as np 5 | from numpy.random import default_rng 6 | 7 | 8 | class StochasticProcessModel(metaclass=abc.ABCMeta): 9 | def __init__( 10 | self, 11 | min_value: np.ndarray, 12 | max_value: np.ndarray, 13 | step_size: float, 14 | terminal_time: float, 15 | initial_state: np.ndarray, 16 | num_trajectories: int = 1, 17 | seed: int = None, 18 | ): 19 | self.min_value = min_value 20 | self.max_value = max_value 21 | self.step_size = step_size 22 | self.terminal_time = terminal_time 23 | self.num_trajectories = num_trajectories 24 | self.initial_state = initial_state 25 | self._check_attribute_shapes() 26 | self.current_state = copy(self.initial_vector_state) 27 | self.rng = default_rng(seed) 28 | self.seed_ = seed 29 | 30 | def reset(self): 31 | self.current_state = self.initial_vector_state 32 | 33 | @abc.abstractmethod 34 | def update(self, arrivals: np.ndarray, fills: np.ndarray, action: np.ndarray, state: np.ndarray = None): 35 | pass 36 | 37 | def seed(self, seed: int = None): 38 | self.rng = default_rng(seed) 39 | self.seed_ = seed 40 | 41 | def _check_attribute_shapes(self): 42 | for name in ["initial_state", "min_value", "max_value"]: 43 | attribute = getattr(self, name) 44 | assert ( 45 | len(attribute.shape) == 2 and attribute.shape[0] == 1 46 | ), f"Attribute {name} must be a vector of shape (1, state_size)." 
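# For example (illustrative): an initial_state of np.array([[100.0]]) with num_trajectories = 3 yields an
# initial_vector_state of shape (3, 1) via the property below, i.e. one copy of the initial state per trajectory.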
47 | 48 | @property 49 | def initial_vector_state(self) -> np.ndarray: 50 | initial_state = self.initial_state 51 | if isinstance(initial_state, list): 52 | initial_state = np.array([self.initial_state]) 53 | return np.repeat(initial_state, self.num_trajectories, axis=0) 54 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JJJerome/mbt_gym/1e1aa38ca35c4fa472777e3574f9c907f89bb5b0/mbt_gym/stochastic_processes/__init__.py -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/arrival_models.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Optional 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | 9 | class ArrivalModel(StochasticProcessModel): 10 | """ArrivalModel models the arrival of orders to the order book. The first entry of arrivals represents an arrival 11 | of an exogenous SELL order (arriving on the buy side of the book) and the second entry represents an arrival of an 12 | exogenous BUY order (arriving on the sell side of the book). 13 | """ 14 | 15 | def __init__( 16 | self, 17 | min_value: np.ndarray, 18 | max_value: np.ndarray, 19 | step_size: float, 20 | terminal_time: float, 21 | initial_state: np.ndarray, 22 | num_trajectories: int = 1, 23 | seed: int = None, 24 | ): 25 | super().__init__(min_value, max_value, step_size, terminal_time, initial_state, num_trajectories, seed) 26 | 27 | @abc.abstractmethod 28 | def get_arrivals(self) -> np.ndarray: 29 | pass 30 | 31 | 32 | class PoissonArrivalModel(ArrivalModel): 33 | def __init__( 34 | self, 35 | intensity: np.ndarray = np.array([140.0, 140.0]), 36 | step_size: float = 0.001, 37 | num_trajectories: int = 1, 38 | seed: Optional[int] = None, 39 | ): 40 | self.intensity = np.array(intensity) 41 | super().__init__( 42 | min_value=np.array([[]]), 43 | max_value=np.array([[]]), 44 | step_size=step_size, 45 | terminal_time=0.0, 46 | initial_state=np.array([[]]), 47 | num_trajectories=num_trajectories, 48 | seed=seed, 49 | ) 50 | 51 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 52 | pass 53 | 54 | def get_arrivals(self) -> np.ndarray: 55 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 56 | return unif < self.intensity * self.step_size 57 | 58 | 59 | class PoissonArrivalNonLinearModel(ArrivalModel): 60 | def __init__( 61 | self, 62 | intensity: np.ndarray = np.array([140.0, 140.0]), 63 | step_size: float = 0.001, 64 | num_trajectories: int = 1, 65 | seed: Optional[int] = None, 66 | ): 67 | self.intensity = np.array(intensity) 68 | super().__init__( 69 | min_value=np.array([[]]), 70 | max_value=np.array([[]]), 71 | step_size=step_size, 72 | terminal_time=0.0, 73 | initial_state=np.array([[]]), 74 | num_trajectories=num_trajectories, 75 | seed=seed, 76 | ) 77 | 78 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 79 | pass 80 | 81 | def get_arrivals(self) -> np.ndarray: 82 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 83 | return unif < 1. 
- np.exp(-self.intensity * self.step_size) 84 | 85 | 86 | class HawkesArrivalModel(ArrivalModel): 87 | def __init__( 88 | self, 89 | baseline_arrival_rate: np.ndarray = np.array([[10.0, 10.0]]), 90 | step_size: float = 0.01, 91 | jump_size: float = 40.0, 92 | mean_reversion_speed: float = 60.0, 93 | terminal_time: float = 1, 94 | num_trajectories: int = 1, 95 | seed: Optional[int] = None, 96 | ): 97 | self.baseline_arrival_rate = baseline_arrival_rate 98 | self.jump_size = jump_size # see https://arxiv.org/pdf/1507.02822.pdf, equation (4). 99 | self.mean_reversion_speed = mean_reversion_speed 100 | super().__init__( 101 | min_value=np.array([[0, 0]]), 102 | max_value=np.array([[1, 1]]) * self._get_max_arrival_rate(), 103 | step_size=step_size, 104 | terminal_time=terminal_time, 105 | initial_state=baseline_arrival_rate, 106 | num_trajectories=num_trajectories, 107 | seed=seed, 108 | ) 109 | 110 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 111 | self.current_state = ( 112 | self.current_state 113 | + self.mean_reversion_speed 114 | * (np.ones((self.num_trajectories, 2)) * self.baseline_arrival_rate - self.current_state) 115 | * self.step_size 116 | * np.ones((self.num_trajectories, 2)) 117 | + self.jump_size * arrivals 118 | ) 119 | return self.current_state 120 | 121 | def get_arrivals(self) -> np.ndarray: 122 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 123 | return unif < self.current_state * self.step_size 124 | 125 | def _get_max_arrival_rate(self): 126 | return self.baseline_arrival_rate * 10 127 | 128 | # TODO: Improve this with 4*std 129 | # See: https://math.stackexchange.com/questions/4047342/expectation-of-hawkes-process-with-exponential-kernel 130 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/fill_probability_models.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Optional, Tuple 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | 9 | class FillProbabilityModel(StochasticProcessModel): 10 | def __init__( 11 | self, 12 | min_value: np.ndarray, 13 | max_value: np.ndarray, 14 | step_size: float, 15 | terminal_time: float, 16 | initial_state: np.ndarray, 17 | num_trajectories: int = 1, 18 | seed: int = None, 19 | ): 20 | super().__init__(min_value, max_value, step_size, terminal_time, initial_state, num_trajectories, seed) 21 | 22 | @abc.abstractmethod 23 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 24 | """Note that _get_fill_probabilities can return a 'probability' greater than one. However, this is not an issue 25 | for it is only use is in `get_hypothetical_fills` below.""" 26 | pass 27 | 28 | def get_fills(self, depths: np.ndarray) -> np.ndarray: 29 | assert depths.shape == (self.num_trajectories, 2), ( 30 | "Depths must be a numpy array of shape " 31 | + f"({self.num_trajectories},2). Instead it is a numpy array of shape {depths.shape}." 
32 | ) 33 | unif = self.rng.uniform(size=(self.num_trajectories, 2)) 34 | return unif < self._get_fill_probabilities(depths) 35 | 36 | @property 37 | @abc.abstractmethod 38 | def max_depth(self) -> float: 39 | pass 40 | 41 | 42 | class ExponentialFillFunction(FillProbabilityModel): 43 | def __init__( 44 | self, fill_exponent: float = 1.5, step_size: float = 0.1, num_trajectories: int = 1, seed: Optional[int] = None 45 | ): 46 | self.fill_exponent = fill_exponent 47 | super().__init__( 48 | min_value=np.array([[]]), 49 | max_value=np.array([[]]), 50 | step_size=step_size, 51 | terminal_time=0.0, 52 | initial_state=np.array([[]]), 53 | num_trajectories=num_trajectories, 54 | seed=seed, 55 | ) 56 | 57 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 58 | return np.exp(-self.fill_exponent * depths) 59 | 60 | @property 61 | def max_depth(self) -> float: 62 | return -np.log(0.01) / self.fill_exponent 63 | 64 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 65 | pass 66 | 67 | 68 | class TriangularFillFunction(FillProbabilityModel): 69 | def __init__( 70 | self, max_fill_depth: float = 1.0, step_size: float = 0.1, num_trajectories: int = 1, seed: Optional[int] = None 71 | ): 72 | self.max_fill_depth = max_fill_depth 73 | super().__init__( 74 | min_value=np.array([[]]), 75 | max_value=np.array([[]]), 76 | step_size=step_size, 77 | terminal_time=0.0, 78 | initial_state=np.array([[]]), 79 | num_trajectories=num_trajectories, 80 | seed=seed, 81 | ) 82 | 83 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 84 | return np.max(1 - np.max(depths, 0) / self.max_fill_depth, 0) 85 | 86 | @property 87 | def max_depth(self) -> float: 88 | return 1.5 * self.max_fill_depth 89 | 90 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 91 | pass 92 | 93 | 94 | class PowerFillFunction(FillProbabilityModel): 95 | def __init__( 96 | self, 97 | fill_exponent: float = 1.5, 98 | fill_multiplier: float = 1.5, 99 | step_size: float = 0.1, 100 | num_trajectories: int = 1, 101 | seed: Optional[int] = None, 102 | ): 103 | self.fill_exponent = fill_exponent 104 | self.fill_multiplier = fill_multiplier 105 | super().__init__( 106 | min_value=np.array([[]]), 107 | max_value=np.array([[]]), 108 | step_size=step_size, 109 | terminal_time=0.0, 110 | initial_state=np.array([[]]), 111 | num_trajectories=num_trajectories, 112 | seed=seed, 113 | ) 114 | 115 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 116 | return (1 + (self.fill_multiplier * np.max(depths, 0)) ** self.fill_exponent) ** -1 117 | 118 | @property 119 | def max_depth(self) -> float: 120 | return 0.01 ** (-1 / self.fill_exponent) - 1 121 | 122 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 123 | pass 124 | 125 | 126 | class ExogenousMmFillProbabilityModel(FillProbabilityModel): 127 | def __init__( 128 | self, 129 | exogenous_best_depth_processes: Tuple[StochasticProcessModel], 130 | fill_exponent: float = 1.5, 131 | base_fill_probability: float = 1.0, 132 | step_size: float = 0.1, 133 | num_trajectories: int = 1, 134 | seed: Optional[int] = None, 135 | ): 136 | assert len(exogenous_best_depth_processes) == 2, "exogenous_best_depth_processes must be length 2 (bid and ask)" 137 | assert all( 138 | len(process.initial_state) > 0 for process in exogenous_best_depth_processes 139 | ), "Exogenous best depth processes must have a state of 
at least size 1." 140 | self.exogenous_best_depth_processes = exogenous_best_depth_processes 141 | self.fill_exponent = fill_exponent 142 | self.base_fill_probability = base_fill_probability 143 | super().__init__( 144 | min_value=np.concatenate([process.min_value for process in self.exogenous_best_depth_processes], axis=1), 145 | max_value=np.concatenate([process.max_value for process in self.exogenous_best_depth_processes], axis=1), 146 | step_size=step_size, 147 | terminal_time=0.0, 148 | initial_state=np.concatenate( 149 | ( 150 | self.exogenous_best_depth_processes[0].initial_state, 151 | self.exogenous_best_depth_processes[1].initial_state, 152 | ), 153 | axis=1, 154 | ), 155 | num_trajectories=num_trajectories, 156 | seed=seed, 157 | ) 158 | 159 | def _get_fill_probabilities(self, depths: np.ndarray) -> np.ndarray: 160 | return (depths > self.current_state) * self.base_fill_probability * np.exp( 161 | -self.fill_exponent * (depths - self.current_state) 162 | ) + (depths <= self.current_state) 163 | 164 | @property 165 | def max_depth(self) -> float: 166 | return -np.log(0.01) / self.fill_exponent + np.max(self.exogenous_best_depth_processes[0].max_value) 167 | 168 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 169 | for process in self.exogenous_best_depth_processes: 170 | process.update(arrivals, fills, actions) 171 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/midprice_models.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | from typing import Optional 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | MidpriceModel = StochasticProcessModel 9 | 10 | from mbt_gym.gym.index_names import BID_INDEX, ASK_INDEX 11 | 12 | class ConstantMidpriceModel(MidpriceModel): 13 | def __init__( 14 | self, 15 | initial_price: float = 100, 16 | terminal_time: float = 1.0, 17 | step_size: float = 0.01, 18 | num_trajectories: int = 1, 19 | seed: Optional[int] = None, 20 | ): 21 | self.terminal_time = terminal_time 22 | super().__init__( 23 | min_value=np.array([[initial_price]]), 24 | max_value=np.array([[initial_price]]), 25 | step_size=step_size, 26 | terminal_time=terminal_time, 27 | initial_state=np.array([[initial_price]]), 28 | num_trajectories=num_trajectories, 29 | seed=seed, 30 | ) 31 | 32 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 33 | pass 34 | 35 | 36 | class BrownianMotionMidpriceModel(MidpriceModel): 37 | def __init__( 38 | self, 39 | drift: float = 0.0, 40 | volatility: float = 2.0, 41 | initial_price: float = 100, 42 | terminal_time: float = 1.0, 43 | step_size: float = 0.01, 44 | num_trajectories: int = 1, 45 | seed: Optional[int] = None, 46 | ): 47 | self.drift = drift 48 | self.volatility = volatility 49 | self.terminal_time = terminal_time 50 | super().__init__( 51 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 52 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 53 | step_size=step_size, 54 | terminal_time=terminal_time, 55 | initial_state=np.array([[initial_price]]), 56 | num_trajectories=num_trajectories, 57 | seed=seed, 58 | ) 59 | 60 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> 
np.ndarray: 61 | self.current_state = ( 62 | self.current_state 63 | + self.drift * self.step_size * np.ones((self.num_trajectories, 1)) 64 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 65 | ) 66 | 67 | def _get_max_value(self, initial_price, terminal_time): 68 | return initial_price + 4 * self.volatility * np.sqrt(terminal_time) 69 | 70 | 71 | class GeometricBrownianMotionMidpriceModel(MidpriceModel): 72 | def __init__( 73 | self, 74 | drift: float = 0.0, 75 | volatility: float = 0.1, 76 | initial_price: float = 100, 77 | terminal_time: float = 1.0, 78 | step_size: float = 0.01, 79 | num_trajectories: int = 1, 80 | seed: Optional[int] = None, 81 | ): 82 | self.drift = drift 83 | self.volatility = volatility 84 | self.terminal_time = terminal_time 85 | super().__init__( 86 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 87 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 88 | step_size=step_size, 89 | terminal_time=terminal_time, 90 | initial_state=np.array([[initial_price]]), 91 | num_trajectories=num_trajectories, 92 | seed=seed, 93 | ) 94 | 95 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 96 | self.current_state = ( 97 | self.current_state 98 | + self.drift * self.current_state * self.step_size 99 | + self.volatility 100 | * self.current_state 101 | * sqrt(self.step_size) 102 | * self.rng.normal(size=(self.num_trajectories, 1)) 103 | ) 104 | 105 | def _get_max_value(self, initial_price, terminal_time): 106 | stdev = sqrt( 107 | initial_price**2 108 | * np.exp(2 * self.drift * terminal_time) 109 | * (np.exp(self.volatility**2 * terminal_time) - 1) 110 | ) 111 | return initial_price * np.exp(self.drift * terminal_time) + 4 * stdev 112 | 113 | 114 | class OuMidpriceModel(MidpriceModel): 115 | def __init__( 116 | self, 117 | mean_reversion_level: float = 0.0, 118 | mean_reversion_speed: float = 1.0, 119 | volatility: float = 2.0, 120 | initial_price: float = 100.0, 121 | terminal_time: float = 1.0, 122 | step_size: float = 0.01, 123 | num_trajectories: int = 1, 124 | seed: Optional[int] = None, 125 | ): 126 | self.mean_reversion_level = mean_reversion_level 127 | self.mean_reversion_speed = mean_reversion_speed 128 | self.volatility = volatility 129 | self.terminal_time = terminal_time 130 | super().__init__( 131 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 132 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 133 | step_size=step_size, 134 | terminal_time=terminal_time, 135 | initial_state=np.array([[initial_price]]), 136 | num_trajectories=num_trajectories, 137 | seed=seed, 138 | ) 139 | 140 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 141 | self.current_state += -self.mean_reversion_speed * ( 142 | self.current_state - self.mean_reversion_level * np.ones((self.num_trajectories, 1)) 143 | ) + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 144 | 145 | def _get_max_value(self, initial_price, terminal_time): 146 | return initial_price + 4 * self.volatility * terminal_time # TODO: What should this be? 
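# ----------------------------------------------------------------------------------
# Illustration only (not part of mbt_gym): the midprice models in this module share a
# common pattern -- the state is a NumPy array of shape (num_trajectories, state_size),
# and update() advances it by one Euler-Maruyama step of length step_size, drawing
# noise from self.rng. Below is a minimal sketch of a custom model following that
# pattern; the class name and parameters are hypothetical, and mbt_gym is assumed to
# be importable (e.g. via sys.path.append("../") as in the notebooks further down).

import numpy as np
from typing import Optional
from mbt_gym.stochastic_processes.midprice_models import MidpriceModel


class DriftlessBachelierMidpriceModel(MidpriceModel):
    """Hypothetical example: arithmetic Brownian midprice with zero drift."""

    def __init__(
        self,
        volatility: float = 1.0,
        initial_price: float = 100.0,
        terminal_time: float = 1.0,
        step_size: float = 0.01,
        num_trajectories: int = 1,
        seed: Optional[int] = None,
    ):
        self.volatility = volatility
        half_width = 4 * volatility * np.sqrt(terminal_time)  # +/- 4 standard deviations, as in the models above
        super().__init__(
            min_value=np.array([[initial_price - half_width]]),
            max_value=np.array([[initial_price + half_width]]),
            step_size=step_size,
            terminal_time=terminal_time,
            initial_state=np.array([[initial_price]]),
            num_trajectories=num_trajectories,
            seed=seed,
        )

    def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None):
        # One Euler-Maruyama step: S_{t+dt} = S_t + volatility * sqrt(dt) * Z, with Z ~ N(0, 1)
        self.current_state = self.current_state + self.volatility * np.sqrt(self.step_size) * self.rng.normal(
            size=(self.num_trajectories, 1)
        )
# ----------------------------------------------------------------------------------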
147 | 148 | 149 | class ShortTermOuAlphaMidpriceModel(MidpriceModel): 150 | def __init__( 151 | self, 152 | volatility: float = 2.0, 153 | ou_process: OuMidpriceModel = None, 154 | initial_price: float = 100.0, 155 | terminal_time: float = 1.0, 156 | step_size: float = 0.01, 157 | num_trajectories: int = 1, 158 | seed: Optional[int] = None, 159 | ): 160 | self.volatility = volatility 161 | self.ou_process = ou_process or OuMidpriceModel(initial_price=0.0) 162 | self.terminal_time = terminal_time 163 | super().__init__( 164 | min_value=np.array( 165 | [ 166 | [ 167 | initial_price - (self._get_max_asset_price(initial_price, terminal_time) - initial_price), 168 | self.ou_process.min_value, 169 | ] 170 | ] 171 | ), 172 | max_value=np.array([[self._get_max_asset_price(initial_price, terminal_time), self.ou_process.max_value]]), 173 | step_size=step_size, 174 | terminal_time=terminal_time, 175 | initial_state=np.array([[initial_price, self.ou_process.initial_state[0][0]]]), 176 | num_trajectories=num_trajectories, 177 | seed=seed, 178 | ) 179 | 180 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 181 | self.current_state[:, 0] = ( 182 | self.current_state[:, 0] 183 | + self.ou_process.current_state * self.step_size * np.ones((self.num_trajectories, 1)) 184 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 185 | ) 186 | self.ou_process.update(arrivals, fills, actions) 187 | self.current_state[:, 1] = self.ou_process.current_state 188 | 189 | def _get_max_asset_price(self, initial_price, terminal_time): 190 | return initial_price + 4 * self.volatility * terminal_time # TODO: what should this be? 191 | 192 | 193 | class BrownianMotionJumpMidpriceModel(MidpriceModel): 194 | def __init__( 195 | self, 196 | drift: float = 0.0, 197 | volatility: float = 2.0, 198 | jump_size: float = 1.0, 199 | initial_price: float = 100, 200 | terminal_time: float = 1.0, 201 | step_size: float = 0.01, 202 | num_trajectories: int = 1, 203 | seed: Optional[int] = None, 204 | ): 205 | self.drift = drift 206 | self.volatility = volatility 207 | self.jump_size = jump_size 208 | self.terminal_time = terminal_time 209 | super().__init__( 210 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 211 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 212 | step_size=step_size, 213 | terminal_time=terminal_time, 214 | initial_state=np.array([[initial_price]]), 215 | num_trajectories=num_trajectories, 216 | seed=seed, 217 | ) 218 | 219 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 220 | fills_bid = fills[:, BID_INDEX] * arrivals[:, BID_INDEX] 221 | fills_ask = fills[:, ASK_INDEX] * arrivals[:, ASK_INDEX] 222 | self.current_state = ( 223 | self.current_state 224 | + self.drift * self.step_size * np.ones((self.num_trajectories, 1)) 225 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 226 | + (self.jump_size * fills_ask - self.jump_size * fills_bid).reshape(-1,1) 227 | ) 228 | 229 | def _get_max_value(self, initial_price, terminal_time): 230 | return initial_price + 4 * self.volatility * terminal_time 231 | 232 | 233 | class OuJumpMidpriceModel(MidpriceModel): 234 | def __init__( 235 | self, 236 | mean_reversion_level: float = 0.0, 237 | mean_reversion_speed: float = 1.0, 238 | volatility: float = 2.0, 239 | 
jump_size: float = 1.0, 240 | initial_price: float = 100.0, 241 | terminal_time: float = 1.0, 242 | step_size: float = 0.01, 243 | num_trajectories: int = 1, 244 | seed: Optional[int] = None, 245 | ): 246 | self.mean_reversion_level = mean_reversion_level 247 | self.mean_reversion_speed = mean_reversion_speed 248 | self.volatility = volatility 249 | self.jump_size = jump_size 250 | self.terminal_time = terminal_time 251 | super().__init__( 252 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 253 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 254 | step_size=step_size, 255 | terminal_time=terminal_time, 256 | initial_state=np.array([[initial_price]]), 257 | num_trajectories=num_trajectories, 258 | seed=seed, 259 | ) 260 | 261 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 262 | fills_bid = fills[:, BID_INDEX] * arrivals[:, BID_INDEX] 263 | fills_ask = fills[:, ASK_INDEX] * arrivals[:, ASK_INDEX] 264 | self.current_state = ( 265 | self.current_state 266 | - self.mean_reversion_speed 267 | * (self.current_state - self.mean_reversion_level * np.ones((self.num_trajectories, 1))) 268 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 269 | + (self.jump_size * fills_ask - self.jump_size * fills_bid).reshape(-1,1) 270 | ) 271 | 272 | def _get_max_value(self, initial_price, terminal_time): 273 | return initial_price + 4 * self.volatility * terminal_time 274 | 275 | 276 | class ShortTermJumpAlphaMidpriceModel(MidpriceModel): 277 | def __init__( 278 | self, 279 | volatility: float = 2.0, 280 | ou_jump_process: OuJumpMidpriceModel = None, 281 | initial_price: float = 100.0, 282 | terminal_time: float = 1.0, 283 | step_size: float = 0.01, 284 | num_trajectories: int = 1, 285 | seed: Optional[int] = None, 286 | ): 287 | self.volatility = volatility 288 | self.ou_jump_process = ou_jump_process or OuJumpMidpriceModel(initial_price=0.0) 289 | self.terminal_time = terminal_time 290 | super().__init__( 291 | min_value=np.array( 292 | [ 293 | [ 294 | initial_price - (self._get_max_asset_price(initial_price, terminal_time) - initial_price), 295 | self.ou_jump_process.min_value, 296 | ] 297 | ] 298 | ), 299 | max_value=np.array( 300 | [[self._get_max_asset_price(initial_price, terminal_time), self.ou_jump_process.max_value]] 301 | ), 302 | step_size=step_size, 303 | terminal_time=terminal_time, 304 | initial_state=np.array([[initial_price, self.ou_jump_process.initial_state[0][0]]]), 305 | num_trajectories=num_trajectories, 306 | seed=seed, 307 | ) 308 | 309 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 310 | self.current_state[:, 0] = ( 311 | self.current_state[:, 0] 312 | + self.ou_jump_process.current_state * self.step_size * np.ones((self.num_trajectories, 1)) 313 | + self.volatility * sqrt(self.step_size) * self.rng.normal(size=(self.num_trajectories, 1)) 314 | ) 315 | self.ou_jump_process.update(arrivals, fills, actions) 316 | self.current_state[:, 1] = self.ou_jump_process.current_state 317 | 318 | def _get_max_asset_price(self, initial_price, terminal_time): 319 | return initial_price + 4 * self.volatility * terminal_time # TODO: what should this be? 
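# ----------------------------------------------------------------------------------
# Illustration only (not part of mbt_gym): the *Jump* midprice models above shift the
# price by jump_size whenever one of the agent's quotes is filled -- an ask-side fill
# pushes the midprice up, a bid-side fill pushes it down. A worked example of that
# jump term with made-up fill/arrival arrays; BID_INDEX == 0 and ASK_INDEX == 1 is an
# assumption here (the actual values live in mbt_gym/gym/index_names.py).

import numpy as np
from mbt_gym.gym.index_names import BID_INDEX, ASK_INDEX

jump_size = 1.0
arrivals = np.array([[1, 1], [1, 0]])  # exogenous orders arriving on the (bid, ask) sides
fills = np.array([[0, 1], [1, 0]])     # which of the agent's (bid, ask) quotes were filled

fills_bid = fills[:, BID_INDEX] * arrivals[:, BID_INDEX]
fills_ask = fills[:, ASK_INDEX] * arrivals[:, ASK_INDEX]
jump = (jump_size * fills_ask - jump_size * fills_bid).reshape(-1, 1)
# Under the assumed index values, jump == [[1.], [-1.]]: in trajectory 0 the agent's ask
# is filled and the midprice jumps up; in trajectory 1 its bid is filled and it jumps down.
# ----------------------------------------------------------------------------------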
320 | 321 | 322 | class HestonMidpriceModel(MidpriceModel): 323 | # Current/Initial State with the Heston model will consist of price AND current variance, not just price 324 | def __init__( 325 | self, 326 | drift: float = 0.05, 327 | volatility_mean_reversion_rate: float = 3, 328 | volatility_mean_reversion_level: float = 0.04, 329 | weiner_correlation: float = -0.8, 330 | volatility_of_volatility: float = 0.6, 331 | initial_price: float = 100, 332 | initial_variance: float = 0.2**2, 333 | terminal_time: float = 1.0, 334 | step_size: float = 0.01, 335 | num_trajectories: int = 1, 336 | seed: Optional[int] = None, 337 | ): 338 | self.drift = drift 339 | self.volatility_mean_reversion_rate = volatility_mean_reversion_rate 340 | self.terminal_time = terminal_time 341 | self.weiner_correlation = weiner_correlation 342 | self.volatility_mean_reversion_level = volatility_mean_reversion_level 343 | self.volatility_of_volatility = volatility_of_volatility 344 | super().__init__( 345 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 346 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 347 | step_size=step_size, 348 | terminal_time=terminal_time, 349 | initial_state=np.array([[initial_price, initial_variance]]), 350 | num_trajectories=num_trajectories, 351 | seed=seed, 352 | ) 353 | 354 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 355 | weiner_means = np.array([0, 0]) 356 | weiner_corr = np.array([[1, self.weiner_correlation], [self.weiner_correlation, 1]]) 357 | weiners = np.random.multivariate_normal(weiner_means, cov=weiner_corr, size=self.num_trajectories) 358 | self.current_state[:, 0] = ( 359 | self.current_state[:, 0] 360 | + self.drift * self.current_state[:, 0] * self.step_size 361 | + np.sqrt(self.current_state[:, 1] * self.step_size) * self.current_state[:, 0] * weiners[:, 0] 362 | ) 363 | self.current_state[:, 1] = np.abs( 364 | self.current_state[:, 1] 365 | + self.volatility_mean_reversion_rate 366 | * (self.volatility_mean_reversion_level - self.current_state[:, 1]) 367 | * self.step_size 368 | + self.volatility_of_volatility * np.sqrt(self.current_state[:, 1] * self.step_size) * weiners[:, 1] 369 | ) 370 | 371 | def _get_max_value(self, initial_price, terminal_time): 372 | return initial_price + 4 * self.volatility_mean_reversion_level * terminal_time 373 | 374 | 375 | class ConstantElasticityOfVarianceMidpriceModel(MidpriceModel): 376 | def __init__( 377 | self, 378 | drift: float = 0.0, 379 | volatility: float = 0.1, 380 | gamma: float = 1, # gamma = 1 is just gbm 381 | initial_price: float = 100, 382 | terminal_time: float = 1.0, 383 | step_size: float = 0.01, 384 | num_trajectories: int = 1, 385 | seed: Optional[int] = None, 386 | ): 387 | self.drift = drift 388 | self.volatility = volatility 389 | self.gamma = gamma 390 | self.terminal_time = terminal_time 391 | super().__init__( 392 | min_value=np.array([[initial_price - (self._get_max_value(initial_price, terminal_time) - initial_price)]]), 393 | max_value=np.array([[self._get_max_value(initial_price, terminal_time)]]), 394 | step_size=step_size, 395 | terminal_time=terminal_time, 396 | initial_state=np.array([[initial_price]]), 397 | num_trajectories=num_trajectories, 398 | seed=seed, 399 | ) 400 | 401 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None) -> np.ndarray: 402 | self.current_state = ( 403 | 
self.current_state 404 | + self.current_state * self.drift * self.step_size # *np.ones((self.num_trajectories, 1)) 405 | + self.volatility 406 | * (self.current_state**self.gamma) 407 | * np.sqrt(self.step_size) 408 | * np.random.normal(size=self.num_trajectories) 409 | ) 410 | 411 | def _get_max_value(self, initial_price, terminal_time): 412 | return initial_price + 4 * self.volatility * terminal_time 413 | 414 | 415 | 416 | -------------------------------------------------------------------------------- /mbt_gym/stochastic_processes/price_impact_models.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Optional 3 | 4 | import numpy as np 5 | 6 | from mbt_gym.stochastic_processes.StochasticProcessModel import StochasticProcessModel 7 | 8 | 9 | class PriceImpactModel(StochasticProcessModel): 10 | """PriceImpactModel models the price impact of orders in the order book.""" 11 | 12 | def __init__( 13 | self, 14 | min_value: np.ndarray, 15 | max_value: np.ndarray, 16 | step_size: float, 17 | terminal_time: float, 18 | initial_state: np.ndarray, 19 | num_trajectories: int = 1, 20 | seed: int = None, 21 | ): 22 | super().__init__(min_value, max_value, step_size, terminal_time, initial_state, num_trajectories, seed) 23 | 24 | @abc.abstractmethod 25 | def get_impact(self, action: np.ndarray) -> np.ndarray: 26 | pass 27 | 28 | @property 29 | @abc.abstractmethod 30 | def max_speed(self) -> float: 31 | pass 32 | 33 | 34 | class TemporaryPowerPriceImpact(PriceImpactModel): 35 | def __init__( 36 | self, 37 | temporary_impact_coefficient: float = 0.01, 38 | temporary_impact_exponent: float = 1.0, 39 | num_trajectories: int = 1, 40 | ): 41 | self.temporary_impact_coefficient = temporary_impact_coefficient 42 | self.temporary_impact_exponent = temporary_impact_exponent 43 | super().__init__( 44 | min_value=np.array([[]]), 45 | max_value=np.array([[]]), 46 | step_size=None, 47 | terminal_time=0.0, 48 | initial_state=np.array([[]]), 49 | num_trajectories=num_trajectories, 50 | seed=None, 51 | ) 52 | 53 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 54 | pass 55 | 56 | def get_impact(self, action) -> np.ndarray: 57 | return self.temporary_impact_coefficient * action**self.temporary_impact_exponent 58 | 59 | @property 60 | def max_speed(self) -> float: 61 | return 100.0 # TODO: link to asset price perhaps? 
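# ----------------------------------------------------------------------------------
# Illustration only (not part of mbt_gym): TemporaryPowerPriceImpact above maps a
# trading speed to an instantaneous impact of
#     impact = temporary_impact_coefficient * speed ** temporary_impact_exponent,
# so with exponent 1.0 it reduces to plain linear temporary impact. A minimal usage
# sketch with made-up trading speeds (assumes mbt_gym is importable):

import numpy as np
from mbt_gym.stochastic_processes.price_impact_models import TemporaryPowerPriceImpact

impact_model = TemporaryPowerPriceImpact(
    temporary_impact_coefficient=0.01, temporary_impact_exponent=1.0, num_trajectories=3
)
speeds = np.array([[1.0], [5.0], [10.0]])  # one trading speed per trajectory
print(impact_model.get_impact(speeds))     # -> [[0.01], [0.05], [0.1]] (up to NumPy print formatting)
# ----------------------------------------------------------------------------------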
62 | 63 | 64 | class TemporaryAndPermanentPriceImpact(PriceImpactModel): 65 | def __init__( 66 | self, 67 | temporary_impact_coefficient: float = 0.01, 68 | permanent_impact_coefficient: float = 0.01, 69 | n_steps: int = 20 * 10, 70 | terminal_time: float = 1.0, 71 | num_trajectories: int = 1, 72 | ): 73 | self.temporary_impact_coefficient = temporary_impact_coefficient 74 | self.permanent_impact_coefficient = permanent_impact_coefficient 75 | self.n_steps = n_steps 76 | self.terminal_time = terminal_time 77 | self.step_size = self.terminal_time / self.n_steps 78 | super().__init__( 79 | min_value=np.array([[-self.max_speed * self.terminal_time * self.permanent_impact_coefficient]]), 80 | max_value=np.array([[self.max_speed * self.terminal_time * self.permanent_impact_coefficient]]), 81 | step_size=self.step_size, 82 | terminal_time=0.0, 83 | initial_state=np.array([[0]]), 84 | num_trajectories=num_trajectories, 85 | seed=None, 86 | ) 87 | 88 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 89 | self.current_state = self.current_state + self.permanent_impact_coefficient * actions * self.step_size 90 | 91 | def get_impact(self, action) -> np.ndarray: 92 | return self.temporary_impact_coefficient * action + self.current_state 93 | 94 | @property 95 | def max_speed(self) -> float: 96 | return 10.0 # TODO: link to asset price perhaps? 97 | 98 | 99 | class TemporaryAndTransientPriceImpact(PriceImpactModel): 100 | def __init__( 101 | self, 102 | temporary_impact_coefficient: float = 0.01, 103 | transient_impact_coefficient: float = 0.01, # kappa in Neuman-Voß (2022) 104 | resilience_coefficient: float = 0.01, # rho in Neuman-Voß (2022) 105 | initial_transient_impact: float = 0.01, # y in Neuman-Voß (2022) 106 | linear_kernel_coefficient: float = 0.01, # gamma in Neuman-Voß (2022) 107 | n_steps: int = 20 * 10, 108 | terminal_time: float = 1.0, 109 | num_trajectories: int = 1, 110 | ): 111 | self.temporary_impact_coefficient = temporary_impact_coefficient 112 | self.transient_impact_coefficient = transient_impact_coefficient 113 | self.resilience_coefficient = resilience_coefficient 114 | self.initial_transient_impact = initial_transient_impact 115 | self.linear_kernel_coefficient = linear_kernel_coefficient 116 | self.n_steps = n_steps 117 | self.terminal_time = terminal_time 118 | self.step_size = self.terminal_time / self.n_steps 119 | super().__init__( 120 | min_value=np.array([[-self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 121 | max_value=np.array([[self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 122 | step_size=self.step_size, 123 | terminal_time=0.0, 124 | initial_state=np.array([[self.initial_transient_impact]]), 125 | num_trajectories=num_trajectories, 126 | seed=None, 127 | ) 128 | 129 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 130 | self.current_state = (self.current_state - self.resilience_coefficient * self.current_state * self.step_size 131 | + self.linear_kernel_coefficient * actions * self.step_size) 132 | 133 | def get_impact(self, action) -> np.ndarray: 134 | return self.temporary_impact_coefficient * action + self.transient_impact_coefficient * self.current_state 135 | 136 | @property 137 | def max_speed(self) -> float: 138 | return 10.0 # TODO: link to asset price 139 | 140 | 141 | 142 | class TransientPriceImpact(PriceImpactModel): 143 | def __init__( 144 | self, 145 | 
transient_impact_coefficient: float = 0.01, # kappa in Neuman-Voß (2022) 146 | resilience_coefficient: float = 0.01, # rho in Neuman-Voß (2022) 147 | initial_transient_impact: float = 0.01, # y in Neuman-Voß (2022) 148 | linear_kernel_coefficient: float = 0.01, # gamma in Neuman-Voß (2022) 149 | n_steps: int = 20 * 10, 150 | terminal_time: float = 1.0, 151 | num_trajectories: int = 1, 152 | ): 153 | self.transient_impact_coefficient = transient_impact_coefficient 154 | self.resilience_coefficient = resilience_coefficient 155 | self.initial_transient_impact = initial_transient_impact 156 | self.linear_kernel_coefficient = linear_kernel_coefficient 157 | self.n_steps = n_steps 158 | self.terminal_time = terminal_time 159 | self.step_size = self.terminal_time / self.n_steps 160 | super().__init__( 161 | min_value=np.array([[-self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 162 | max_value=np.array([[self.max_speed * self.terminal_time * self.transient_impact_coefficient]]), 163 | step_size=self.step_size, 164 | terminal_time=0.0, 165 | initial_state=np.array([[self.initial_transient_impact]]), 166 | num_trajectories=num_trajectories, 167 | seed=None, 168 | ) 169 | 170 | def update(self, arrivals: np.ndarray, fills: np.ndarray, actions: np.ndarray, state: np.ndarray = None): 171 | self.current_state = (self.current_state - self.resilience_coefficient * self.current_state * self.step_size 172 | + self.linear_kernel_coefficient * actions * self.step_size) 173 | 174 | def get_impact(self, action) -> np.ndarray: 175 | return self.transient_impact_coefficient * self.current_state 176 | 177 | @property 178 | def max_speed(self) -> float: 179 | return 10.0 # TODO: link to asset price 180 | 181 | 182 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | check_untyped_defs = True 4 | -------------------------------------------------------------------------------- /notebooks/Learning_to_make_a_market_with_mbt_gym_and_Stable_Baselines_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b097fbb8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Learning to make a market with mbt_gym and Stable Baselines 3" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "2b918608", 14 | "metadata": {}, 15 | "source": [ 16 | "### Import external modules" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "56ffdffb", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import gym\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import numpy as np\n", 29 | "\n", 30 | "from stable_baselines3 import PPO\n", 31 | "from stable_baselines3.common.callbacks import EvalCallback\n", 32 | "from stable_baselines3.common.env_util import make_vec_env\n", 33 | "from stable_baselines3.common.vec_env import VecMonitor" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "16d0a2c9", 39 | "metadata": {}, 40 | "source": [ 41 | "### Add mbt-gym to path" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "387934ba", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import sys\n", 52 | "sys.path.append(\"../\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "5cb89dbb", 59 | "metadata": 
{}, 60 | "outputs": [], 61 | "source": [ 62 | "from mbt_gym.agents.BaselineAgents import CarteaJaimungalMmAgent\n", 63 | "from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory\n", 64 | "from mbt_gym.gym.StableBaselinesTradingEnvironment import StableBaselinesTradingEnvironment\n", 65 | "from mbt_gym.gym.TradingEnvironment import TradingEnvironment\n", 66 | "from mbt_gym.gym.wrappers import *\n", 67 | "from mbt_gym.rewards.RewardFunctions import PnL, CjMmCriterion\n", 68 | "from mbt_gym.stochastic_processes.midprice_models import BrownianMotionMidpriceModel\n", 69 | "from mbt_gym.stochastic_processes.arrival_models import PoissonArrivalModel\n", 70 | "from mbt_gym.stochastic_processes.fill_probability_models import ExponentialFillFunction\n", 71 | "from mbt_gym.gym.ModelDynamics import LimitOrderModelDynamics" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "535d65b0", 77 | "metadata": {}, 78 | "source": [ 79 | "### Create market making environment" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "id": "98ab1846", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "terminal_time = 1.0\n", 90 | "arrival_rate = 10.0\n", 91 | "n_steps = int(10 * terminal_time * arrival_rate)\n", 92 | "phi = 0.5\n", 93 | "alpha = 0.001" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "11432746", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def get_cj_env(num_trajectories:int = 1):\n", 104 | " fill_exponent = 1\n", 105 | " sigma = 0.1\n", 106 | " initial_inventory = (-4,5)\n", 107 | " initial_price = 100\n", 108 | " step_size = 1/n_steps\n", 109 | " timestamps = np.linspace(0, terminal_time, n_steps + 1)\n", 110 | " midprice_model = BrownianMotionMidpriceModel(volatility=sigma, step_size=1/n_steps,\n", 111 | " num_trajectories=num_trajectories)\n", 112 | " arrival_model = PoissonArrivalModel(intensity=np.array([arrival_rate, arrival_rate]), \n", 113 | " step_size=1/n_steps, \n", 114 | " num_trajectories=num_trajectories)\n", 115 | " fill_probability_model = ExponentialFillFunction(fill_exponent=fill_exponent, \n", 116 | " step_size=1/n_steps,\n", 117 | " num_trajectories=num_trajectories)\n", 118 | " LOtrader = LimitOrderModelDynamics(midprice_model = midprice_model, arrival_model = arrival_model, \n", 119 | " fill_probability_model = fill_probability_model,\n", 120 | " num_trajectories = num_trajectories)\n", 121 | " reward_function = CjMmCriterion(per_step_inventory_aversion = phi, terminal_inventory_aversion = alpha)\n", 122 | " env_params = dict(terminal_time=terminal_time, \n", 123 | " n_steps=n_steps,\n", 124 | " initial_inventory = initial_inventory,\n", 125 | " model_dynamics = LOtrader,\n", 126 | " max_inventory=n_steps,\n", 127 | " normalise_action_space = False,\n", 128 | " normalise_observation_space = False,\n", 129 | " reward_function = reward_function,\n", 130 | " num_trajectories=num_trajectories)\n", 131 | " return TradingEnvironment(**env_params)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "0d29022e", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "num_trajectories = 1000\n", 142 | "env = ReduceStateSizeWrapper(get_cj_env(num_trajectories))\n", 143 | "sb_env = StableBaselinesTradingEnvironment(trading_env=env)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "3f837dc9", 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | 
"# Monitor sb_env\n", 154 | "sb_env = VecMonitor(sb_env)\n", 155 | "# Add directory for tensorboard logging and best model\n", 156 | "tensorboard_logdir = \"./tensorboard/PPO-learning-CJ/\"\n", 157 | "best_model_path = \"./SB_models/PPO-best-CJ\"" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "3106df91", 163 | "metadata": {}, 164 | "source": [ 165 | "### Define PPO policy" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "b5d0e1c5", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "policy_kwargs = dict(net_arch=[dict(pi=[256, 256], vf=[256, 256])])\n", 176 | "PPO_params = {\"policy\":'MlpPolicy', \"env\": sb_env, \"verbose\":1, \n", 177 | " \"policy_kwargs\":policy_kwargs, \n", 178 | " \"tensorboard_log\":tensorboard_logdir,\n", 179 | " \"n_epochs\":3,\n", 180 | " \"batch_size\": int(n_steps * num_trajectories / 10), \n", 181 | " \"n_steps\": int(n_steps)}\n", 182 | "callback_params = dict(eval_env=sb_env, n_eval_episodes = 2048, #200 before (n_eval_episodes)\n", 183 | " best_model_save_path = best_model_path, \n", 184 | " deterministic=True)\n", 185 | "\n", 186 | "callback = EvalCallback(**callback_params)\n", 187 | "model = PPO(**PPO_params, device=\"cpu\")" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "01707612", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "model.learn(total_timesteps = 10_000_000) # Increase number of training timesteps according to computing resources" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "id": "6d74b6cd", 203 | "metadata": {}, 204 | "source": [ 205 | "## Comparing the learnt policy to the optimal policy" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "id": "bcc4d5d9", 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from mbt_gym.agents.SbAgent import SbAgent" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "id": "f80b78c0", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "ppo_agent = SbAgent(model)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "3dcb28e9", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "inventories = np.arange(-3,4,1)\n", 236 | "bid_actions = []\n", 237 | "ask_actions = []\n", 238 | "for inventory in inventories:\n", 239 | " bid_action, ask_action = ppo_agent.get_action(np.array([[inventory,0.5]])).reshape(-1)\n", 240 | " bid_actions.append(bid_action)\n", 241 | " ask_actions.append(ask_action) " 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "de3df21d", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "#ppo_agent.get_action(np.array([[inventory,0.5]]))" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "fb6bb5cc", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "cj_agent = CarteaJaimungalMmAgent(env=get_cj_env())" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "id": "a6a344c6", 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "# Get the Cartea Jaimungal action\n", 272 | "cj_bid_actions = []\n", 273 | "cj_ask_actions = []\n", 274 | "for inventory in inventories:\n", 275 | " bid_action, ask_action = cj_agent.get_action(np.array([[0,inventory,0.5]])).reshape(-1)\n", 276 | " 
cj_bid_actions.append(bid_action)\n", 277 | " cj_ask_actions.append(ask_action)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "615c8e8f", 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "plt.plot(inventories, bid_actions, label = \"bid\", color = \"k\")\n", 288 | "plt.plot(inventories, ask_actions, label = \"ask\", color = \"r\")\n", 289 | "plt.plot(inventories, cj_bid_actions, label = \"bid cj\", color = \"k\", linestyle = \"--\")\n", 290 | "plt.plot(inventories, cj_ask_actions, label = \"ask cj\", color = \"r\", linestyle = \"--\")\n", 291 | "plt.legend()" 292 | ] 293 | } 294 | ], 295 | "metadata": { 296 | "hide_input": false, 297 | "kernelspec": { 298 | "display_name": "Python 3 (ipykernel)", 299 | "language": "python", 300 | "name": "python3" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 3 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython3", 312 | "version": "3.10.10" 313 | }, 314 | "toc": { 315 | "base_numbering": 1, 316 | "nav_menu": {}, 317 | "number_sections": true, 318 | "sideBar": true, 319 | "skip_h1_title": false, 320 | "title_cell": "Table of Contents", 321 | "title_sidebar": "Contents", 322 | "toc_cell": false, 323 | "toc_position": {}, 324 | "toc_section_display": true, 325 | "toc_window_display": false 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 5 330 | } 331 | -------------------------------------------------------------------------------- /notebooks/Test_2_-_replicate_CJP_2015_-_closed-form_solution_for_value_function.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "08005fcb", 6 | "metadata": {}, 7 | "source": [ 8 | "# Cartea Jaimungal Penalva 2015" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "96ce4fb2", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import sys\n", 19 | "sys.path.append(\"../\") # This version of the notebook is in the subfolder \"notebooks\" of the repo\n", 20 | "\n", 21 | "import gym\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "import seaborn as sns\n", 26 | "import scipy\n", 27 | "\n", 28 | "from copy import deepcopy\n", 29 | "\n", 30 | "\n", 31 | "from mbt_gym.agents.BaselineAgents import *\n", 32 | "from mbt_gym.gym.TradingEnvironment import TradingEnvironment\n", 33 | "from mbt_gym.gym.helpers.generate_trajectory import generate_trajectory\n", 34 | "from mbt_gym.gym.helpers.plotting import *\n", 35 | "from mbt_gym.stochastic_processes.midprice_models import *\n", 36 | "from mbt_gym.stochastic_processes.arrival_models import *\n", 37 | "from mbt_gym.stochastic_processes.fill_probability_models import *\n", 38 | "import torch\n", 39 | "#print(torch.cuda.is_available())\n", 40 | "#print(torch.cuda.get_device_name())" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "41b5cd51", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from mbt_gym.gym.ModelDynamics import LimitOrderModelDynamics\n", 51 | "from mbt_gym.rewards.RewardFunctions import CjMmCriterion\n", 52 | "seed = 410\n", 53 | "max_inventory = 100" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "00dc33ca", 60 | 
"metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def get_env(num_trajectories:int = 1,\n", 64 | " initial_price = 100,\n", 65 | " terminal_time = 1.0,\n", 66 | " sigma = 2.0,\n", 67 | " n_steps = 1000,\n", 68 | " initial_inventory = 0,\n", 69 | " arrival_rate = 140,\n", 70 | " fill_exponent = 1.5,\n", 71 | " per_step_inventory_aversion = 0.01,\n", 72 | " terminal_inventory_aversion = 0.001): \n", 73 | " midprice_model = BrownianMotionMidpriceModel(initial_price = initial_price, \n", 74 | " volatility=sigma, step_size=terminal_time/n_steps,\n", 75 | " terminal_time = terminal_time,\n", 76 | " num_trajectories=num_trajectories)\n", 77 | " arrival_model = PoissonArrivalModel(intensity=np.array([arrival_rate, arrival_rate]), \n", 78 | " step_size=terminal_time/n_steps, \n", 79 | " num_trajectories=num_trajectories)\n", 80 | " fill_probability_model = ExponentialFillFunction(fill_exponent=fill_exponent, \n", 81 | " step_size=terminal_time/n_steps,\n", 82 | " num_trajectories=num_trajectories)\n", 83 | " LOtrader = LimitOrderModelDynamics(midprice_model = midprice_model, arrival_model = arrival_model, \n", 84 | " fill_probability_model = fill_probability_model,\n", 85 | " num_trajectories = num_trajectories)\n", 86 | " reward = CjMmCriterion(per_step_inventory_aversion = per_step_inventory_aversion,\n", 87 | " terminal_inventory_aversion = terminal_inventory_aversion,\n", 88 | " terminal_time = terminal_time)\n", 89 | " env_params = dict(terminal_time=terminal_time, \n", 90 | " n_steps=n_steps,\n", 91 | " seed = seed,\n", 92 | " initial_inventory = initial_inventory,\n", 93 | " model_dynamics = LOtrader,\n", 94 | " reward_function = reward,\n", 95 | " max_inventory=max_inventory,\n", 96 | " normalise_action_space = False,\n", 97 | " normalise_observation_space = False,\n", 98 | " num_trajectories=num_trajectories)\n", 99 | " return TradingEnvironment(**env_params)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "d0c171a6", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "env = get_env()\n", 110 | "agent = CarteaJaimungalMmAgent(env = env, max_inventory = max_inventory)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "7dd86124", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "plot_trajectory(env, agent, seed = seed)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "ed76675c", 126 | "metadata": {}, 127 | "source": [ 128 | "### Comparing the value function to the simulated optimal agent " 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "b4b4a440", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "num_trajectories = 1_000\n", 139 | "vec_env = get_env(num_trajectories)\n", 140 | "vec_agent = CarteaJaimungalMmAgent(env = vec_env, max_inventory = max_inventory)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "9ea588e8", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "observations, actions, rewards = generate_trajectory(vec_env, vec_agent)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "97b71326", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "results, fig, total_rewards = generate_results_table_and_hist(vec_env=vec_env,agent=vec_agent)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "id": "f71e7174", 166 | "metadata": {}, 167 
| "source": [ 168 | "# Value function versus total rewards" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "db4aa92d", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "vec_env.reset()\n", 179 | "agent = CarteaJaimungalMmAgent(env = vec_env, max_inventory = max_inventory)\n", 180 | "agent.calculate_true_value_function(vec_env.state[0].reshape(1,-1))" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "id": "0a766459", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "np.mean(total_rewards), np.std(total_rewards)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "id": "fe0ed865", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "true_mean = agent.calculate_true_value_function(vec_env.state[0].reshape(1,-1))[0,0]\n", 201 | "sample_mean = np.mean(total_rewards)\n", 202 | "N = len(total_rewards)\n", 203 | "sample_variance = np.var(total_rewards) * N/(N-1)\n", 204 | "T = (sample_mean - true_mean)/ (np.sqrt(sample_variance) / np.sqrt(N))\n", 205 | "q_l, q_u = scipy.stats.t(df=(N-1)).ppf((0.1, 0.9))\n", 206 | "if T>q_l and Tq_l and Tq_l and Tq_l and T=1.13.1 16 | -------------------------------------------------------------------------------- /requirements_no_versions.txt: -------------------------------------------------------------------------------- 1 | # specific version of setuptools needed for gym 0.21 2 | # https://github.com/openai/gym/issues/3176 3 | # (it is not clear why it goes for gym 0.21 not e.g., 0.26.1) 4 | setuptools==65.5.0 5 | black 6 | flake8 7 | invoke 8 | jupyter 9 | jupyter_nbextensions_configurator 10 | matplotlib 11 | mypy 12 | numpy 13 | pandas 14 | seaborn 15 | stable_baselines3 16 | stochastic 17 | tensorboard 18 | torch 19 | gym 20 | -------------------------------------------------------------------------------- /roadmap.md: -------------------------------------------------------------------------------- 1 | # Roadmap for mbt_gym 2 | 3 | ## General additions 4 | 5 | - [Stochastic and local volatility models](https://en.wikipedia.org/wiki/Stochastic_volatility) for the midprice 6 | process. For example, the Heston or CEV model. 7 | - Hawkes processes with more general kernels for the arrival rate process. 8 | - Robust adversarial reinforcement learning. See, for example, 9 | [the paper by Spooner and Savani](https://arxiv.org/abs/2003.01820). 10 | 11 | ## Market making 12 | 13 | - Market making with multiple assets. 14 | 15 | ## Optimal execution 16 | 17 | - More general price impact processes. 
18 | - Optimal execution with multiple assets 19 | 20 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import task 2 | 3 | 4 | MODULES_TO_CHECK = ["mbt_gym", "*.py"] 5 | MODULES_TO_CHECK_STR = " ".join(MODULES_TO_CHECK) 6 | BLACK_PATHS_TO_IGNORE = [] 7 | BLACK_PATHS_TO_IGNORE_STR = " ".join(BLACK_PATHS_TO_IGNORE) 8 | MYPY_PATHS_TO_IGNORE = [] 9 | MYPY_EXCLUSION_STR = "" 10 | for path in MYPY_PATHS_TO_IGNORE: 11 | MYPY_EXCLUSION_STR += " --exclude " + path 12 | FLAKE_ERROR_CODE_IGNORE_STR = "mbt_gym/*.py:E203" 13 | 14 | 15 | @task 16 | def black_reformat(c): 17 | if len(BLACK_PATHS_TO_IGNORE) > 0: 18 | c.run(f"black --line-length 120 {MODULES_TO_CHECK_STR} --exclude {BLACK_PATHS_TO_IGNORE_STR}") 19 | else: 20 | c.run(f"black --line-length 120 {MODULES_TO_CHECK_STR}") 21 | 22 | 23 | @task 24 | def check_python(c): 25 | if len(BLACK_PATHS_TO_IGNORE) > 0: 26 | c.run(f"black --check --line-length 120 {MODULES_TO_CHECK_STR} --exclude {BLACK_PATHS_TO_IGNORE_STR}") 27 | else: 28 | c.run(f"black --check --line-length 120 {MODULES_TO_CHECK_STR}") 29 | print("Running flake8...") 30 | c.run(f"flake8 --max-line-length 120 {MODULES_TO_CHECK_STR} --per-file-ignores={FLAKE_ERROR_CODE_IGNORE_STR}") 31 | print("No flake8 errors") 32 | print("Running mypy...") 33 | c.run(f"mypy -p {MODULES_TO_CHECK[0]}" + MYPY_EXCLUSION_STR) 34 | print("No mypy errors") 35 | c.run("python check_init_files.py") 36 | c.run("python check_all_py_imports.py") 37 | --------------------------------------------------------------------------------
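# ----------------------------------------------------------------------------------
# Illustration only: the tasks defined in tasks.py above are intended to be run with
# the `invoke` CLI from the repository root. By default invoke exposes underscore task
# names with hyphens, so the expected commands would be along the lines of:
#
#   invoke black-reformat   # reformat mbt_gym and the top-level *.py files with black (line length 120)
#   invoke check-python     # black --check, flake8 and mypy, then the two check_*.py scripts
# ----------------------------------------------------------------------------------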