├── .gitignore ├── LICENSE.txt ├── README.md ├── code ├── 01_rlfinance.ipynb ├── 02_rlfinance.ipynb ├── 03_rlfinance.ipynb ├── 04_rlfinance.ipynb ├── 05_rlfinance.ipynb ├── 06_rlfinance.ipynb ├── 07_rlfinance.ipynb ├── 08_rlfinance_2ac.ipynb ├── 08_rlfinance_3ac.ipynb ├── 09_rlfinance.ipynb ├── assetallocation.py ├── bsm73.py ├── dqlagent.py ├── finance.py └── simulation.py ├── pytorch ├── 01_rl4f.ipynb ├── 02_rl4f_pytorch.ipynb ├── 03_rl4f_pytorch.ipynb ├── 04_rl4f.ipynb ├── 05_rl4f_pytorch.ipynb ├── 06_rl4f_pytorch.ipynb ├── 07_rl4f_pytorch.ipynb ├── 08_rl4f_2ac_pytorch.ipynb ├── 08_rl4f_3ac_pytorch.ipynb ├── 09_rl4f_pytorch.ipynb ├── assetallocation_pytorch.py ├── bsm73.py ├── dqlagent_pytorch.py ├── finance.py └── simulation.py └── rl4f_tf210.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # Specifics 2 | *.swp 3 | *.cfg 4 | _build/ 5 | .DS_Store 6 | orig/ 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | All the contents as well as the code, Jupyter Notebooks and other materials in this Github repository and on the Quant Platform (https://rl4f.pqp.io) related to Reinforcement Learning for Finance (book, course, class, program) by Dr. Yves J. Hilpisch (The Python Quants GmbH) are copyrighted and only intended for personal use. 2 | 3 | Any kind of sharing, distribution, duplication, etc. without written permission by the The Python Quants GmbH is prohibited. 4 | 5 | The contents, Python code, Jupyter Notebooks and other materials come without warranties or representations, to the extent permitted by applicable law. 
6 | 7 | Notice that the code provided might be work in progress and that substantial additions, changes, updates, etc. can take place in the future. It is advised to regularly check for updates. 8 | 9 | None of the material represents any kind of recommendation or investment advice. The material is only meant as a technical illustration. Leveraged and unleveraged trading of financial instruments, and contracts for difference (CFDs) in particular, involves a number of risks. Make sure to understand and manage these risks. 10 | 11 | (c) Dr. Yves J. Hilpisch | The Python Quants GmbH | October 2024 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reinforcement Learning for Finance 2 | 3 | **A Python-Based Introduction** 4 | 5 | ## About this Repository 6 | 7 | This repository provides Python code and Jupyter Notebooks accompanying the 8 | _**Reinforcement Learning for Finance — A Python-Based Introduction**_ book published by [O'Reilly](https://learning.oreilly.com/library/view/financial-theory-with/9781098104344/). 9 | 10 | 11 | 12 | You can **register for free** on our [Quant Platform](https://rl4f.pqp.io) to make 13 | easy use of the Python code in the cloud. No local Python installation is required. 14 | 15 | ## Python Environment 16 | 17 | The original code of the book has been developed using `TensorFlow` 2.10. You can use the YAML file in the repository (`rl4f_tf210.yaml`) to create a Python environment with `conda` as follows: 18 | 19 | conda env create -n rl4f_tf210 -f rl4f_tf210.yaml 20 | 21 | ## Copyright & Disclaimer 22 | 23 | © Dr. Yves J. Hilpisch | The Python Quants GmbH | October 2024 24 | 25 | All code and Jupyter Notebooks come without representations or warranties, to the extent permitted by applicable law. 
They are intended for personal use only and do not represent any investment advice or recommendation of any form. 26 | 27 | 28 | 29 | https://tpq.io | training@tpq.io | https://x.com/dyjh | https://youtube.com/dyjh 30 | 31 | -------------------------------------------------------------------------------- /code/01_rlfinance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 01 — Learning through Interaction**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "3bbe3719-fcab-4963-8701-087562dd5d79", 28 | "metadata": {}, 29 | "source": [ 30 | "## Learning" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "a6b8710a-19a3-4b5b-ae60-ffbd53dc45c4", 36 | "metadata": {}, 37 | "source": [ 38 | "### Tossing a Biased Coin" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "id": "5d781056-299a-4dd5-8908-038a2438ec44", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy as np\n", 49 | "from numpy.random import default_rng\n", 50 | "rng = default_rng(seed=100)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "id": "3cf14cf9-53ed-428b-a597-3f380f4cff5a", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "ssp = [1, 0]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "id": "ed2b1719-cac4-46c2-9398-c634068d3666", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "asp = [1, 0]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "id": "20cfd83a-7e57-4fa4-8126-81bb7a4758ba", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def epoch():\n", 81 | " tr = 0\n", 82 | " for _ in range(100):\n", 83 | " a = rng.choice(asp)\n", 84 | " s = rng.choice(ssp)\n", 85 | " if a == s:\n", 86 | " tr += 1\n", 87 | " return tr" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "id": "89fc1f62-24df-4baa-9784-3126431dbdfe", 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "array([56, 47, 
48, 55, 55, 51, 54, 43, 55, 40])" 100 | ] 101 | }, 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "rl = np.array([epoch() for _ in range(250)])\n", 109 | "rl[:10]" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "52f92a52-d305-42f1-a1ff-7b3aacc26549", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "49.968" 122 | ] 123 | }, 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "rl.mean()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 7, 136 | "id": "855b4cf5-75d8-4dbc-bdae-1cd753e50691", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "ssp = [1, 1, 1, 1, 0]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "id": "8148d8b9-de41-4d16-ab8f-b41d45a2a1a7", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "asp = [1, 0]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 9, 156 | "id": "bea9ad54-804a-4d76-a614-50b01be65805", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "def epoch():\n", 161 | " tr = 0\n", 162 | " for _ in range(100):\n", 163 | " a = rng.choice(asp)\n", 164 | " s = rng.choice(ssp)\n", 165 | " if a == s:\n", 166 | " tr += 1\n", 167 | " return tr" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 10, 173 | "id": "988094e8-64c7-46e4-a54e-f111765c9e71", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "array([53, 56, 40, 55, 53, 49, 43, 45, 50, 51])" 180 | ] 181 | }, 182 | "execution_count": 10, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "rl = np.array([epoch() for _ in range(250)])\n", 189 | "rl[:10]" 190 | ] 191 | }, 192 | { 193 | 
"cell_type": "code", 194 | "execution_count": 11, 195 | "id": "0aeed633-c81c-4b7f-9e19-c1a03ac3e32d", 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "49.924" 202 | ] 203 | }, 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "rl.mean()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 12, 216 | "id": "f2220ff9-c8c2-462f-aad0-c07405272976", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "ssp = [1, 1, 1, 1, 0]" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 13, 226 | "id": "e043cb3e-b943-4c4a-a337-f50810795d63", 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "def epoch(n):\n", 231 | " tr = 0\n", 232 | " asp = [0, 1]\n", 233 | " for _ in range(n):\n", 234 | " a = rng.choice(asp)\n", 235 | " s = rng.choice(ssp)\n", 236 | " if a == s:\n", 237 | " tr += 1\n", 238 | " asp.append(s)\n", 239 | " return tr" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 14, 245 | "id": "63ed3ba7-5701-4613-8a37-94eb4b114354", 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "array([71, 65, 67, 69, 68, 72, 68, 68, 77, 73])" 252 | ] 253 | }, 254 | "execution_count": 14, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "rl = np.array([epoch(100) for _ in range(250)])\n", 261 | "rl[:10]" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 15, 267 | "id": "ccb173db-cf9f-4ee2-8bb1-f2b41990f130", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "66.78" 274 | ] 275 | }, 276 | "execution_count": 15, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "rl.mean()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 16, 
288 | "id": "74d45682-4f46-4950-b35c-2f8dff86d448", 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "from collections import Counter" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 17, 298 | "id": "535ead89-8667-48ae-830f-ec6679780272", 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "ssp = [1, 1, 1, 1, 0]" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 18, 308 | "id": "67569ec3-4525-443e-8cda-390af539804d", 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "def epoch(n):\n", 313 | " tr = 0\n", 314 | " asp = [0, 1]\n", 315 | " for _ in range(n):\n", 316 | " c = Counter(asp)\n", 317 | " a = c.most_common()[0][0]\n", 318 | " s = rng.choice(ssp)\n", 319 | " if a == s:\n", 320 | " tr += 1\n", 321 | " asp.append(s)\n", 322 | " return tr" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 19, 328 | "id": "fc5893e5-a997-4fe8-88a4-13afe44c5175", 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/plain": [ 334 | "array([81, 70, 74, 77, 82, 74, 81, 80, 77, 78])" 335 | ] 336 | }, 337 | "execution_count": 19, 338 | "metadata": {}, 339 | "output_type": "execute_result" 340 | } 341 | ], 342 | "source": [ 343 | "rl = np.array([epoch(100) for _ in range(250)])\n", 344 | "rl[:10]" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 20, 350 | "id": "7187f48e-e276-4f0a-959b-62ddc1bd23e8", 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "78.828" 357 | ] 358 | }, 359 | "execution_count": 20, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "rl.mean()" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "id": "451350fe-1075-4969-808c-b5aaf37cec25", 371 | "metadata": {}, 372 | "source": [ 373 | "### Rolling a Biased Die" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | 
"execution_count": 21, 379 | "id": "bf4b0649-b1fa-4b74-bd31-ae5f20d00105", 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "ssp = [1, 2, 3, 4, 4, 4, 4, 4, 5, 6]" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 22, 389 | "id": "4e3900fe-b22b-4ea2-b00c-8d057e553cad", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "asp = [1, 2, 3, 4, 5, 6]" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 23, 399 | "id": "6bac0a45-5a2a-4276-a329-86978e3f9db1", 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "def epoch():\n", 404 | " tr = 0\n", 405 | " for _ in range(600):\n", 406 | " a = rng.choice(asp)\n", 407 | " s = rng.choice(ssp)\n", 408 | " if a == s:\n", 409 | " tr += 1\n", 410 | " return tr" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 24, 416 | "id": "062abdd3-2a65-4d1e-a9af-cf25772b54c4", 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "data": { 421 | "text/plain": [ 422 | "array([ 92, 96, 106, 99, 96, 107, 101, 106, 92, 117])" 423 | ] 424 | }, 425 | "execution_count": 24, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "rl = np.array([epoch() for _ in range(250)])\n", 432 | "rl[:10]" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 25, 438 | "id": "a82b6f5f-7b32-403a-94a5-91ebc9e90815", 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "101.22" 445 | ] 446 | }, 447 | "execution_count": 25, 448 | "metadata": {}, 449 | "output_type": "execute_result" 450 | } 451 | ], 452 | "source": [ 453 | "rl.mean()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 26, 459 | "id": "7e3a9fb0-22ea-4fed-8ff3-f0ab48169031", 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "def epoch():\n", 464 | " tr = 0\n", 465 | " asp = [1, 2, 3, 4, 5, 6]\n", 466 | " for _ in range(600):\n", 
467 | " a = rng.choice(asp)\n", 468 | " s = rng.choice(ssp)\n", 469 | " if a == s:\n", 470 | " tr += 1\n", 471 | " asp.append(s)\n", 472 | " return tr" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 27, 478 | "id": "79f099b7-ca59-45d1-bb10-0f19c8f7fd35", 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "array([182, 174, 162, 157, 184, 167, 190, 208, 171, 153])" 485 | ] 486 | }, 487 | "execution_count": 27, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "rl = np.array([epoch() for _ in range(250)])\n", 494 | "rl[:10]" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 28, 500 | "id": "cd641f5f-205e-4414-8006-1a8464aa49cb", 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "176.296" 507 | ] 508 | }, 509 | "execution_count": 28, 510 | "metadata": {}, 511 | "output_type": "execute_result" 512 | } 513 | ], 514 | "source": [ 515 | "rl.mean()" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 29, 521 | "id": "be27b1cb-19bf-4c08-bffe-84e7164a2131", 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "def epoch():\n", 526 | " tr = 0\n", 527 | " asp = [1, 2, 3, 4, 5, 6]\n", 528 | " for _ in range(600):\n", 529 | " c = Counter(asp)\n", 530 | " a = c.most_common()[0][0]\n", 531 | " s = rng.choice(ssp)\n", 532 | " if a == s:\n", 533 | " tr += 1\n", 534 | " asp.append(s)\n", 535 | " return tr" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 30, 541 | "id": "fd68ba52-aaca-4c17-819e-5a9f96053c14", 542 | "metadata": {}, 543 | "outputs": [ 544 | { 545 | "data": { 546 | "text/plain": [ 547 | "array([305, 288, 312, 306, 318, 302, 304, 311, 313, 281])" 548 | ] 549 | }, 550 | "execution_count": 30, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "rl = np.array([epoch() 
for _ in range(250)])\n", 557 | "rl[:10]" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 31, 563 | "id": "6c6b0239-493d-4fc8-8ca9-2dd49f8eff4f", 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/plain": [ 569 | "297.204" 570 | ] 571 | }, 572 | "execution_count": 31, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "rl.mean()" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 32, 584 | "id": "c0337cd1-b618-48df-bb51-3686caa3f1dd", 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "name": "stdout", 589 | "output_type": "stream", 590 | "text": [ 591 | "10,000,000,000,000,000,000,000,000,000,000,000,000,000\n" 592 | ] 593 | } 594 | ], 595 | "source": [ 596 | "cm = 10 ** 40\n", 597 | "print(f'{cm:,}')" 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 603 | "metadata": {}, 604 | "source": [ 605 | "\"The
\n", 606 | "\n", 607 | "https://tpq.io | @dyjh | team@tpq.io" 608 | ] 609 | } 610 | ], 611 | "metadata": { 612 | "kernelspec": { 613 | "display_name": "Python 3 (ipykernel)", 614 | "language": "python", 615 | "name": "python3" 616 | }, 617 | "language_info": { 618 | "codemirror_mode": { 619 | "name": "ipython", 620 | "version": 3 621 | }, 622 | "file_extension": ".py", 623 | "mimetype": "text/x-python", 624 | "name": "python", 625 | "nbconvert_exporter": "python", 626 | "pygments_lexer": "ipython3", 627 | "version": "3.10.14" 628 | } 629 | }, 630 | "nbformat": 4, 631 | "nbformat_minor": 5 632 | } 633 | -------------------------------------------------------------------------------- /code/02_rlfinance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 02 — Deep Q-Learning**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## CartPole" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "5e3924c3-2cad-4400-8806-5acf2f4b9b16", 36 | "metadata": {}, 37 | "source": [ 38 | "### The Game Environment " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "id": "72f3a51a-71e6-497d-bab3-926444a6bb30", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import gymnasium as gym" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "e19725f2-a026-487e-826c-00fa5fce71ec", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "env = gym.make('CartPole-v1')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "id": "af76fb4e-3b31-4465-bff5-e5f8362af3d2", 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "Discrete(2)" 71 | ] 72 | }, 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "env.action_space" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "id": "bdb45da1-6f9c-464d-bb16-e098ddd52838", 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "2" 92 | ] 93 | }, 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "env.action_space.n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 5, 106 | "id": 
"77e8ec50-f5a4-4706-8937-6724582ebdc3", 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "[0, 1, 0, 0, 1, 0, 0, 0, 0, 1]" 113 | ] 114 | }, 115 | "execution_count": 5, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "[env.action_space.sample() for _ in range(10)]" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "id": "592d3ddc-3958-42ff-b4c7-8924ce0a343d", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)" 134 | ] 135 | }, 136 | "execution_count": 6, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "env.observation_space" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "id": "19474f1a-29c3-4cc2-89f6-6226845f5468", 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "(4,)" 155 | ] 156 | }, 157 | "execution_count": 7, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "env.observation_space.shape" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 8, 169 | "id": "4bdd054d-4a5e-429e-9e44-3e436a20446d", 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "(array([ 0.03349816, 0.0096554 , -0.02111368, -0.04570484], dtype=float32),\n", 176 | " {})" 177 | ] 178 | }, 179 | "execution_count": 8, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "env.reset(seed=100)\n", 186 | "# cart position, cart velocity, pole angle, pole angular velocity" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 9, 192 | "id": 
"875c67b7-4817-4fac-8fbb-0596c399af96", 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "(array([ 0.03369127, -0.18515752, -0.02202777, 0.24024247], dtype=float32),\n", 199 | " 1.0,\n", 200 | " False,\n", 201 | " False,\n", 202 | " {})" 203 | ] 204 | }, 205 | "execution_count": 9, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "env.step(0)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 10, 217 | "id": "7be7afb1-e69d-41d7-b869-c73747e38b61", 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "(array([ 0.02998812, 0.01027205, -0.01722292, -0.05930644], dtype=float32),\n", 224 | " 1.0,\n", 225 | " False,\n", 226 | " False,\n", 227 | " {})" 228 | ] 229 | }, 230 | "execution_count": 10, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "env.step(1)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 11, 242 | "id": "f8f6e49b-3308-418a-999c-f7d6a052cfea", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "class RandomAgent:\n", 247 | " def __init__(self):\n", 248 | " self.env = gym.make('CartPole-v1')\n", 249 | " def play(self, episodes=1):\n", 250 | " self.trewards = list()\n", 251 | " for e in range(episodes):\n", 252 | " self.env.reset()\n", 253 | " for step in range(1, 100):\n", 254 | " a = self.env.action_space.sample()\n", 255 | " state, reward, done, trunc, info = self.env.step(a)\n", 256 | " if done:\n", 257 | " self.trewards.append(step)\n", 258 | " break" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 12, 264 | "id": "dffbb689-b81e-48cc-9fac-3a7dec9c1ff7", 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "ra = RandomAgent()" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 13, 274 | "id": 
"cbb3b03c-ded1-4ca7-80d2-e316635379b8", 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "ra.play(15)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 14, 284 | "id": "5b83a7c9-485a-433d-b637-9ffbe6fe7146", 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "[19, 17, 10, 13, 13, 12, 35, 21, 17, 26, 16, 49, 20, 19, 26]" 291 | ] 292 | }, 293 | "execution_count": 14, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "ra.trewards" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 15, 305 | "id": "27d9d910-4f2d-4d7b-bcaa-a28747474c00", 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "20.87" 312 | ] 313 | }, 314 | "execution_count": 15, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "round(sum(ra.trewards) / len(ra.trewards), 2)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 16, 326 | "id": "12e1594d-ea7c-49e9-9149-92848ba72440", 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "import os\n", 331 | "import random\n", 332 | "import warnings\n", 333 | "import numpy as np\n", 334 | "import tensorflow as tf\n", 335 | "from tensorflow import keras\n", 336 | "from collections import deque\n", 337 | "from keras.layers import Dense\n", 338 | "from keras.models import Sequential" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 17, 344 | "id": "fa105bbb-727f-488d-8152-b5c1cc4d7646", 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "warnings.simplefilter('ignore')\n", 349 | "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", 350 | "os.environ['PYTHONHASHSEED'] = '0'" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 18, 356 | "id": "a21cd6c5-058b-45cb-abfa-78a9cbb3633b", 357 | "metadata": {}, 358 | 
"outputs": [], 359 | "source": [ 360 | "from tensorflow.python.framework.ops import disable_eager_execution\n", 361 | "disable_eager_execution()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 19, 367 | "id": "0264fac6-2c4a-4ea3-9031-e5006dce93c4", 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "opt = keras.optimizers.legacy.Adam(learning_rate=0.0001)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 20, 377 | "id": "e7c28ee7-4be2-459c-8e27-029ec6ff4b4d", 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "random.seed(100)\n", 382 | "tf.random.set_seed(100)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 21, 388 | "id": "072e8f75-0936-434f-ad65-c2f7cff91b7c", 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "class DQLAgent:\n", 393 | " def __init__(self):\n", 394 | " self.epsilon = 1.0\n", 395 | " self.epsilon_decay = 0.9975\n", 396 | " self.epsilon_min = 0.1\n", 397 | " self.memory = deque(maxlen=2000)\n", 398 | " self.batch_size = 32\n", 399 | " self.gamma = 0.9\n", 400 | " self.trewards = list()\n", 401 | " self.max_treward = 0\n", 402 | " self._create_model()\n", 403 | " self.env = gym.make('CartPole-v1')\n", 404 | " def _create_model(self):\n", 405 | " self.model = Sequential()\n", 406 | " self.model.add(Dense(24, activation='relu', input_dim=4))\n", 407 | " self.model.add(Dense(24, activation='relu'))\n", 408 | " self.model.add(Dense(2, activation='linear'))\n", 409 | " self.model.compile(loss='mse', optimizer=opt)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 22, 415 | "id": "03e2299c-14bd-4cc8-af41-89b69d532544", 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "class DQLAgent(DQLAgent):\n", 420 | " def act(self, state):\n", 421 | " if random.random() < self.epsilon:\n", 422 | " return self.env.action_space.sample()\n", 423 | " return 
np.argmax(self.model.predict(state)[0])\n", 424 | " def replay(self):\n", 425 | " batch = random.sample(self.memory, self.batch_size)\n", 426 | " for state, action, next_state, reward, done in batch:\n", 427 | " if not done:\n", 428 | " reward += self.gamma * np.amax(\n", 429 | " self.model.predict(next_state)[0])\n", 430 | " target = self.model.predict(state)\n", 431 | " target[0, action] = reward\n", 432 | " self.model.fit(state, target, epochs=2, verbose=False)\n", 433 | " if self.epsilon > self.epsilon_min:\n", 434 | " self.epsilon *= self.epsilon_decay" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 23, 440 | "id": "2bf59f89-41a4-4f6e-8635-0513b3c3d8c1", 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "class DQLAgent(DQLAgent):\n", 445 | " def learn(self, episodes):\n", 446 | " for e in range(1, episodes + 1):\n", 447 | " state, _ = self.env.reset()\n", 448 | " state = np.reshape(state, [1, 4])\n", 449 | " for f in range(1, 5000):\n", 450 | " action = self.act(state)\n", 451 | " next_state, reward, done, trunc, _ = \\\n", 452 | " self.env.step(action)\n", 453 | " next_state = np.reshape(next_state, [1, 4])\n", 454 | " self.memory.append(\n", 455 | " [state, action, next_state, reward, done])\n", 456 | " state = next_state\n", 457 | " if done or trunc:\n", 458 | " self.trewards.append(f)\n", 459 | " self.max_treward = max(self.max_treward, f)\n", 460 | " templ = f'episode={e:4d} | treward={f:4d}'\n", 461 | " templ += f' | max={self.max_treward:4d}'\n", 462 | " print(templ, end='\\r')\n", 463 | " break\n", 464 | " if len(self.memory) > self.batch_size:\n", 465 | " self.replay()\n", 466 | " print()" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 24, 472 | "id": "6a44a5f9-af9b-4929-a5c4-19e87f871c78", 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "class DQLAgent(DQLAgent):\n", 477 | " def test(self, episodes):\n", 478 | " for e in range(1, episodes + 1):\n", 479 | " 
state, _ = self.env.reset()\n", 480 | " state = np.reshape(state, [1, 4])\n", 481 | " for f in range(1, 5001):\n", 482 | " action = np.argmax(self.model.predict(state)[0])\n", 483 | " state, reward, done, trunc, _ = self.env.step(action)\n", 484 | " state = np.reshape(state, [1, 4])\n", 485 | " if done or trunc:\n", 486 | " print(f, end=' ')\n", 487 | " break" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 25, 493 | "id": "64417ca0-49ba-4558-8c92-d89604ff3e16", 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "agent = DQLAgent()" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 26, 503 | "id": "f77a72ab-5a4b-4d3d-863a-f8d08d2e3ce2", 504 | "metadata": { 505 | "tags": [] 506 | }, 507 | "outputs": [ 508 | { 509 | "name": "stdout", 510 | "output_type": "stream", 511 | "text": [ 512 | "episode=1500 | treward= 254 | max= 500\n", 513 | "CPU times: user 2min 11s, sys: 23.2 s, total: 2min 34s\n", 514 | "Wall time: 2min 8s\n" 515 | ] 516 | } 517 | ], 518 | "source": [ 519 | "%time agent.learn(1500)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 27, 525 | "id": "fbfc1255-66fe-4c69-9135-70100b981109", 526 | "metadata": {}, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "0.09997053357470892" 532 | ] 533 | }, 534 | "execution_count": 27, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "agent.epsilon" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 28, 546 | "id": "af72f8d3-4e2a-4d0f-8311-a56ba4487832", 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "name": "stdout", 551 | "output_type": "stream", 552 | "text": [ 553 | "185 211 206 101 198 234 115 287 241 116 98 201 120 174 95 " 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "agent.test(15)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 564 | 
"metadata": {}, 565 | "source": [ 566 | "\"The
\n", 567 | "\n", 568 | "https://tpq.io | @dyjh | team@tpq.io" 569 | ] 570 | } 571 | ], 572 | "metadata": { 573 | "kernelspec": { 574 | "display_name": "Python 3 (ipykernel)", 575 | "language": "python", 576 | "name": "python3" 577 | }, 578 | "language_info": { 579 | "codemirror_mode": { 580 | "name": "ipython", 581 | "version": 3 582 | }, 583 | "file_extension": ".py", 584 | "mimetype": "text/x-python", 585 | "name": "python", 586 | "nbconvert_exporter": "python", 587 | "pygments_lexer": "ipython3", 588 | "version": "3.10.14" 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 5 593 | } 594 | -------------------------------------------------------------------------------- /code/03_rlfinance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 03 — Financial Q-Learning**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## Finance Environment" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "id": "f2c8cd7e-d93d-4c4d-ba77-3c0cb7b677af", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import os\n", 41 | "import random" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "id": "bd8d3cf4-c30c-432a-bd3f-23e98c4d201c", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "random.seed(100)\n", 52 | "os.environ['PYTHONHASHSEED'] = '0'" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "id": "cb33cd0c-4fb1-4456-911f-0d92597db8c0", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "class ActionSpace:\n", 63 | " def sample(self):\n", 64 | " return random.randint(0, 1)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "id": "30d49bdd-e24b-4d87-a4dc-5639cc172f8e", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "action_space = ActionSpace()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "id": "416ce315-16d7-4c47-845a-f21a099b8ba3", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "[0, 1, 1, 0, 1, 1, 1, 0, 0, 0]" 87 | ] 88 | }, 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "[action_space.sample() for _ in range(10)]" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 
101 | "id": "f4df457f-9014-4e6a-878a-23645c77037d", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "import numpy as np\n", 106 | "import pandas as pd" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 7, 112 | "id": "952353e1-8f39-48ac-ac6d-5a21b9a44315", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "class Finance:\n", 117 | " url = 'https://certificate.tpq.io/rl4finance.csv'\n", 118 | " def __init__(self, symbol, feature,\n", 119 | " min_accuracy=0.485, n_features=4):\n", 120 | " self.symbol = symbol\n", 121 | " self.feature = feature\n", 122 | " self.n_features = n_features\n", 123 | " self.action_space = ActionSpace()\n", 124 | " self.min_accuracy = min_accuracy\n", 125 | " self._get_data()\n", 126 | " self._prepare_data()\n", 127 | " def _get_data(self):\n", 128 | " self.raw = pd.read_csv(self.url,\n", 129 | " index_col=0, parse_dates=True)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 8, 135 | "id": "69e1ed75-1e55-42f4-86a3-db54c60acf1f", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "class Finance(Finance):\n", 140 | " def _prepare_data(self):\n", 141 | " self.data = pd.DataFrame(self.raw[self.symbol]).dropna()\n", 142 | " self.data['r'] = np.log(self.data / self.data.shift(1))\n", 143 | " self.data['d'] = np.where(self.data['r'] > 0, 1, 0)\n", 144 | " self.data.dropna(inplace=True)\n", 145 | " self.data_ = (self.data - self.data.mean()) / self.data.std()\n", 146 | " def reset(self):\n", 147 | " self.bar = self.n_features\n", 148 | " self.treward = 0\n", 149 | " state = self.data_[self.feature].iloc[\n", 150 | " self.bar - self.n_features:self.bar].values\n", 151 | " return state, {}" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 9, 157 | "id": "a2b0ccc6-d8ec-4156-bf7a-30ba263fdde9", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "class Finance(Finance):\n", 162 | " def step(self, 
action):\n", 163 | " if action == self.data['d'].iloc[self.bar]:\n", 164 | " correct = True\n", 165 | " else:\n", 166 | " correct = False\n", 167 | " reward = 1 if correct else 0\n", 168 | " self.treward += reward\n", 169 | " self.bar += 1\n", 170 | " self.accuracy = self.treward / (self.bar - self.n_features)\n", 171 | " if self.bar >= len(self.data):\n", 172 | " done = True\n", 173 | " elif reward == 1:\n", 174 | " done = False\n", 175 | " elif (self.accuracy < self.min_accuracy) and (self.bar > 15):\n", 176 | " done = True\n", 177 | " else:\n", 178 | " done = False\n", 179 | " next_state = self.data_[self.feature].iloc[\n", 180 | " self.bar - self.n_features:self.bar].values\n", 181 | " return next_state, reward, done, False, {}" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 10, 187 | "id": "373a0a8c-3b85-4933-8de5-1103d4cc1a6b", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "fin = Finance(symbol='EUR=', feature='EUR=')" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 11, 197 | "id": "d4c4248b-2168-42d2-b766-27270681b5dd", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "['AAPL.O',\n", 204 | " 'MSFT.O',\n", 205 | " 'INTC.O',\n", 206 | " 'AMZN.O',\n", 207 | " 'GS.N',\n", 208 | " '.SPX',\n", 209 | " '.VIX',\n", 210 | " 'SPY',\n", 211 | " 'EUR=',\n", 212 | " 'XAU=',\n", 213 | " 'GDX',\n", 214 | " 'GLD']" 215 | ] 216 | }, 217 | "execution_count": 11, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "list(fin.raw.columns)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 12, 229 | "id": "0c2042dd-3d9a-4976-bb6d-d58daeeaf650", 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "(array([2.74844931, 2.64643904, 2.69560062, 2.68085214]), {})" 236 | ] 237 | }, 238 | "execution_count": 12, 239 | "metadata": {}, 240 | 
"output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "fin.reset()\n", 245 | "# four lagged, normalized price points" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 13, 251 | "id": "d0e04a87-7f63-4532-8609-2ad598d67067", 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "1" 258 | ] 259 | }, 260 | "execution_count": 13, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "fin.action_space.sample()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 14, 272 | "id": "2c6a11b6-87da-4226-baad-0fa9f4942c44", 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "(array([2.64643904, 2.69560062, 2.68085214, 2.63046153]), 0, False, False, {})" 279 | ] 280 | }, 281 | "execution_count": 14, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "fin.step(fin.action_space.sample())" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 15, 293 | "id": "c0a3b905-2eea-406f-9bee-bb61d6f5e463", 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "fin = Finance('EUR=', 'r')" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 16, 303 | "id": "c490647f-9757-46bf-911d-c53477d9b3d0", 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "(array([-1.19130476, -1.21344494, 0.61099805, -0.16094865]), {})" 310 | ] 311 | }, 312 | "execution_count": 16, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "fin.reset()\n", 319 | "# four lagged, normalized log returns" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 17, 325 | "id": "1c0bab87-6d45-4e17-a52c-3d19273bd804", 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "class RandomAgent:\n", 330 | " 
def __init__(self):\n", 331 | " self.env = Finance('EUR=', 'r')\n", 332 | " def play(self, episodes=1):\n", 333 | " self.trewards = list()\n", 334 | " for e in range(episodes):\n", 335 | " self.env.reset()\n", 336 | " for step in range(1, 100):\n", 337 | " a = self.env.action_space.sample()\n", 338 | " state, reward, done, trunc, info = self.env.step(a)\n", 339 | " if done:\n", 340 | " self.trewards.append(step)\n", 341 | " break" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 18, 347 | "id": "417b3f00-199f-4db7-b500-b7b7f99ce15b", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "ra = RandomAgent()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 19, 357 | "id": "99850e42-8c2b-46a6-9a92-59a0e5940061", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "ra.play(15)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 20, 367 | "id": "1a6351f5-e532-4703-ae3b-0f7ec2483f48", 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "[17, 13, 17, 12, 12, 12, 13, 23, 31, 13, 12, 15]" 374 | ] 375 | }, 376 | "execution_count": 20, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "ra.trewards" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 21, 388 | "id": "9590104e-899f-4a4a-81a3-0b952a8f1818", 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "15.83" 395 | ] 396 | }, 397 | "execution_count": 21, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "round(sum(ra.trewards) / len(ra.trewards), 2)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 22, 409 | "id": "2252d5e0-0c3f-4900-a96f-1fe6348ccd18", 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/plain": [ 415 | "2607" 416 | ] 417 | }, 418 | 
"execution_count": 22, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "len(fin.data)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 23, 430 | "id": "06e651e5-4eb4-4001-b8a3-d629721b6eed", 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [ 434 | "import os\n", 435 | "import random\n", 436 | "import warnings\n", 437 | "import numpy as np\n", 438 | "import tensorflow as tf\n", 439 | "from tensorflow import keras\n", 440 | "from collections import deque\n", 441 | "from keras.layers import Dense\n", 442 | "from keras.models import Sequential" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 24, 448 | "id": "a04e9dcb-5a0c-463b-9714-012a9b8e4093", 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "warnings.simplefilter('ignore')\n", 453 | "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 25, 459 | "id": "c047b3c4-d7ca-4e17-b290-6dfce70690fc", 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "from tensorflow.python.framework.ops import disable_eager_execution\n", 464 | "disable_eager_execution()" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 26, 470 | "id": "9c5656a5-7378-494b-a43f-5ba736105485", 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "opt = keras.optimizers.legacy.Adam(learning_rate=0.0001)" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 27, 480 | "id": "9a1c06c7-6477-4a73-9bf5-68b497c52e8c", 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "class DQLAgent:\n", 485 | " def __init__(self, symbol, feature, min_accuracy, n_features=4):\n", 486 | " self.epsilon = 1.0\n", 487 | " self.epsilon_decay = 0.9975\n", 488 | " self.epsilon_min = 0.1\n", 489 | " self.memory = deque(maxlen=2000)\n", 490 | " self.batch_size = 32\n", 491 | " self.gamma = 
0.5\n", 492 | " self.trewards = list()\n", 493 | " self.max_treward = 0\n", 494 | " self.n_features = n_features\n", 495 | " self._create_model()\n", 496 | " self.env = Finance(symbol, feature,\n", 497 | " min_accuracy, n_features)\n", 498 | " def _create_model(self):\n", 499 | " self.model = Sequential()\n", 500 | " self.model.add(Dense(24, activation='relu',\n", 501 | " input_dim=self.n_features))\n", 502 | " self.model.add(Dense(24, activation='relu'))\n", 503 | " self.model.add(Dense(2, activation='linear'))\n", 504 | " self.model.compile(loss='mse', optimizer=opt)\n", 505 | " def act(self, state):\n", 506 | " if random.random() < self.epsilon:\n", 507 | " return self.env.action_space.sample()\n", 508 | " return np.argmax(self.model.predict(state)[0])\n", 509 | " def replay(self):\n", 510 | " batch = random.sample(self.memory, self.batch_size)\n", 511 | " for state, action, next_state, reward, done in batch:\n", 512 | " if not done:\n", 513 | " reward += self.gamma * np.amax(\n", 514 | " self.model.predict(next_state)[0])\n", 515 | " target = self.model.predict(state)\n", 516 | " target[0, action] = reward\n", 517 | " self.model.fit(state, target, epochs=1, verbose=False)\n", 518 | " if self.epsilon > self.epsilon_min:\n", 519 | " self.epsilon *= self.epsilon_decay\n", 520 | " def learn(self, episodes):\n", 521 | " for e in range(1, episodes + 1):\n", 522 | " state, _ = self.env.reset()\n", 523 | " state = np.reshape(state, [1, self.n_features])\n", 524 | " for f in range(1, 5000):\n", 525 | " action = self.act(state)\n", 526 | " next_state, reward, done, trunc, _ = \\\n", 527 | " self.env.step(action)\n", 528 | " next_state = np.reshape(next_state,\n", 529 | " [1, self.n_features])\n", 530 | " self.memory.append(\n", 531 | " [state, action, next_state, reward, done])\n", 532 | " state = next_state \n", 533 | " if done:\n", 534 | " self.trewards.append(f)\n", 535 | " self.max_treward = max(self.max_treward, f)\n", 536 | " templ = f'episode={e:4d} | 
treward={f:4d}'\n", 537 | " templ += f' | max={self.max_treward:4d}'\n", 538 | " print(templ, end='\\r')\n", 539 | " break\n", 540 | " if len(self.memory) > self.batch_size:\n", 541 | " self.replay()\n", 542 | " print()\n", 543 | " def test(self, episodes):\n", 544 | " ma = self.env.min_accuracy\n", 545 | " self.env.min_accuracy = 0.5\n", 546 | " for e in range(1, episodes + 1):\n", 547 | " state, _ = self.env.reset()\n", 548 | " state = np.reshape(state, [1, self.n_features])\n", 549 | " for f in range(1, 5001):\n", 550 | " action = np.argmax(self.model.predict(state)[0])\n", 551 | " state, reward, done, trunc, _ = self.env.step(action)\n", 552 | " state = np.reshape(state, [1, self.n_features])\n", 553 | " if done:\n", 554 | " tmpl = f'total reward={f} | '\n", 555 | " tmpl += f'accuracy={self.env.accuracy:.3f}'\n", 556 | " print(tmpl)\n", 557 | " break\n", 558 | " self.env.min_accuracy = ma" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 28, 564 | "id": "d83cf567-0389-474d-accd-38431edaf755", 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "random.seed(250)\n", 569 | "tf.random.set_seed(250)" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 29, 575 | "id": "268f6f90-082d-4827-bdef-8bffa57016c7", 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "agent = DQLAgent('EUR=', 'r', 0.495, 4)" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 30, 585 | "id": "ae2336af-de7e-4b3a-8ecd-292a06a0beb4", 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "name": "stdout", 590 | "output_type": "stream", 591 | "text": [ 592 | "episode= 250 | treward= 12 | max=2603\n", 593 | "CPU times: user 21.1 s, sys: 3.05 s, total: 24.1 s\n", 594 | "Wall time: 21 s\n" 595 | ] 596 | } 597 | ], 598 | "source": [ 599 | "%time agent.learn(250)" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": 31, 605 | "id": "6a1023a5-07ef-4ac3-86c4-307a356cd2ba", 
606 | "metadata": {}, 607 | "outputs": [ 608 | { 609 | "name": "stdout", 610 | "output_type": "stream", 611 | "text": [ 612 | "total reward=2603 | accuracy=0.525\n", 613 | "total reward=2603 | accuracy=0.525\n", 614 | "total reward=2603 | accuracy=0.525\n", 615 | "total reward=2603 | accuracy=0.525\n", 616 | "total reward=2603 | accuracy=0.525\n" 617 | ] 618 | } 619 | ], 620 | "source": [ 621 | "agent.test(5)" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 627 | "metadata": {}, 628 | "source": [ 629 | "\"The
\n", 630 | "\n", 631 | "https://tpq.io | @dyjh | team@tpq.io" 632 | ] 633 | } 634 | ], 635 | "metadata": { 636 | "kernelspec": { 637 | "display_name": "Python 3 (ipykernel)", 638 | "language": "python", 639 | "name": "python3" 640 | }, 641 | "language_info": { 642 | "codemirror_mode": { 643 | "name": "ipython", 644 | "version": 3 645 | }, 646 | "file_extension": ".py", 647 | "mimetype": "text/x-python", 648 | "name": "python", 649 | "nbconvert_exporter": "python", 650 | "pygments_lexer": "ipython3", 651 | "version": "3.10.14" 652 | } 653 | }, 654 | "nbformat": 4, 655 | "nbformat_minor": 5 656 | } 657 | -------------------------------------------------------------------------------- /code/assetallocation.py: -------------------------------------------------------------------------------- 1 | # 2 | # Investing Environment and Agent 3 | # Three Asset Case 4 | # 5 | # (c) Dr. Yves J. Hilpisch 6 | # Reinforcement Learning for Finance 7 | # 8 | 9 | import os 10 | import math 11 | import random 12 | import numpy as np 13 | import pandas as pd 14 | from scipy import stats 15 | from pylab import plt, mpl 16 | from scipy.optimize import minimize 17 | 18 | from dqlagent import * 19 | 20 | plt.style.use('seaborn-v0_8') 21 | mpl.rcParams['figure.dpi'] = 300 22 | mpl.rcParams['savefig.dpi'] = 300 23 | mpl.rcParams['font.family'] = 'serif' 24 | np.set_printoptions(suppress=True) 25 | 26 | opt = keras.optimizers.legacy.Adam 27 | 28 | os.environ['PYTHONHASHSEED'] = '0' 29 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 30 | 31 | 32 | class observation_space: 33 | def __init__(self, n): 34 | self.shape = (n,) 35 | 36 | 37 | class action_space: 38 | def __init__(self, n): 39 | self.n = n 40 | def seed(self, seed): 41 | random.seed(seed) 42 | def sample(self): 43 | rn = np.random.random(3) 44 | return rn / rn.sum() 45 | 46 | 47 | class Investing: 48 | def __init__(self, asset_one, asset_two, asset_three, 49 | steps=252, amount=1): 50 | self.asset_one = asset_one 51 | self.asset_two = 
asset_two 52 | self.asset_three = asset_three 53 | self.steps = steps 54 | self.initial_balance = amount 55 | self.portfolio_value = amount 56 | self.portfolio_value_new = amount 57 | self.observation_space = observation_space(4) 58 | self.osn = self.observation_space.shape[0] 59 | self.action_space = action_space(3) 60 | self.retrieved = 0 61 | self._generate_data() 62 | self.portfolios = pd.DataFrame() 63 | self.episode = 0 64 | 65 | def _generate_data(self): 66 | if self.retrieved: 67 | pass 68 | else: 69 | url = 'https://certificate.tpq.io/rl4finance.csv' 70 | self.raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna() 71 | self.retrieved 72 | self.data = pd.DataFrame() 73 | self.data['X'] = self.raw[self.asset_one] 74 | self.data['Y'] = self.raw[self.asset_two] 75 | self.data['Z'] = self.raw[self.asset_three] 76 | s = random.randint(self.steps, len(self.data)) 77 | self.data = self.data.iloc[s-self.steps:s] 78 | self.data = self.data / self.data.iloc[0] 79 | 80 | def _get_state(self): 81 | Xt = self.data['X'].iloc[self.bar] 82 | Yt = self.data['Y'].iloc[self.bar] 83 | Zt = self.data['Z'].iloc[self.bar] 84 | date = self.data.index[self.bar] 85 | return np.array( 86 | [Xt, Yt, Zt, self.xt, self.yt, self.zt] 87 | ), {'date': date} 88 | 89 | def seed(self, seed=None): 90 | if seed is not None: 91 | random.seed(seed) 92 | 93 | def reset(self): 94 | self.xt = 0 95 | self.yt = 0 96 | self.zt = 0 97 | self.bar = 0 98 | self.treward = 0 99 | self.portfolio_value = self.initial_balance 100 | self.portfolio_value_new = self.initial_balance 101 | self.episode += 1 102 | self._generate_data() 103 | self.state, info = self._get_state() 104 | return self.state, info 105 | 106 | def add_results(self, pl): 107 | df = pd.DataFrame({ 108 | 'e': self.episode, 'date': self.date, 109 | 'xt': self.xt, 'yt': self.yt, 'zt': self.zt, 110 | 'pv': self.portfolio_value, 111 | 'pv_new': self.portfolio_value_new, 'p&l[$]': pl, 112 | 'p&l[%]': pl / self.portfolio_value_new * 100, 113 
    def step(self, action):
        """Advance the environment by one bar.

        ``action`` holds the three new portfolio weights. The reward
        (from the second bar on) is an annualized Sharpe-ratio-style
        figure based on the recorded P&L series; ``done`` is True on
        the last bar of the data window.
        """
        self.bar += 1
        self.new_state, info = self._get_state()
        self.date = info['date']
        if self.bar == 1:
            # first step: only set the initial allocation ...
            self.xt = action[0]
            self.yt = action[1]
            self.zt = action[2]
            # ... there is no P&L and no reward yet
            pl = 0.
            reward = 0.
            self.add_results(pl)
        else:
            # portfolio value after the price moves, still under the
            # weights chosen on the PREVIOUS step
            self.portfolio_value_new = (
                self.xt * self.portfolio_value *
                self.new_state[0] / self.state[0] +
                self.yt * self.portfolio_value *
                self.new_state[1] / self.state[1] +
                self.zt * self.portfolio_value *
                self.new_state[2] / self.state[2]
            )
            pl = self.portfolio_value_new - self.portfolio_value
            # rebalance to the newly chosen weights
            self.xt = action[0]
            self.yt = action[1]
            self.zt = action[2]
            self.add_results(pl)
            # annualized return proxy from the latest P&L entry ...
            ret = self.portfolios['p&l[%]'].iloc[-1] / 100 * 252
            # ... and annualized rolling (20-bar) volatility
            vol = self.portfolios['p&l[%]'].rolling(
                20, min_periods=1).std().iloc[-1] * math.sqrt(252)
            sharpe = ret / vol
            reward = sharpe
            self.portfolio_value = self.portfolio_value_new
        if self.bar == len(self.data) - 1:
            done = True
        else:
            done = False
        self.state = self.new_state
        return self.state, reward, done, False, {}
    def replay(self):
        """Train the value network on a random mini-batch of
        memorized experiences (one-step bootstrapped target).
        """
        batch = random.sample(self.memory, self.batch_size)
        for state, action, next_state, reward, done in batch:
            target = reward
            if not done:
                # bootstrap with the value of the next state under the
                # (approximately) optimal next allocation
                ns = next_state.copy()
                action = self.opt_action(ns)
                # write the optimal weights into the allocation slots
                # of the state before evaluating the network
                ns[0, 3:] = action
                target += self.gamma * self.model.predict(ns)[0, 0]
            self.model.fit(state, np.array([target]), epochs=1,
                           verbose=False)
        if self.epsilon > self.epsilon_min:
            # decay exploration after each replay pass
            self.epsilon *= self.epsilon_decay
def bsm_call_value(St, K, T, t, r, sigma):
    ''' Valuation of European call option in BSM model.
    Analytical formula.

    Parameters
    ==========
    St: float
        stock/index level at date/time t
    K: float
        fixed strike price
    T: float
        maturity date/time (in year fractions)
    t: float
        current date/time
    r: float
        constant risk-free short rate
    sigma: float
        volatility factor in diffusion term

    Returns
    =======
    value: float
        present value of the European call option
    '''
    St = float(St)
    ttm = T - t  # time-to-maturity in year fractions
    vol = sigma * sqrt(ttm)  # total volatility over remaining lifetime
    d1 = (log(St / K) + (r + 0.5 * sigma ** 2) * ttm) / vol
    # d2 differs from d1 only by the total volatility term,
    # avoiding a redundant recomputation of the full expression
    d2 = d1 - vol
    # stats.norm.cdf --> cumulative distribution function
    # for normal distribution
    value = (St * stats.norm.cdf(d1, 0, 1) -
             K * exp(-r * ttm) * stats.norm.cdf(d2, 0, 1))
    return value
Hilpisch 5 | # Reinforcement Learning for Finance 6 | # 7 | 8 | import os 9 | import random 10 | import warnings 11 | import numpy as np 12 | import tensorflow as tf 13 | from tensorflow import keras 14 | from collections import deque 15 | from keras.layers import Dense, Flatten 16 | from keras.models import Sequential 17 | 18 | warnings.simplefilter('ignore') 19 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 20 | 21 | 22 | from tensorflow.python.framework.ops import disable_eager_execution 23 | disable_eager_execution() 24 | 25 | opt = keras.optimizers.legacy.Adam 26 | 27 | 28 | class DQLAgent: 29 | def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001): 30 | self.epsilon = 1.0 31 | self.epsilon_decay = 0.9975 32 | self.epsilon_min = 0.1 33 | self.memory = deque(maxlen=2000) 34 | self.batch_size = 32 35 | self.gamma = 0.5 36 | self.trewards = list() 37 | self.max_treward = -np.inf 38 | self.n_features = n_features 39 | self.env = env 40 | self.episodes = 0 41 | self._create_model(hu, lr) 42 | 43 | def _create_model(self, hu, lr): 44 | self.model = Sequential() 45 | self.model.add(Dense(hu, activation='relu', 46 | input_dim=self.n_features)) 47 | self.model.add(Dense(hu, activation='relu')) 48 | self.model.add(Dense(2, activation='linear')) 49 | self.model.compile(loss='mse', optimizer=opt(learning_rate=lr)) 50 | 51 | def _reshape(self, state): 52 | state = state.flatten() 53 | return np.reshape(state, [1, len(state)]) 54 | 55 | def act(self, state): 56 | if random.random() < self.epsilon: 57 | return self.env.action_space.sample() 58 | return np.argmax(self.model.predict(state)[0]) 59 | 60 | def replay(self): 61 | batch = random.sample(self.memory, self.batch_size) 62 | for state, action, next_state, reward, done in batch: 63 | if not done: 64 | reward += self.gamma * np.amax( 65 | self.model.predict(next_state)[0]) 66 | target = self.model.predict(state) 67 | target[0, action] = reward 68 | self.model.fit(state, target, epochs=1, verbose=False) 69 | if 
    def learn(self, episodes):
        """Train the agent for a number of episodes.

        Each step's experience tuple is memorized; replay training
        kicks in once the memory exceeds the batch size. Progress is
        printed in place (carriage return) per finished episode.
        """
        for e in range(1, episodes + 1):
            self.episodes += 1
            state, _ = self.env.reset()
            state = self._reshape(state)
            treward = 0
            for f in range(1, 5000):
                self.f = f
                action = self.act(state)
                next_state, reward, done, trunc, _ = self.env.step(action)
                treward += reward
                next_state = self._reshape(next_state)
                self.memory.append(
                    [state, action, next_state, reward, done])
                state = next_state
                if done:
                    # episode finished: record and report total reward
                    self.trewards.append(treward)
                    self.max_treward = max(self.max_treward, treward)
                    templ = f'episode={self.episodes:4d} | '
                    templ += f'treward={treward:7.3f}'
                    templ += f' | max={self.max_treward:7.3f}'
                    print(templ, end='\r')
                    break
                # replay (train) once enough experiences are stored
                if len(self.memory) > self.batch_size:
                    self.replay()
        # final newline after the in-place progress output
        print()
'min_performance'): 129 | self.env.min_performance = mp 130 | print() 131 | 132 | -------------------------------------------------------------------------------- /code/finance.py: -------------------------------------------------------------------------------- 1 | # 2 | # Finance Environment with Historical Data 3 | # 4 | # (c) Dr. Yves J. Hilpisch 5 | # Reinforcement Learning for Finance 6 | # 7 | 8 | import random 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | class ActionSpace: 14 | def sample(self): 15 | return random.randint(0, 1) 16 | 17 | 18 | class Finance: 19 | url = 'https://certificate.tpq.io/rl4finance.csv' 20 | def __init__(self, symbol, feature, min_accuracy=0.485, n_features=4): 21 | self.symbol = symbol 22 | self.feature = feature 23 | self.n_features = n_features 24 | self.action_space = ActionSpace() 25 | self.min_accuracy = min_accuracy 26 | self._get_data() 27 | self._prepare_data() 28 | 29 | def _get_data(self): 30 | self.raw = pd.read_csv(self.url, 31 | index_col=0, parse_dates=True) 32 | 33 | def _prepare_data(self): 34 | self.data = pd.DataFrame(self.raw[self.symbol]).dropna() 35 | self.data['r'] = np.log(self.data / self.data.shift(1)) 36 | self.data['d'] = np.where(self.data['r'] > 0, 1, 0) 37 | self.data.dropna(inplace=True) 38 | self.data_ = (self.data - self.data.mean()) / self.data.std() 39 | 40 | def reset(self): 41 | self.bar = self.n_features 42 | self.treward = 0 43 | state = self.data_[self.feature].iloc[ 44 | self.bar - self.n_features:self.bar].values 45 | return state, {} 46 | 47 | def step(self, action): 48 | if action == self.data['d'].iloc[self.bar]: 49 | correct = True 50 | else: 51 | correct = False 52 | reward = 1 if correct else 0 53 | self.treward += reward 54 | self.bar += 1 55 | self.accuracy = self.treward / (self.bar - self.n_features) 56 | if self.bar >= len(self.data): 57 | done = True 58 | elif reward == 1: 59 | done = False 60 | elif (self.accuracy < self.min_accuracy) and (self.bar > 15): 61 | 
#
# Monte Carlo Simulation Environment
#
# (c) Dr. Yves J. Hilpisch
# Reinforcement Learning for Finance
#

import math
import random

import numpy as np
import pandas as pd
from numpy.random import default_rng

# Module-level generator, kept for API compatibility / interactive use.
rng = default_rng()


class ActionSpace:
    """Discrete action space with the two actions 0 ("down") and 1 ("up")."""

    def sample(self):
        """Return a uniformly random action from {0, 1}."""
        return random.randint(0, 1)


class Simulation:
    """Gym-style prediction environment driven by simulated price paths.

    A mean-reverting diffusion (reversion speed ``kappa``, long-run
    level ``theta``, proportional volatility ``sigma``) is discretized
    with Euler steps on a date grid from ``start`` to ``end`` with
    ``periods`` points.  The agent observes a rolling window of
    ``n_features`` lagged values of ``feature`` and predicts the
    direction of the next log return; each correct prediction earns a
    reward of 1.
    """

    def __init__(self, symbol, feature, n_features,
                 start, end, periods,
                 min_accuracy=0.525, x0=100,
                 kappa=1, theta=100, sigma=0.2,
                 normalize=True, new=False):
        self.symbol = symbol
        self.feature = feature  # column used for the state window
        self.n_features = n_features  # length of the rolling state window
        self.start = start  # first timestamp of the simulated path
        self.end = end  # last timestamp of the simulated path
        self.periods = periods  # number of grid points
        self.x0 = x0  # initial value of the process
        self.kappa = kappa  # mean-reversion speed
        self.theta = theta  # long-run mean level
        self.sigma = sigma  # (proportional) volatility factor
        self.min_accuracy = min_accuracy  # early-stopping accuracy floor
        self.normalize = normalize  # Gaussian-normalize state data?
        self.new = new  # simulate a fresh path on every reset?
        self.action_space = ActionSpace()
        self._simulate_data()
        self._prepare_data()

    def _simulate_data(self):
        # Euler discretization of the mean-reverting diffusion.
        index = pd.date_range(start=self.start,
                              end=self.end, periods=self.periods)
        # Year fraction per step.  NOTE(review): divides by periods, not
        # periods - 1 — kept as-is to preserve the original grid.
        dt = (index[-1] - index[0]).days / 365 / self.periods
        path = [self.x0]
        for t in range(1, len(index)):
            prev = path[t - 1]
            nxt = (prev + self.kappa * (self.theta - prev) * dt +
                   prev * self.sigma * math.sqrt(dt) * random.gauss(0, 1))
            path.append(nxt)
        self.data = pd.DataFrame(path, columns=[self.symbol], index=index)

    def _prepare_data(self):
        # Derive log returns 'r' and the binary direction label 'd';
        # optionally keep a normalized copy for state construction.
        self.data['r'] = np.log(self.data / self.data.shift(1))
        self.data.dropna(inplace=True)
        if self.normalize:
            self.mu = self.data.mean()
            self.std = self.data.std()
            self.data_ = (self.data - self.mu) / self.std
        else:
            self.data_ = self.data.copy()
        # 'd' is added after data_ is built, so states never contain it.
        self.data['d'] = np.where(self.data['r'] > 0, 1, 0)
        self.data['d'] = self.data['d'].astype(int)

    def _get_state(self):
        # Rolling window of the last n_features (normalized) feature values.
        return self.data_[self.feature].iloc[self.bar -
                                             self.n_features:self.bar]

    def seed(self, seed):
        """Seed the random number generators used by the simulation.

        Bug fix: the original additionally called
        ``tf.random.set_random_seed(seed)``, but TensorFlow is never
        imported in this module (and that API is TF1-only), so calling
        ``seed()`` raised a ``NameError``.  The call has been removed.
        """
        random.seed(seed)
        np.random.seed(seed)

    def reset(self):
        """Reset the environment; return the initial state and an info dict."""
        if self.new:
            # Draw a fresh Monte Carlo path for every episode.
            self._simulate_data()
            self._prepare_data()
        self.treward = 0
        self.accuracy = 0
        self.bar = self.n_features
        return self._get_state().values, {}

    def step(self, action):
        """Advance one bar; return (state, reward, done, truncated, info)."""
        # Reward 1 iff the action matches the realized direction label.
        reward = int(action == self.data['d'].iloc[self.bar])
        self.treward += reward
        self.bar += 1
        self.accuracy = self.treward / (self.bar - self.n_features)
        if self.bar >= len(self.data):
            done = True  # path exhausted
        elif reward == 1:
            done = False
        elif (self.accuracy < self.min_accuracy and
              self.bar > self.n_features + 15):
            done = True  # accuracy too low after burn-in phase
        else:
            done = False
        next_state = self.data_[self.feature].iloc[
            self.bar - self.n_features:self.bar].values
        return next_state, reward, done, False, {}
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 01 — Learning through Interaction**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "3bbe3719-fcab-4963-8701-087562dd5d79", 28 | "metadata": {}, 29 | "source": [ 30 | "## Learning" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "a6b8710a-19a3-4b5b-ae60-ffbd53dc45c4", 36 | "metadata": {}, 37 | "source": [ 38 | "### Tossing a Biased Coin" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "id": "5d781056-299a-4dd5-8908-038a2438ec44", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy as np\n", 49 | "from numpy.random import default_rng\n", 50 | "rng = default_rng(seed=100)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "id": "3cf14cf9-53ed-428b-a597-3f380f4cff5a", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "ssp = [1, 0]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "id": "ed2b1719-cac4-46c2-9398-c634068d3666", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "asp = [1, 0]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "id": "20cfd83a-7e57-4fa4-8126-81bb7a4758ba", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def epoch():\n", 81 | " tr = 0\n", 82 | " for _ in range(100):\n", 83 | " a = rng.choice(asp)\n", 84 | " s = rng.choice(ssp)\n", 85 | " if a == s:\n", 86 | " tr += 1\n", 87 | " return tr" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "id": "89fc1f62-24df-4baa-9784-3126431dbdfe", 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "array([56, 47, 
48, 55, 55, 51, 54, 43, 55, 40])" 100 | ] 101 | }, 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "rl = np.array([epoch() for _ in range(250)])\n", 109 | "rl[:10]" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "52f92a52-d305-42f1-a1ff-7b3aacc26549", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "49.968" 122 | ] 123 | }, 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "rl.mean()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 7, 136 | "id": "855b4cf5-75d8-4dbc-bdae-1cd753e50691", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "ssp = [1, 1, 1, 1, 0]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "id": "8148d8b9-de41-4d16-ab8f-b41d45a2a1a7", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "asp = [1, 0]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 9, 156 | "id": "bea9ad54-804a-4d76-a614-50b01be65805", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "def epoch():\n", 161 | " tr = 0\n", 162 | " for _ in range(100):\n", 163 | " a = rng.choice(asp)\n", 164 | " s = rng.choice(ssp)\n", 165 | " if a == s:\n", 166 | " tr += 1\n", 167 | " return tr" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 10, 173 | "id": "988094e8-64c7-46e4-a54e-f111765c9e71", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "array([53, 56, 40, 55, 53, 49, 43, 45, 50, 51])" 180 | ] 181 | }, 182 | "execution_count": 10, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "rl = np.array([epoch() for _ in range(250)])\n", 189 | "rl[:10]" 190 | ] 191 | }, 192 | { 193 | 
"cell_type": "code", 194 | "execution_count": 11, 195 | "id": "0aeed633-c81c-4b7f-9e19-c1a03ac3e32d", 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "49.924" 202 | ] 203 | }, 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "rl.mean()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 12, 216 | "id": "f2220ff9-c8c2-462f-aad0-c07405272976", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "ssp = [1, 1, 1, 1, 0]" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 13, 226 | "id": "e043cb3e-b943-4c4a-a337-f50810795d63", 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "def epoch(n):\n", 231 | " tr = 0\n", 232 | " asp = [0, 1]\n", 233 | " for _ in range(n):\n", 234 | " a = rng.choice(asp)\n", 235 | " s = rng.choice(ssp)\n", 236 | " if a == s:\n", 237 | " tr += 1\n", 238 | " asp.append(s)\n", 239 | " return tr" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 14, 245 | "id": "63ed3ba7-5701-4613-8a37-94eb4b114354", 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "array([71, 65, 67, 69, 68, 72, 68, 68, 77, 73])" 252 | ] 253 | }, 254 | "execution_count": 14, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "rl = np.array([epoch(100) for _ in range(250)])\n", 261 | "rl[:10]" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 15, 267 | "id": "ccb173db-cf9f-4ee2-8bb1-f2b41990f130", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "66.78" 274 | ] 275 | }, 276 | "execution_count": 15, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "rl.mean()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 16, 
288 | "id": "74d45682-4f46-4950-b35c-2f8dff86d448", 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "from collections import Counter" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 17, 298 | "id": "535ead89-8667-48ae-830f-ec6679780272", 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "ssp = [1, 1, 1, 1, 0]" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 18, 308 | "id": "67569ec3-4525-443e-8cda-390af539804d", 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "def epoch(n):\n", 313 | " tr = 0\n", 314 | " asp = [0, 1]\n", 315 | " for _ in range(n):\n", 316 | " c = Counter(asp)\n", 317 | " a = c.most_common()[0][0]\n", 318 | " s = rng.choice(ssp)\n", 319 | " if a == s:\n", 320 | " tr += 1\n", 321 | " asp.append(s)\n", 322 | " return tr" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 19, 328 | "id": "fc5893e5-a997-4fe8-88a4-13afe44c5175", 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/plain": [ 334 | "array([81, 70, 74, 77, 82, 74, 81, 80, 77, 78])" 335 | ] 336 | }, 337 | "execution_count": 19, 338 | "metadata": {}, 339 | "output_type": "execute_result" 340 | } 341 | ], 342 | "source": [ 343 | "rl = np.array([epoch(100) for _ in range(250)])\n", 344 | "rl[:10]" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 20, 350 | "id": "7187f48e-e276-4f0a-959b-62ddc1bd23e8", 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "78.828" 357 | ] 358 | }, 359 | "execution_count": 20, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "rl.mean()" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "id": "451350fe-1075-4969-808c-b5aaf37cec25", 371 | "metadata": {}, 372 | "source": [ 373 | "### Rolling a Biased Die" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | 
"execution_count": 21, 379 | "id": "bf4b0649-b1fa-4b74-bd31-ae5f20d00105", 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "ssp = [1, 2, 3, 4, 4, 4, 4, 4, 5, 6]" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 22, 389 | "id": "4e3900fe-b22b-4ea2-b00c-8d057e553cad", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "asp = [1, 2, 3, 4, 5, 6]" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 23, 399 | "id": "6bac0a45-5a2a-4276-a329-86978e3f9db1", 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "def epoch():\n", 404 | " tr = 0\n", 405 | " for _ in range(600):\n", 406 | " a = rng.choice(asp)\n", 407 | " s = rng.choice(ssp)\n", 408 | " if a == s:\n", 409 | " tr += 1\n", 410 | " return tr" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 24, 416 | "id": "062abdd3-2a65-4d1e-a9af-cf25772b54c4", 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "data": { 421 | "text/plain": [ 422 | "array([ 92, 96, 106, 99, 96, 107, 101, 106, 92, 117])" 423 | ] 424 | }, 425 | "execution_count": 24, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "rl = np.array([epoch() for _ in range(250)])\n", 432 | "rl[:10]" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 25, 438 | "id": "a82b6f5f-7b32-403a-94a5-91ebc9e90815", 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "101.22" 445 | ] 446 | }, 447 | "execution_count": 25, 448 | "metadata": {}, 449 | "output_type": "execute_result" 450 | } 451 | ], 452 | "source": [ 453 | "rl.mean()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 26, 459 | "id": "7e3a9fb0-22ea-4fed-8ff3-f0ab48169031", 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "def epoch():\n", 464 | " tr = 0\n", 465 | " asp = [1, 2, 3, 4, 5, 6]\n", 466 | " for _ in range(600):\n", 
467 | " a = rng.choice(asp)\n", 468 | " s = rng.choice(ssp)\n", 469 | " if a == s:\n", 470 | " tr += 1\n", 471 | " asp.append(s)\n", 472 | " return tr" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 27, 478 | "id": "79f099b7-ca59-45d1-bb10-0f19c8f7fd35", 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "array([182, 174, 162, 157, 184, 167, 190, 208, 171, 153])" 485 | ] 486 | }, 487 | "execution_count": 27, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "rl = np.array([epoch() for _ in range(250)])\n", 494 | "rl[:10]" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 28, 500 | "id": "cd641f5f-205e-4414-8006-1a8464aa49cb", 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "176.296" 507 | ] 508 | }, 509 | "execution_count": 28, 510 | "metadata": {}, 511 | "output_type": "execute_result" 512 | } 513 | ], 514 | "source": [ 515 | "rl.mean()" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 29, 521 | "id": "be27b1cb-19bf-4c08-bffe-84e7164a2131", 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "def epoch():\n", 526 | " tr = 0\n", 527 | " asp = [1, 2, 3, 4, 5, 6]\n", 528 | " for _ in range(600):\n", 529 | " c = Counter(asp)\n", 530 | " a = c.most_common()[0][0]\n", 531 | " s = rng.choice(ssp)\n", 532 | " if a == s:\n", 533 | " tr += 1\n", 534 | " asp.append(s)\n", 535 | " return tr" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 30, 541 | "id": "fd68ba52-aaca-4c17-819e-5a9f96053c14", 542 | "metadata": {}, 543 | "outputs": [ 544 | { 545 | "data": { 546 | "text/plain": [ 547 | "array([305, 288, 312, 306, 318, 302, 304, 311, 313, 281])" 548 | ] 549 | }, 550 | "execution_count": 30, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "rl = np.array([epoch() 
for _ in range(250)])\n", 557 | "rl[:10]" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 31, 563 | "id": "6c6b0239-493d-4fc8-8ca9-2dd49f8eff4f", 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/plain": [ 569 | "297.204" 570 | ] 571 | }, 572 | "execution_count": 31, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "rl.mean()" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 32, 584 | "id": "c0337cd1-b618-48df-bb51-3686caa3f1dd", 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "name": "stdout", 589 | "output_type": "stream", 590 | "text": [ 591 | "10,000,000,000,000,000,000,000,000,000,000,000,000,000\n" 592 | ] 593 | } 594 | ], 595 | "source": [ 596 | "cm = 10 ** 40\n", 597 | "print(f'{cm:,}')" 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 603 | "metadata": {}, 604 | "source": [ 605 | "\"The
\n", 606 | "\n", 607 | "https://tpq.io | @dyjh | team@tpq.io" 608 | ] 609 | } 610 | ], 611 | "metadata": { 612 | "kernelspec": { 613 | "display_name": "Python 3 (ipykernel)", 614 | "language": "python", 615 | "name": "python3" 616 | }, 617 | "language_info": { 618 | "codemirror_mode": { 619 | "name": "ipython", 620 | "version": 3 621 | }, 622 | "file_extension": ".py", 623 | "mimetype": "text/x-python", 624 | "name": "python", 625 | "nbconvert_exporter": "python", 626 | "pygments_lexer": "ipython3", 627 | "version": "3.10.14" 628 | } 629 | }, 630 | "nbformat": 4, 631 | "nbformat_minor": 5 632 | } 633 | -------------------------------------------------------------------------------- /pytorch/02_rl4f_pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 02 — Deep Q-Learning**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## CartPole" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "5e3924c3-2cad-4400-8806-5acf2f4b9b16", 36 | "metadata": {}, 37 | "source": [ 38 | "### The Game Environment " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "id": "72f3a51a-71e6-497d-bab3-926444a6bb30", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import gymnasium as gym" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "e19725f2-a026-487e-826c-00fa5fce71ec", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "env = gym.make('CartPole-v1')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "af76fb4e-3b31-4465-bff5-e5f8362af3d2", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "env.action_space" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "bdb45da1-6f9c-464d-bb16-e098ddd52838", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "env.action_space.n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "77e8ec50-f5a4-4706-8937-6724582ebdc3", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "[env.action_space.sample() for _ in range(10)]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "592d3ddc-3958-42ff-b4c7-8924ce0a343d", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "env.observation_space" 
99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "19474f1a-29c3-4cc2-89f6-6226845f5468", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "env.observation_space.shape" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "id": "4bdd054d-4a5e-429e-9e44-3e436a20446d", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "env.reset(seed=100)\n", 119 | "# cart position, cart velocity, pole angle, pole angular velocity" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "id": "875c67b7-4817-4fac-8fbb-0596c399af96", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "env.step(0)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "7be7afb1-e69d-41d7-b869-c73747e38b61", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "env.step(1)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "id": "f8f6e49b-3308-418a-999c-f7d6a052cfea", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "class RandomAgent:\n", 150 | " def __init__(self):\n", 151 | " self.env = gym.make('CartPole-v1')\n", 152 | " def play(self, episodes=1):\n", 153 | " self.trewards = list()\n", 154 | " for e in range(episodes):\n", 155 | " self.env.reset()\n", 156 | " for step in range(1, 100):\n", 157 | " a = self.env.action_space.sample()\n", 158 | " state, reward, done, trunc, info = self.env.step(a)\n", 159 | " if done:\n", 160 | " self.trewards.append(step)\n", 161 | " break" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "dffbb689-b81e-48cc-9fac-3a7dec9c1ff7", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "ra = RandomAgent()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "cbb3b03c-ded1-4ca7-80d2-e316635379b8", 178 | 
"metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "ra.play(15)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "id": "5b83a7c9-485a-433d-b637-9ffbe6fe7146", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "ra.trewards" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "id": "27d9d910-4f2d-4d7b-bcaa-a28747474c00", 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "round(sum(ra.trewards) / len(ra.trewards), 2)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "id": "12e1594d-ea7c-49e9-9149-92848ba72440", 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "import os\n", 212 | "import random\n", 213 | "import warnings\n", 214 | "import numpy as np\n", 215 | "import torch\n", 216 | "import torch.nn as nn\n", 217 | "import torch.optim as optim\n", 218 | "from collections import deque" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "id": "fa105bbb-727f-488d-8152-b5c1cc4d7646", 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "warnings.simplefilter('ignore')\n", 229 | "os.environ['PYTHONHASHSEED'] = '0'\n", 230 | "torch.backends.cudnn.deterministic = True\n", 231 | "torch.backends.cudnn.benchmark = False" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "id": "0264fac6-2c4a-4ea3-9031-e5006dce93c4", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "lr = 0.005" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "e7c28ee7-4be2-459c-8e27-029ec6ff4b4d", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "random.seed(100)\n", 252 | "np.random.seed(100)\n", 253 | "torch.manual_seed(100)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "072e8f75-0936-434f-ad65-c2f7cff91b7c", 260 
| "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "class DQLAgent:\n", 264 | " def __init__(self):\n", 265 | " self.epsilon = 1.0\n", 266 | " self.epsilon_decay = 0.9975\n", 267 | " self.epsilon_min = 0.1\n", 268 | " self.memory = deque(maxlen=2000)\n", 269 | " self.batch_size = 32\n", 270 | " self.gamma = 0.9\n", 271 | " self.trewards = []\n", 272 | " self.max_treward = 0\n", 273 | " self.env = gym.make('CartPole-v1')\n", 274 | " self.state_size = self.env.observation_space.shape[0]\n", 275 | " self.action_size = self.env.action_space.n\n", 276 | " self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 277 | " self.model = self._create_model().to(self.device)\n", 278 | " self.optimizer = optim.Adam(self.model.parameters(), lr=lr)\n", 279 | " self.criterion = nn.MSELoss()\n", 280 | " def _create_model(self):\n", 281 | " model = nn.Sequential(\n", 282 | " nn.Linear(self.state_size, 24),\n", 283 | " nn.ReLU(),\n", 284 | " nn.Linear(24, 24),\n", 285 | " nn.ReLU(),\n", 286 | " nn.Linear(24, self.action_size)\n", 287 | " )\n", 288 | " return model" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "id": "03e2299c-14bd-4cc8-af41-89b69d532544", 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "class DQLAgent(DQLAgent):\n", 299 | " def act(self, state):\n", 300 | " if random.random() < self.epsilon:\n", 301 | " return self.env.action_space.sample()\n", 302 | " state = torch.FloatTensor(state).to(self.device)\n", 303 | " with torch.no_grad():\n", 304 | " q_values = self.model(state)\n", 305 | " return torch.argmax(q_values).item()\n", 306 | " def replay(self):\n", 307 | " if len(self.memory) < self.batch_size:\n", 308 | " return\n", 309 | " batch = random.sample(self.memory, self.batch_size)\n", 310 | " states, actions, next_states, rewards, dones = zip(*batch)\n", 311 | " states = torch.FloatTensor(states).to(self.device).squeeze(1)\n", 312 | " next_states = 
torch.FloatTensor(next_states).to(self.device).squeeze(1)\n", 313 | " actions = torch.LongTensor(actions).to(self.device)\n", 314 | " rewards = torch.FloatTensor(rewards).to(self.device)\n", 315 | " dones = torch.FloatTensor(dones).to(self.device)\n", 316 | " q_values = self.model(states)\n", 317 | " q_value = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)\n", 318 | " next_q_values = self.model(next_states).max(1)[0]\n", 319 | " expected_q_value = rewards + self.gamma * next_q_values * (1 - dones)\n", 320 | " loss = self.criterion(q_value, expected_q_value.detach())\n", 321 | " self.optimizer.zero_grad()\n", 322 | " loss.backward()\n", 323 | " self.optimizer.step()\n", 324 | " if self.epsilon > self.epsilon_min:\n", 325 | " self.epsilon *= self.epsilon_decay" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "id": "2bf59f89-41a4-4f6e-8635-0513b3c3d8c1", 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "class DQLAgent(DQLAgent):\n", 336 | " def learn(self, episodes):\n", 337 | " for e in range(1, episodes + 1):\n", 338 | " state, _ = self.env.reset()\n", 339 | " state = np.reshape(state, [1, self.state_size])\n", 340 | " for f in range(1, 5000):\n", 341 | " action = self.act(state)\n", 342 | " next_state, reward, done, trunc, _ = self.env.step(action)\n", 343 | " next_state = np.reshape(next_state, [1, self.state_size])\n", 344 | " self.memory.append([state, action, next_state, reward, done])\n", 345 | " state = next_state\n", 346 | " if done or trunc:\n", 347 | " self.trewards.append(f)\n", 348 | " self.max_treward = max(self.max_treward, f)\n", 349 | " templ = f'episode={e:4d} | treward={f:4d}'\n", 350 | " templ += f' | max={self.max_treward:4d}'\n", 351 | " print(templ, end='\\r')\n", 352 | " break\n", 353 | " if len(self.memory) > self.batch_size:\n", 354 | " self.replay()\n", 355 | " print()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "id": 
"6a44a5f9-af9b-4929-a5c4-19e87f871c78", 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "class DQLAgent(DQLAgent):\n", 366 | " def test(self, episodes):\n", 367 | " for e in range(1, episodes + 1):\n", 368 | " state, _ = self.env.reset()\n", 369 | " state = np.reshape(state, [1, self.state_size])\n", 370 | " for f in range(1, 5001):\n", 371 | " state_tensor = torch.FloatTensor(state).to(self.device)\n", 372 | " with torch.no_grad():\n", 373 | " q_values = self.model(state_tensor)\n", 374 | " action = torch.argmax(q_values).item()\n", 375 | " state, reward, done, trunc, _ = self.env.step(action)\n", 376 | " state = np.reshape(state, [1, self.state_size])\n", 377 | " if done or trunc:\n", 378 | " print(f, end=' ')\n", 379 | " break" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "id": "64417ca0-49ba-4558-8c92-d89604ff3e16", 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "agent = DQLAgent()" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "id": "f77a72ab-5a4b-4d3d-863a-f8d08d2e3ce2", 396 | "metadata": { 397 | "tags": [] 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "%time agent.learn(2500)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "id": "fbfc1255-66fe-4c69-9135-70100b981109", 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "agent.epsilon" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "id": "af72f8d3-4e2a-4d0f-8311-a56ba4487832", 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "agent.test(15)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 427 | "metadata": {}, 428 | "source": [ 429 | "\"The
\n", 430 | "\n", 431 | "https://tpq.io | @dyjh | team@tpq.io" 432 | ] 433 | } 434 | ], 435 | "metadata": { 436 | "kernelspec": { 437 | "display_name": "Python 3 (ipykernel)", 438 | "language": "python", 439 | "name": "python3" 440 | }, 441 | "language_info": { 442 | "codemirror_mode": { 443 | "name": "ipython", 444 | "version": 3 445 | }, 446 | "file_extension": ".py", 447 | "mimetype": "text/x-python", 448 | "name": "python", 449 | "nbconvert_exporter": "python", 450 | "pygments_lexer": "ipython3", 451 | "version": "3.11.12" 452 | } 453 | }, 454 | "nbformat": 4, 455 | "nbformat_minor": 5 456 | } 457 | -------------------------------------------------------------------------------- /pytorch/03_rl4f_pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 03 — Financial Q-Learning**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## Finance Environment" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "id": "f2c8cd7e-d93d-4c4d-ba77-3c0cb7b677af", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import os\n", 41 | "import random" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "id": "bd8d3cf4-c30c-432a-bd3f-23e98c4d201c", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "random.seed(100)\n", 52 | "os.environ['PYTHONHASHSEED'] = '0'" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "id": "cb33cd0c-4fb1-4456-911f-0d92597db8c0", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "class ActionSpace:\n", 63 | " def sample(self):\n", 64 | " return random.randint(0, 1)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "id": "30d49bdd-e24b-4d87-a4dc-5639cc172f8e", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "action_space = ActionSpace()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "id": "416ce315-16d7-4c47-845a-f21a099b8ba3", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "[0, 1, 1, 0, 1, 1, 1, 0, 0, 0]" 87 | ] 88 | }, 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "[action_space.sample() for _ in range(10)]" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 
101 | "id": "f4df457f-9014-4e6a-878a-23645c77037d", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "import numpy as np\n", 106 | "import pandas as pd" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 7, 112 | "id": "952353e1-8f39-48ac-ac6d-5a21b9a44315", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "class Finance:\n", 117 | " url = 'https://certificate.tpq.io/rl4finance.csv'\n", 118 | " def __init__(self, symbol, feature,\n", 119 | " min_accuracy=0.485, n_features=4):\n", 120 | " self.symbol = symbol\n", 121 | " self.feature = feature\n", 122 | " self.n_features = n_features\n", 123 | " self.action_space = ActionSpace()\n", 124 | " self.min_accuracy = min_accuracy\n", 125 | " self._get_data()\n", 126 | " self._prepare_data()\n", 127 | " def _get_data(self):\n", 128 | " self.raw = pd.read_csv(self.url,\n", 129 | " index_col=0, parse_dates=True)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 8, 135 | "id": "69e1ed75-1e55-42f4-86a3-db54c60acf1f", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "class Finance(Finance):\n", 140 | " def _prepare_data(self):\n", 141 | " self.data = pd.DataFrame(self.raw[self.symbol]).dropna()\n", 142 | " self.data['r'] = np.log(self.data / self.data.shift(1))\n", 143 | " self.data['d'] = np.where(self.data['r'] > 0, 1, 0)\n", 144 | " self.data.dropna(inplace=True)\n", 145 | " self.data_ = (self.data - self.data.mean()) / self.data.std()\n", 146 | " def reset(self):\n", 147 | " self.bar = self.n_features\n", 148 | " self.treward = 0\n", 149 | " state = self.data_[self.feature].iloc[\n", 150 | " self.bar - self.n_features:self.bar].values\n", 151 | " return state, {}" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 9, 157 | "id": "a2b0ccc6-d8ec-4156-bf7a-30ba263fdde9", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "class Finance(Finance):\n", 162 | " def step(self, 
action):\n", 163 | " if action == self.data['d'].iloc[self.bar]:\n", 164 | " correct = True\n", 165 | " else:\n", 166 | " correct = False\n", 167 | " reward = 1 if correct else 0\n", 168 | " self.treward += reward\n", 169 | " self.bar += 1\n", 170 | " self.accuracy = self.treward / (self.bar - self.n_features)\n", 171 | " if self.bar >= len(self.data):\n", 172 | " done = True\n", 173 | " elif reward == 1:\n", 174 | " done = False\n", 175 | " elif (self.accuracy < self.min_accuracy) and (self.bar > 15):\n", 176 | " done = True\n", 177 | " else:\n", 178 | " done = False\n", 179 | " next_state = self.data_[self.feature].iloc[\n", 180 | " self.bar - self.n_features:self.bar].values\n", 181 | " return next_state, reward, done, False, {}" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 10, 187 | "id": "373a0a8c-3b85-4933-8de5-1103d4cc1a6b", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "fin = Finance(symbol='EUR=', feature='EUR=')" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 11, 197 | "id": "d4c4248b-2168-42d2-b766-27270681b5dd", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "['AAPL.O',\n", 204 | " 'MSFT.O',\n", 205 | " 'INTC.O',\n", 206 | " 'AMZN.O',\n", 207 | " 'GS.N',\n", 208 | " '.SPX',\n", 209 | " '.VIX',\n", 210 | " 'SPY',\n", 211 | " 'EUR=',\n", 212 | " 'XAU=',\n", 213 | " 'GDX',\n", 214 | " 'GLD']" 215 | ] 216 | }, 217 | "execution_count": 11, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "list(fin.raw.columns)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 12, 229 | "id": "0c2042dd-3d9a-4976-bb6d-d58daeeaf650", 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "(array([2.74844931, 2.64643904, 2.69560062, 2.68085214]), {})" 236 | ] 237 | }, 238 | "execution_count": 12, 239 | "metadata": {}, 240 | 
"output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "fin.reset()\n", 245 | "# four lagged, normalized price points" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 13, 251 | "id": "d0e04a87-7f63-4532-8609-2ad598d67067", 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "1" 258 | ] 259 | }, 260 | "execution_count": 13, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "fin.action_space.sample()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 14, 272 | "id": "2c6a11b6-87da-4226-baad-0fa9f4942c44", 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "(array([2.64643904, 2.69560062, 2.68085214, 2.63046153]), 0, False, False, {})" 279 | ] 280 | }, 281 | "execution_count": 14, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "fin.step(fin.action_space.sample())" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 15, 293 | "id": "c0a3b905-2eea-406f-9bee-bb61d6f5e463", 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "fin = Finance('EUR=', 'r')" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 16, 303 | "id": "c490647f-9757-46bf-911d-c53477d9b3d0", 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "(array([-1.19130476, -1.21344494, 0.61099805, -0.16094865]), {})" 310 | ] 311 | }, 312 | "execution_count": 16, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "fin.reset()\n", 319 | "# four lagged, normalized log returns" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 17, 325 | "id": "1c0bab87-6d45-4e17-a52c-3d19273bd804", 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "class RandomAgent:\n", 330 | " 
def __init__(self):\n", 331 | " self.env = Finance('EUR=', 'r')\n", 332 | " def play(self, episodes=1):\n", 333 | " self.trewards = list()\n", 334 | " for e in range(episodes):\n", 335 | " self.env.reset()\n", 336 | " for step in range(1, 100):\n", 337 | " a = self.env.action_space.sample()\n", 338 | " state, reward, done, trunc, info = self.env.step(a)\n", 339 | " if done:\n", 340 | " self.trewards.append(step)\n", 341 | " break" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 18, 347 | "id": "417b3f00-199f-4db7-b500-b7b7f99ce15b", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "ra = RandomAgent()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 19, 357 | "id": "99850e42-8c2b-46a6-9a92-59a0e5940061", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "ra.play(15)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 20, 367 | "id": "1a6351f5-e532-4703-ae3b-0f7ec2483f48", 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "[17, 13, 17, 12, 12, 12, 13, 23, 31, 13, 12, 15]" 374 | ] 375 | }, 376 | "execution_count": 20, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "ra.trewards" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 21, 388 | "id": "9590104e-899f-4a4a-81a3-0b952a8f1818", 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "15.83" 395 | ] 396 | }, 397 | "execution_count": 21, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "round(sum(ra.trewards) / len(ra.trewards), 2)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 22, 409 | "id": "2252d5e0-0c3f-4900-a96f-1fe6348ccd18", 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/plain": [ 415 | "2607" 416 | ] 417 | }, 418 | 
"execution_count": 22, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "len(fin.data)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "id": "b0151fd8-57af-4ea7-9c4b-5aa98d779868", 430 | "metadata": {}, 431 | "source": [ 432 | "## DQL Agent" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 23, 438 | "id": "06e651e5-4eb4-4001-b8a3-d629721b6eed", 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "import os\n", 443 | "import random\n", 444 | "import warnings\n", 445 | "import numpy as np\n", 446 | "import torch\n", 447 | "import torch.nn as nn\n", 448 | "import torch.optim as optim\n", 449 | "from collections import deque" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 24, 455 | "id": "a04e9dcb-5a0c-463b-9714-012a9b8e4093", 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "warnings.simplefilter('ignore')" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 26, 465 | "id": "9c5656a5-7378-494b-a43f-5ba736105485", 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "lr = 0.0001" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 27, 475 | "id": "9a1c06c7-6477-4a73-9bf5-68b497c52e8c", 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "class DQLAgent:\n", 480 | " def __init__(self, symbol, feature, min_accuracy, n_features=4):\n", 481 | " self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 482 | " self.epsilon = 1.0\n", 483 | " self.epsilon_decay = 0.9975\n", 484 | " self.epsilon_min = 0.1\n", 485 | " self.memory = deque(maxlen=2000)\n", 486 | " self.batch_size = 32\n", 487 | " self.gamma = 0.5\n", 488 | " self.trewards = []\n", 489 | " self.max_treward = 0\n", 490 | " self.n_features = n_features\n", 491 | " # Define neural network\n", 492 | " class Net(nn.Module):\n", 493 | " def __init__(self, input_dim, 
output_dim):\n", 494 | " super(Net, self).__init__()\n", 495 | " self.fc1 = nn.Linear(input_dim, 24)\n", 496 | " self.fc2 = nn.Linear(24, 24)\n", 497 | " self.out = nn.Linear(24, output_dim)\n", 498 | " def forward(self, x):\n", 499 | " x = torch.relu(self.fc1(x))\n", 500 | " x = torch.relu(self.fc2(x))\n", 501 | " return self.out(x)\n", 502 | " self.model = Net(n_features, 2).to(self.device)\n", 503 | " self.optimizer = optim.Adam(self.model.parameters(), lr=lr)\n", 504 | " self.loss_fn = nn.MSELoss()\n", 505 | " self.env = Finance(symbol, feature, min_accuracy, n_features)\n", 506 | " def act(self, state):\n", 507 | " if random.random() < self.epsilon:\n", 508 | " return self.env.action_space.sample()\n", 509 | " state = torch.FloatTensor(state).unsqueeze(0).to(self.device)\n", 510 | " with torch.no_grad():\n", 511 | " q_values = self.model(state)\n", 512 | " return int(torch.argmax(q_values[0]).item())\n", 513 | " def replay(self):\n", 514 | " batch = random.sample(self.memory, self.batch_size)\n", 515 | " for state, action, next_state, reward, done in batch:\n", 516 | " state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)\n", 517 | " next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0).to(self.device)\n", 518 | " with torch.no_grad():\n", 519 | " target_q = reward + (0 if done else self.gamma * torch.max(self.model(next_state_tensor)[0]).item())\n", 520 | " current_q = self.model(state_tensor)[0, action]\n", 521 | " loss = self.loss_fn(current_q, torch.tensor(target_q, dtype=torch.float, device=self.device))\n", 522 | " self.optimizer.zero_grad()\n", 523 | " loss.backward()\n", 524 | " self.optimizer.step()\n", 525 | " if self.epsilon > self.epsilon_min:\n", 526 | " self.epsilon *= self.epsilon_decay\n", 527 | " def learn(self, episodes):\n", 528 | " for e in range(1, episodes + 1):\n", 529 | " state, _ = self.env.reset()\n", 530 | " for f in range(1, 5000):\n", 531 | " action = self.act(state)\n", 532 | " next_state, reward, done, 
trunc, _ = self.env.step(action)\n", 533 | " self.memory.append((state, action, next_state, reward, done))\n", 534 | " state = next_state\n", 535 | " if done:\n", 536 | " self.trewards.append(f)\n", 537 | " self.max_treward = max(self.max_treward, f)\n", 538 | " print(f'episode={e:4d} | treward={f:4d} | max={self.max_treward:4d}', end='\\r')\n", 539 | " break\n", 540 | " if len(self.memory) > self.batch_size:\n", 541 | " self.replay()\n", 542 | " print()\n", 543 | " def test(self, episodes):\n", 544 | " ma = self.env.min_accuracy\n", 545 | " self.env.min_accuracy = 0.5\n", 546 | " for e in range(1, episodes + 1):\n", 547 | " state, _ = self.env.reset()\n", 548 | " for f in range(1, 5001):\n", 549 | " state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)\n", 550 | " with torch.no_grad():\n", 551 | " action = int(torch.argmax(self.model(state_tensor)[0]).item())\n", 552 | " state, reward, done, trunc, _ = self.env.step(action)\n", 553 | " if done:\n", 554 | " print(f'total reward={f} | accuracy={self.env.accuracy:.3f}')\n", 555 | " break\n", 556 | " self.env.min_accuracy = ma" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 28, 562 | "id": "d83cf567-0389-474d-accd-38431edaf755", 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "data": { 567 | "text/plain": [ 568 | "" 569 | ] 570 | }, 571 | "execution_count": 28, 572 | "metadata": {}, 573 | "output_type": "execute_result" 574 | } 575 | ], 576 | "source": [ 577 | "random.seed(250)\n", 578 | "np.random.seed(250)\n", 579 | "torch.manual_seed(250)" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 29, 585 | "id": "268f6f90-082d-4827-bdef-8bffa57016c7", 586 | "metadata": {}, 587 | "outputs": [], 588 | "source": [ 589 | "agent = DQLAgent('EUR=', 'r', 0.495, 4)" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 32, 595 | "id": "ae2336af-de7e-4b3a-8ecd-292a06a0beb4", 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | 
"name": "stdout", 600 | "output_type": "stream", 601 | "text": [ 602 | "episode= 250 | treward= 29 | max=2603\n", 603 | "CPU times: user 7.36 s, sys: 8.88 ms, total: 7.37 s\n", 604 | "Wall time: 7.39 s\n" 605 | ] 606 | } 607 | ], 608 | "source": [ 609 | "%time agent.learn(250)" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 33, 615 | "id": "6a1023a5-07ef-4ac3-86c4-307a356cd2ba", 616 | "metadata": {}, 617 | "outputs": [ 618 | { 619 | "name": "stdout", 620 | "output_type": "stream", 621 | "text": [ 622 | "total reward=123 | accuracy=0.496\n", 623 | "total reward=123 | accuracy=0.496\n", 624 | "total reward=123 | accuracy=0.496\n", 625 | "total reward=123 | accuracy=0.496\n", 626 | "total reward=123 | accuracy=0.496\n" 627 | ] 628 | } 629 | ], 630 | "source": [ 631 | "agent.test(5)" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 637 | "metadata": {}, 638 | "source": [ 639 | "\"The
\n", 640 | "\n", 641 | "https://tpq.io | @dyjh | team@tpq.io" 642 | ] 643 | } 644 | ], 645 | "metadata": { 646 | "kernelspec": { 647 | "display_name": "Python 3 (ipykernel)", 648 | "language": "python", 649 | "name": "python3" 650 | }, 651 | "language_info": { 652 | "codemirror_mode": { 653 | "name": "ipython", 654 | "version": 3 655 | }, 656 | "file_extension": ".py", 657 | "mimetype": "text/x-python", 658 | "name": "python", 659 | "nbconvert_exporter": "python", 660 | "pygments_lexer": "ipython3", 661 | "version": "3.11.12" 662 | } 663 | }, 664 | "nbformat": 4, 665 | "nbformat_minor": 5 666 | } 667 | -------------------------------------------------------------------------------- /pytorch/04_rl4f.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 04 — Simulated Data**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## Finance Environment" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "cb33cd0c-4fb1-4456-911f-0d92597db8c0", 37 | "metadata": { 38 | "tags": [] 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "class ActionSpace:\n", 43 | " n = 2\n", 44 | " def sample(self):\n", 45 | " return random.randint(0, 1)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "f4df457f-9014-4e6a-878a-23645c77037d", 52 | "metadata": { 53 | "tags": [] 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import numpy as np\n", 58 | "import pandas as pd\n", 59 | "from numpy.random import default_rng" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "27fc1965-f838-4cb3-8d70-2e2266f3f7d7", 66 | "metadata": { 67 | "tags": [] 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "rng = default_rng(seed=100)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "952353e1-8f39-48ac-ac6d-5a21b9a44315", 78 | "metadata": { 79 | "tags": [] 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "class NoisyData:\n", 84 | " url = 'https://certificate.tpq.io/findata.csv'\n", 85 | " def __init__(self, symbol, feature, n_features=4,\n", 86 | " min_accuracy=0.485, noise=True,\n", 87 | " noise_std=0.001):\n", 88 | " self.symbol = symbol\n", 89 | " self.feature = feature\n", 90 | " self.n_features = n_features\n", 91 | " self.noise = noise\n", 92 | " self.noise_std = noise_std\n", 93 | " 
self.action_space = ActionSpace()\n", 94 | " self.min_accuracy = min_accuracy\n", 95 | " self._get_data()\n", 96 | " self._prepare_data()\n", 97 | " def _get_data(self):\n", 98 | " self.raw = pd.read_csv(self.url,\n", 99 | " index_col=0, parse_dates=True)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "69e1ed75-1e55-42f4-86a3-db54c60acf1f", 106 | "metadata": { 107 | "tags": [] 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "class NoisyData(NoisyData):\n", 112 | " def _prepare_data(self):\n", 113 | " self.data = pd.DataFrame(self.raw[self.symbol]).dropna()\n", 114 | " if self.noise:\n", 115 | " std = self.data.mean() * self.noise_std\n", 116 | " self.data[self.symbol] = (self.data[self.symbol] +\n", 117 | " rng.normal(0, std, len(self.data)))\n", 118 | " self.data['r'] = np.log(self.data / self.data.shift(1))\n", 119 | " self.data['d'] = np.where(self.data['r'] > 0, 1, 0)\n", 120 | " self.data.dropna(inplace=True)\n", 121 | " ma, mi = self.data.max(), self.data.min()\n", 122 | " self.data_ = (self.data - mi) / (ma - mi)\n", 123 | " def reset(self):\n", 124 | " if self.noise:\n", 125 | " self._prepare_data()\n", 126 | " self.bar = self.n_features\n", 127 | " self.treward = 0\n", 128 | " state = self.data_[self.feature].iloc[\n", 129 | " self.bar - self.n_features:self.bar].values\n", 130 | " return state, {}" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "id": "a2b0ccc6-d8ec-4156-bf7a-30ba263fdde9", 137 | "metadata": { 138 | "tags": [] 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "class NoisyData(NoisyData):\n", 143 | " def step(self, action):\n", 144 | " if action == self.data['d'].iloc[self.bar]:\n", 145 | " correct = True\n", 146 | " else:\n", 147 | " correct = False\n", 148 | " reward = 1 if correct else 0 \n", 149 | " self.treward += reward\n", 150 | " self.bar += 1\n", 151 | " self.accuracy = self.treward / (self.bar - self.n_features)\n", 152 | " if 
self.bar >= len(self.data):\n", 153 | " done = True\n", 154 | " elif reward == 1:\n", 155 | " done = False\n", 156 | " elif (self.accuracy < self.min_accuracy and\n", 157 | " self.bar > self.n_features + 15):\n", 158 | " done = True\n", 159 | " else:\n", 160 | " done = False\n", 161 | " next_state = self.data_[self.feature].iloc[\n", 162 | " self.bar - self.n_features:self.bar].values\n", 163 | " return next_state, reward, done, False, {}" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "id": "373a0a8c-3b85-4933-8de5-1103d4cc1a6b", 170 | "metadata": { 171 | "tags": [] 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "fin = NoisyData(symbol='EUR=', feature='EUR=',\n", 176 | " noise=True, noise_std=0.005)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "ef1d9c32-3d42-49e6-8b87-db9287038dae", 183 | "metadata": { 184 | "tags": [] 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "fin.reset()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "id": "a6239a19-edd1-479f-abae-53c5d91e91be", 195 | "metadata": { 196 | "tags": [] 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "fin.reset()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "c0a3b905-2eea-406f-9bee-bb61d6f5e463", 207 | "metadata": { 208 | "tags": [] 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "fin = NoisyData('EUR=', 'r', n_features=4,\n", 213 | " noise=True, noise_std=0.005)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "id": "c490647f-9757-46bf-911d-c53477d9b3d0", 220 | "metadata": { 221 | "tags": [] 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "fin.reset()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "94620793-e2bf-4644-bc6c-9abba7c650cf", 232 | "metadata": { 233 | "tags": [] 234 | }, 235 | "outputs": [], 236 | 
"source": [ 237 | "fin.reset()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "id": "f6e60f02-fc2c-4e66-9b0a-720d44794425", 244 | "metadata": { 245 | "tags": [] 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "from pylab import plt, mpl\n", 250 | "plt.style.use('seaborn-v0_8')\n", 251 | "mpl.rcParams['figure.dpi'] = 300\n", 252 | "mpl.rcParams['savefig.dpi'] = 300\n", 253 | "mpl.rcParams['font.family'] = 'serif'" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "85ccf9e1-24d9-4cf3-954e-412c2d4d60fa", 260 | "metadata": { 261 | "tags": [] 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "import warnings\n", 266 | "warnings.simplefilter('ignore')" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "a4c01838-aa4f-42d2-b98e-6cacf1008bc8", 273 | "metadata": { 274 | "tags": [] 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "for _ in range(5):\n", 279 | " fin.reset()\n", 280 | " fin.data[fin.symbol].loc['2022-7-1':].plot(lw=0.75, c='b')" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "id": "acc0b2d2-cca7-4f6f-be11-2e23bae0c883", 287 | "metadata": { 288 | "tags": [] 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "%run dqlagent_pytorch.py" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "id": "50edc41b-bee9-4a58-a0ef-7cd1551219eb", 299 | "metadata": { 300 | "tags": [] 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "id": "5bdb6c15-d700-4a7b-aa52-7e5d7ffd6585", 311 | "metadata": { 312 | "tags": [] 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "agent = DQLAgent(fin.symbol, fin.feature, fin.n_features, fin)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | 
"id": "68588f83-b8c5-4325-b8d9-a41b370bb937", 323 | "metadata": { 324 | "tags": [] 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "%time agent.learn(50)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "id": "8f9f260d-31b2-42b3-b31b-6dda0e49ff40", 335 | "metadata": { 336 | "tags": [] 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "agent.test(5)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "id": "f1bfed77-e7e6-42bc-aede-73653c3e39c9", 347 | "metadata": { 348 | "tags": [] 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "class Simulation:\n", 353 | " def __init__(self, symbol, feature, n_features,\n", 354 | " start, end, periods,\n", 355 | " min_accuracy=0.525, x0=100,\n", 356 | " kappa=1, theta=100, sigma=0.2,\n", 357 | " normalize=True, new=False):\n", 358 | " self.symbol = symbol\n", 359 | " self.feature = feature\n", 360 | " self.n_features = n_features\n", 361 | " self.start = start\n", 362 | " self.end = end\n", 363 | " self.periods = periods\n", 364 | " self.x0 = x0\n", 365 | " self.kappa = kappa\n", 366 | " self.theta = theta\n", 367 | " self.sigma = sigma\n", 368 | " self.min_accuracy = min_accuracy\n", 369 | " self.normalize = normalize\n", 370 | " self.new = new\n", 371 | " self.action_space = ActionSpace()\n", 372 | " self._simulate_data()\n", 373 | " self._prepare_data()" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "id": "93c34b7d-e91b-4082-9762-0a98bd99916b", 380 | "metadata": { 381 | "tags": [] 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "import math\n", 386 | "class Simulation(Simulation):\n", 387 | " def _simulate_data(self):\n", 388 | " index = pd.date_range(start=self.start,\n", 389 | " end=self.end, periods=self.periods)\n", 390 | " x = [self.x0]\n", 391 | " dt = (index[-1] - index[0]).days / 365 / self.periods\n", 392 | " for t in range(1, len(index)):\n", 393 | " x_ = (x[t - 1] + 
self.kappa * (self.theta - x[t - 1]) * dt +\n", 394 | " x[t - 1] * self.sigma * math.sqrt(dt) *\n", 395 | " random.gauss(0, 1))\n", 396 | " x.append(x_)\n", 397 | " \n", 398 | " self.data = pd.DataFrame(x, columns=[self.symbol], index=index)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "id": "125153fb-7edc-4ffe-aff3-d87875fd2277", 405 | "metadata": { 406 | "tags": [] 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "class Simulation(Simulation):\n", 411 | " def _prepare_data(self):\n", 412 | " self.data['r'] = np.log(self.data / self.data.shift(1))\n", 413 | " self.data.dropna(inplace=True)\n", 414 | " if self.normalize:\n", 415 | " self.mu = self.data.mean()\n", 416 | " self.std = self.data.std()\n", 417 | " self.data_ = (self.data - self.mu) / self.std\n", 418 | " else:\n", 419 | " self.data_ = self.data.copy()\n", 420 | " self.data['d'] = np.where(self.data['r'] > 0, 1, 0)\n", 421 | " self.data['d'] = self.data['d'].astype(int)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "id": "9aa041d0-cec7-41cd-9717-4057b8a65cb3", 428 | "metadata": { 429 | "tags": [] 430 | }, 431 | "outputs": [], 432 | "source": [ 433 | "class Simulation(Simulation):\n", 434 | " def _get_state(self):\n", 435 | " return self.data_[self.feature].iloc[self.bar -\n", 436 | " self.n_features:self.bar]\n", 437 | " def seed(self, seed):\n", 438 | " random.seed(seed)\n", 439 | " def reset(self):\n", 440 | " self.treward = 0\n", 441 | " self.accuracy = 0\n", 442 | " self.bar = self.n_features\n", 443 | " if self.new:\n", 444 | " self._simulate_data()\n", 445 | " self._prepare_data()\n", 446 | " state = self._get_state()\n", 447 | " return state.values, {}" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "id": "f111e718-5f7d-4f04-b509-5934815711b3", 454 | "metadata": { 455 | "tags": [] 456 | }, 457 | "outputs": [], 458 | "source": [ 459 | "class 
Simulation(Simulation):\n", 460 | " def step(self, action):\n", 461 | " if action == self.data['d'].iloc[self.bar]:\n", 462 | " correct = True\n", 463 | " else:\n", 464 | " correct = False\n", 465 | " reward = 1 if correct else 0 \n", 466 | " self.treward += reward\n", 467 | " self.bar += 1\n", 468 | " self.accuracy = self.treward / (self.bar - self.n_features)\n", 469 | " if self.bar >= len(self.data):\n", 470 | " done = True\n", 471 | " elif reward == 1:\n", 472 | " done = False\n", 473 | " elif (self.accuracy < self.min_accuracy and self.bar > 25):\n", 474 | " done = True\n", 475 | " else:\n", 476 | " done = False\n", 477 | " next_state = self.data_[self.feature].iloc[\n", 478 | " self.bar - self.n_features:self.bar].values\n", 479 | " return next_state, reward, done, False, {}" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "id": "3978c284-ab0e-4f70-b415-088314324fa9", 486 | "metadata": { 487 | "tags": [] 488 | }, 489 | "outputs": [], 490 | "source": [ 491 | "sym = 'EUR='" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "id": "91d2ca8c-7d7b-4a7a-ba55-14cf2d3183ca", 498 | "metadata": { 499 | "tags": [] 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "env_base = Simulation(sym, sym, 5, start='2024-1-1', end='2025-1-1',\n", 504 | " periods=252, x0=1, kappa=1, theta=1.1, sigma=0.0,\n", 505 | " normalize=True)" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "id": "00d4f460-e31d-4e7e-9ccc-9bf100255a63", 512 | "metadata": { 513 | "tags": [] 514 | }, 515 | "outputs": [], 516 | "source": [ 517 | "env_trend = Simulation(sym, sym, 5, start='2024-1-1', end='2025-1-1',\n", 518 | " periods=252, x0=1, kappa=1, theta=2, sigma=0.1,\n", 519 | " normalize=True)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "id": "bc924a45-6203-428f-9970-469c0f51d7be", 526 | "metadata": { 527 | "tags": [] 528 | }, 529 | 
"outputs": [], 530 | "source": [ 531 | "env_mrev = Simulation(sym, sym, 5, start='2024-1-1', end='2025-1-1',\n", 532 | " periods=252, x0=1, kappa=1, theta=1, sigma=0.1,\n", 533 | " normalize=True)" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "id": "87b0dceb-680e-4c91-92eb-2ffb63ad61d8", 540 | "metadata": { 541 | "tags": [] 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "env_mrev.data[sym].iloc[:3]" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "id": "47e281e5-69d6-4265-8f39-c25403967e54", 552 | "metadata": { 553 | "tags": [] 554 | }, 555 | "outputs": [], 556 | "source": [ 557 | "env_base.data[sym].plot(figsize=(10, 6), label='baseline', style='r')\n", 558 | "env_trend.data[sym].plot(label='trend', style='b:')\n", 559 | "env_mrev.data[sym].plot(label='mean-reversion', style='g--')\n", 560 | "plt.legend();" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "id": "d3352144-c111-4137-92fe-d388dd9e1063", 567 | "metadata": { 568 | "tags": [] 569 | }, 570 | "outputs": [], 571 | "source": [ 572 | "sim = Simulation(sym, 'r', 4, start='2024-1-1', end='2028-1-1',\n", 573 | " periods=2 * 252, min_accuracy=0.485, x0=1,\n", 574 | " kappa=2, theta=2, sigma=0.15,\n", 575 | " normalize=True, new=True)\n", 576 | "sim.seed(100)" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "id": "4f1cb49b-e8b0-4d7e-9e13-da4c4724b0b0", 583 | "metadata": { 584 | "tags": [] 585 | }, 586 | "outputs": [], 587 | "source": [ 588 | "for _ in range(10):\n", 589 | " sim.reset()\n", 590 | " sim.data[sym].plot(figsize=(10, 6), lw=1.0, c='b');" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "id": "5d764d20-f031-4dd2-a643-925cf3842bc3", 597 | "metadata": { 598 | "tags": [] 599 | }, 600 | "outputs": [], 601 | "source": [ 602 | "agent = DQLAgent(sim.symbol, sim.feature,\n", 603 | " sim.n_features, 
sim, lr=0.0001)" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "id": "281168cc-ef15-4bd8-9a5b-fd7ecba7d08d", 610 | "metadata": { 611 | "tags": [] 612 | }, 613 | "outputs": [], 614 | "source": [ 615 | "%time agent.learn(50)" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": null, 621 | "id": "5b6b2bea-cff0-4204-b717-13bb10eb0a59", 622 | "metadata": { 623 | "tags": [] 624 | }, 625 | "outputs": [], 626 | "source": [ 627 | "agent.test(5)" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "id": "85e8e40d-6c15-48ad-a163-5e5ef2491281", 634 | "metadata": { 635 | "tags": [] 636 | }, 637 | "outputs": [], 638 | "source": [ 639 | "sim = Simulation(sym, 'r', 4, start='2024-1-1', end='2028-1-1',\n", 640 | " periods=2 * 252, min_accuracy=0.6, x0=1,\n", 641 | " kappa=1.25, theta=1, sigma=0.15,\n", 642 | " normalize=True, new=True)\n", 643 | "sim.seed(100)" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": null, 649 | "id": "117e5023-d6f7-4120-b184-8c2209e30d81", 650 | "metadata": { 651 | "tags": [] 652 | }, 653 | "outputs": [], 654 | "source": [ 655 | "agent = DQLAgent(sim.symbol, sim.feature,\n", 656 | " sim.n_features, sim, lr=0.0001)" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "id": "38a35712-1a49-463d-b34e-3da2c0bded68", 663 | "metadata": { 664 | "tags": [] 665 | }, 666 | "outputs": [], 667 | "source": [ 668 | "%time agent.learn(150)" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "id": "5e12093f-897c-466b-958e-0c3ade328e9b", 675 | "metadata": { 676 | "tags": [] 677 | }, 678 | "outputs": [], 679 | "source": [ 680 | "agent.test(5)" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 686 | "metadata": {}, 687 | "source": [ 688 | "\"The
\n", 689 | "\n", 690 | "https://tpq.io | @dyjh | team@tpq.io" 691 | ] 692 | } 693 | ], 694 | "metadata": { 695 | "kernelspec": { 696 | "display_name": "Python 3 (ipykernel)", 697 | "language": "python", 698 | "name": "python3" 699 | }, 700 | "language_info": { 701 | "codemirror_mode": { 702 | "name": "ipython", 703 | "version": 3 704 | }, 705 | "file_extension": ".py", 706 | "mimetype": "text/x-python", 707 | "name": "python", 708 | "nbconvert_exporter": "python", 709 | "pygments_lexer": "ipython3", 710 | "version": "3.11.12" 711 | } 712 | }, 713 | "nbformat": 4, 714 | "nbformat_minor": 5 715 | } 716 | -------------------------------------------------------------------------------- /pytorch/05_rl4f_pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 05 — Generated Data**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## Simple Example" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "be1564c8-e582-444c-a42b-c46119e612df", 37 | "metadata": { 38 | "tags": [] 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import os\n", 43 | "import numpy as np\n", 44 | "import pandas as pd\n", 45 | "from pylab import plt, mpl" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "d3ddcf1b-43a1-4f1b-bf79-6772c6c9cc98", 52 | "metadata": { 53 | "tags": [] 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "import torch\n", 58 | "import torch.nn as nn\n", 59 | "import torch.optim as optim\n", 60 | "from sklearn.preprocessing import StandardScaler" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "a94ba4a3-69dd-4342-a682-e96eea349a84", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "plt.style.use('seaborn-v0_8')\n", 71 | "mpl.rcParams['figure.dpi'] = 300\n", 72 | "mpl.rcParams['savefig.dpi'] = 300\n", 73 | "mpl.rcParams['font.family'] = 'serif'" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "bc8d78a2-1f89-49fc-9aa5-577cae99fcd8", 80 | "metadata": { 81 | "tags": [] 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "x = np.linspace(-2, 2, 500)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "5d9546a3-7e4f-496e-97d5-1c6400f74053", 92 | "metadata": { 93 | "tags": [] 94 | }, 95 | "outputs": [], 96 | "source": [ 
97 | "def f(x):\n", 98 | " return x ** 3" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "772a3d17-2fab-43b8-a8df-2bb95b086830", 105 | "metadata": { 106 | "tags": [] 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "y = f(x)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "0e7d1721-5321-492b-9f99-64a70339a209", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "scaler = StandardScaler()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "076b3900-5023-4e7d-88c5-363a627ea1da", 127 | "metadata": { 128 | "tags": [] 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "y_ = scaler.fit_transform(y.reshape(-1, 1))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "id": "71591bdb-b77e-403a-b7de-b3c5812f8aa1", 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "plt.plot(x, y, 'r', lw=1.0,\n", 143 | " label='real data')\n", 144 | "plt.plot(x, y_, 'b--', lw=1.0,\n", 145 | " label='normalized data')\n", 146 | "plt.legend();" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "id": "3738dddc-f0bf-4e05-8333-22fabf3ad9cf", 152 | "metadata": {}, 153 | "source": [ 154 | "### Model Training" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "id": "871963c5-c57f-4f1c-849d-ddfbb8024110", 161 | "metadata": { 162 | "tags": [] 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "class Generator(nn.Module):\n", 167 | " def __init__(self, hu=32):\n", 168 | " super(Generator, self).__init__()\n", 169 | " self.net = nn.Sequential(\n", 170 | " nn.Linear(1, hu),\n", 171 | " nn.ReLU(),\n", 172 | " nn.Linear(hu, hu),\n", 173 | " nn.ReLU(),\n", 174 | " nn.Linear(hu, 1)\n", 175 | " )\n", 176 | "\n", 177 | " def forward(self, x):\n", 178 | " return self.net(x)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | 
"execution_count": null, 184 | "id": "be8b39f7-6428-466e-93ca-5b22beaa2231", 185 | "metadata": { 186 | "tags": [] 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "class Discriminator(nn.Module):\n", 191 | " def __init__(self, hu=32):\n", 192 | " super(Discriminator, self).__init__()\n", 193 | " self.net = nn.Sequential(\n", 194 | " nn.Linear(1, hu),\n", 195 | " nn.ReLU(),\n", 196 | " nn.Linear(hu, hu),\n", 197 | " nn.ReLU(),\n", 198 | " nn.Linear(hu, 1),\n", 199 | " nn.Sigmoid()\n", 200 | " )\n", 201 | "\n", 202 | " def forward(self, x):\n", 203 | " return self.net(x)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "1d09e356-01ff-4b37-afcf-ffc16e22a844", 210 | "metadata": { 211 | "tags": [] 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "def create_gan(generator, discriminator, lr=0.001):\n", 216 | " gen_optimizer = optim.Adam(generator.parameters(), lr=lr)\n", 217 | " disc_optimizer = optim.Adam(discriminator.parameters(), lr=lr)\n", 218 | " criterion = nn.BCELoss()\n", 219 | " return gen_optimizer, disc_optimizer, criterion" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "id": "4829e1ec-e8a4-466d-997a-a9d0d9592b09", 226 | "metadata": { 227 | "tags": [] 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "generator = Generator()\n", 232 | "discriminator = Discriminator()\n", 233 | "gen_optimizer, disc_optimizer, criterion = create_gan(generator, discriminator, lr=0.0001)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "76ce1bec-fc28-4b1c-b501-2b9d9a90db72", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "from numpy.random import default_rng" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "3863917e-de5c-4732-b928-46fce75b536f", 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "rng = default_rng(seed=100)" 254 | ] 255 | }, 256 | { 
257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "aa6f15af-4ed8-46da-a44a-af84f344f5f6", 260 | "metadata": { 261 | "tags": [] 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "def train_models(y_, epochs, batch_size):\n", 266 | " for epoch in range(epochs):\n", 267 | " # sample real data\n", 268 | " idx = rng.integers(0, len(y_), batch_size)\n", 269 | " real_batch = torch.from_numpy(y_[idx].reshape(-1, 1)).float()\n", 270 | " real_labels = torch.ones(batch_size, 1)\n", 271 | " fake_labels = torch.zeros(batch_size, 1)\n", 272 | "\n", 273 | " # generate fake data\n", 274 | " noise = torch.randn(batch_size, 1)\n", 275 | " fake_batch = generator(noise)\n", 276 | "\n", 277 | " # train discriminator\n", 278 | " disc_optimizer.zero_grad()\n", 279 | " real_preds = discriminator(real_batch)\n", 280 | " real_loss = criterion(real_preds, real_labels)\n", 281 | " fake_preds = discriminator(fake_batch.detach())\n", 282 | " fake_loss = criterion(fake_preds, fake_labels)\n", 283 | " d_loss = real_loss + fake_loss\n", 284 | " d_loss.backward()\n", 285 | " disc_optimizer.step()\n", 286 | "\n", 287 | " # train generator\n", 288 | " gen_optimizer.zero_grad()\n", 289 | " noise = torch.randn(batch_size, 1)\n", 290 | " fake_batch = generator(noise)\n", 291 | " gen_preds = discriminator(fake_batch)\n", 292 | " g_loss = criterion(gen_preds, real_labels)\n", 293 | " g_loss.backward()\n", 294 | " gen_optimizer.step()\n", 295 | "\n", 296 | " if epoch % 1000 == 0:\n", 297 | " print(f'Epoch: {epoch}')\n", 298 | " # after training, generate final data\n", 299 | " with torch.no_grad():\n", 300 | " idx = rng.integers(0, len(y_), batch_size)\n", 301 | " real_data = y_[idx]\n", 302 | " synthetic_data = generator(torch.randn(batch_size, 1)).cpu().numpy()\n", 303 | " return real_data, synthetic_data" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "id": "8133e8a2-43ba-45f6-afeb-b68c92d78a9b", 310 | "metadata": {}, 311 | "outputs": [], 
312 | "source": [ 313 | "%%time\n", 314 | "real_data, synthetic_data = train_models(y_, epochs=15001, batch_size=32)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "id": "33b6c00d-4bb1-4127-87be-62a06341f5fd", 321 | "metadata": { 322 | "tags": [] 323 | }, 324 | "outputs": [], 325 | "source": [ 326 | "plt.plot(real_data, 'r', lw=1.0,\n", 327 | " label='real data (last batch)')\n", 328 | "plt.plot(synthetic_data, 'b:', lw=1.0,\n", 329 | " label='synthetic data (last batch)')\n", 330 | "plt.legend();" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "id": "f89c7125-f20b-4738-bdc8-dd92da53144a", 337 | "metadata": { 338 | "tags": [] 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "data = pd.DataFrame({'real': y}, index=x)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "af51e963-e8fb-41bf-ade5-40084677c275", 349 | "metadata": { 350 | "tags": [] 351 | }, 352 | "outputs": [], 353 | "source": [ 354 | "N = 5\n", 355 | "for i in range(N):\n", 356 | " noise = np.random.normal(0, 1, (len(y), 1))\n", 357 | " noise_t = torch.from_numpy(noise).float()\n", 358 | " synthetic_data = generator(noise_t).detach().numpy()\n", 359 | " data[f'synth_{i:02d}'] = scaler.inverse_transform(synthetic_data)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "id": "82e3c9ce-0536-42b2-aef1-cc2e5d2eb6b9", 366 | "metadata": { 367 | "tags": [] 368 | }, 369 | "outputs": [], 370 | "source": [ 371 | "data.describe().round(3)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "id": "4fffc46b-b152-4f0c-9a9d-60addaf2d6d3", 378 | "metadata": { 379 | "tags": [] 380 | }, 381 | "outputs": [], 382 | "source": [ 383 | "((data.apply(np.sort)['real'] -\n", 384 | " data.apply(np.sort)['synth_00']) ** 2).mean()" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "id": 
"43cfeef2-07d0-4506-bc39-9ea6c3db9342", 391 | "metadata": { 392 | "tags": [] 393 | }, 394 | "outputs": [], 395 | "source": [ 396 | "data.apply(np.sort).plot(style=['r'] + N * ['b--'], lw=1, legend=False);" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "id": "e6d25459-74ce-4747-bb66-b8c70a96b96e", 402 | "metadata": { 403 | "tags": [] 404 | }, 405 | "source": [ 406 | "## Financial Example" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "id": "cfe2760c-8590-422f-9325-8e729cf3026e", 413 | "metadata": { 414 | "tags": [] 415 | }, 416 | "outputs": [], 417 | "source": [ 418 | "raw = pd.read_csv('https://certificate.tpq.io/rl4finance.csv',\n", 419 | " index_col=0, parse_dates=True).dropna()" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "id": "8441cfd0-035a-4384-8d98-a8586e4894a9", 426 | "metadata": { 427 | "tags": [] 428 | }, 429 | "outputs": [], 430 | "source": [ 431 | "rets = raw['GLD'].iloc[-2 * 252:]\n", 432 | "rets = np.log((rets / rets.shift(1)).dropna())\n", 433 | "rets = rets.values" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "id": "21667720-db20-4ec1-aaea-e48f1cbdf703", 440 | "metadata": { 441 | "tags": [] 442 | }, 443 | "outputs": [], 444 | "source": [ 445 | "scaler = StandardScaler()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "id": "fde48cba-8acf-4a04-9eb5-6c28a97976dc", 452 | "metadata": { 453 | "tags": [] 454 | }, 455 | "outputs": [], 456 | "source": [ 457 | "rets_ = scaler.fit_transform(rets.reshape(-1, 1))" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "id": "1cc4f7ec-c728-4e29-b356-d1999ebc0944", 464 | "metadata": { 465 | "tags": [] 466 | }, 467 | "outputs": [], 468 | "source": [ 469 | "rng = default_rng(seed=100)\n", 470 | "torch.manual_seed(100)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 
null, 476 | "id": "b4884fa3-006b-423a-9b27-531fbd9aa14c", 477 | "metadata": { 478 | "tags": [] 479 | }, 480 | "outputs": [], 481 | "source": [ 482 | "generator = Generator(hu=24)\n", 483 | "discriminator = Discriminator(hu=24)\n", 484 | "gen_optimizer, disc_optimizer, criterion = create_gan(generator, discriminator, lr=0.0001)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "id": "fd265a28-33f3-4e40-a978-f51956ce4464", 491 | "metadata": { 492 | "tags": [] 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "%time rd, sd = train_models(y_=rets_, epochs=15001, batch_size=32)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "id": "09ad0c15-7cec-4766-adeb-9631c6e5e4aa", 503 | "metadata": { 504 | "tags": [] 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "data = pd.DataFrame({'real': rets})" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "id": "32e09910-0d6f-43e7-82cf-f2832c285b85", 515 | "metadata": { 516 | "tags": [] 517 | }, 518 | "outputs": [], 519 | "source": [ 520 | "N = 25" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "id": "10933efb-dd34-46f8-9e20-b3bca9f84332", 527 | "metadata": { 528 | "tags": [] 529 | }, 530 | "outputs": [], 531 | "source": [ 532 | "for i in range(N):\n", 533 | " noise = np.random.normal(0, 1, (len(rets_), 1))\n", 534 | " noise_t = torch.from_numpy(noise).float()\n", 535 | " synthetic_data = generator(noise_t).detach().numpy()\n", 536 | " data[f'synth_{i:02d}'] = scaler.inverse_transform(synthetic_data)" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "id": "8a2bef01-4374-42c1-b246-1ec2b84d7b54", 543 | "metadata": { 544 | "tags": [] 545 | }, 546 | "outputs": [], 547 | "source": [ 548 | "res = data.describe().round(4)\n", 549 | "res.iloc[:, :5]" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 
555 | "id": "26a1a332-5985-4bb7-a23d-1cd3afa4f5df", 556 | "metadata": { 557 | "tags": [] 558 | }, 559 | "outputs": [], 560 | "source": [ 561 | "data.iloc[:, :2].plot(style=['r', 'b--', 'b--'], lw=1, alpha=0.7);" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "id": "7bcc7cb4-c88e-4014-9eee-5742c34a9c4e", 568 | "metadata": { 569 | "tags": [] 570 | }, 571 | "outputs": [], 572 | "source": [ 573 | "data['real'].plot(kind='hist', bins=50, label='real',\n", 574 | " color='r', alpha=0.7)\n", 575 | "data['synth_00'].plot(kind='hist', bins=50, alpha=0.7,\n", 576 | " label='synthetic', color='b', sharex=True)\n", 577 | "plt.legend();" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": null, 583 | "id": "2c18e2cb-3fd7-4189-af3b-607a11410812", 584 | "metadata": { 585 | "tags": [] 586 | }, 587 | "outputs": [], 588 | "source": [ 589 | "plt.plot(np.sort(data['real']), 'r', lw=1.0, label='real')\n", 590 | "plt.plot(np.sort(data['synth_00']), 'b--', lw=1.0, label='synthetic')\n", 591 | "plt.legend();" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "id": "102ba276-1c1e-42a2-9fe0-b3d63deaa9bf", 598 | "metadata": { 599 | "tags": [] 600 | }, 601 | "outputs": [], 602 | "source": [ 603 | "sn = N\n", 604 | "data.iloc[:, 1:sn + 1].cumsum().apply(np.exp).plot(\n", 605 | " style='b--', lw=0.7, legend=False)\n", 606 | "data.iloc[:, 1:sn + 1].mean(axis=1).cumsum().apply(\n", 607 | " np.exp).plot(style='g', lw=2)\n", 608 | "data['real'].cumsum().apply(np.exp).plot(style='r', lw=2);" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "id": "c896c323-bcd7-4c6f-b057-1fa874ca88d6", 614 | "metadata": {}, 615 | "source": [ 616 | "### Kolmogorow-Smirnow (KS) Test" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "id": "601641c4-1ff5-4a17-a6de-5a9a0ecd984f", 623 | "metadata": { 624 | "tags": [] 625 | }, 626 | "outputs": [], 627 | "source": [ 628 
| "from scipy import stats" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": null, 634 | "id": "99adf8c7-2618-4753-81f0-ad3c72e070bf", 635 | "metadata": { 636 | "tags": [] 637 | }, 638 | "outputs": [], 639 | "source": [ 640 | "pvs = list()\n", 641 | "for i in range(N):\n", 642 | " pvs.append(stats.kstest(data[f'synth_{i:02d}'],\n", 643 | " data['real']).pvalue)\n", 644 | "pvs = np.array(pvs)" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": null, 650 | "id": "ed10d183-e744-4373-98e9-30a10ea43f16", 651 | "metadata": { 652 | "tags": [] 653 | }, 654 | "outputs": [], 655 | "source": [ 656 | "np.sort((pvs > 0.05).astype(int))" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "id": "9ed68882-429d-48f7-abe7-139b0d57eed9", 663 | "metadata": { 664 | "tags": [] 665 | }, 666 | "outputs": [], 667 | "source": [ 668 | "sum(np.sort(pvs > 0.05)) / N" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "id": "514e33ea-ec94-4824-9098-0c3093256ba5", 675 | "metadata": { 676 | "tags": [] 677 | }, 678 | "outputs": [], 679 | "source": [ 680 | "plt.hist(pvs, bins=100)\n", 681 | "plt.axvline(0.05, color='r');" 682 | ] 683 | }, 684 | { 685 | "cell_type": "markdown", 686 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 687 | "metadata": {}, 688 | "source": [ 689 | "\"The
\n", 690 | "\n", 691 | "https://tpq.io | @dyjh | team@tpq.io" 692 | ] 693 | } 694 | ], 695 | "metadata": { 696 | "kernelspec": { 697 | "display_name": "Python 3 (ipykernel)", 698 | "language": "python", 699 | "name": "python3" 700 | }, 701 | "language_info": { 702 | "codemirror_mode": { 703 | "name": "ipython", 704 | "version": 3 705 | }, 706 | "file_extension": ".py", 707 | "mimetype": "text/x-python", 708 | "name": "python", 709 | "nbconvert_exporter": "python", 710 | "pygments_lexer": "ipython3", 711 | "version": "3.11.12" 712 | } 713 | }, 714 | "nbformat": 4, 715 | "nbformat_minor": 5 716 | } 717 | -------------------------------------------------------------------------------- /pytorch/06_rl4f_pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "475819a4-e148-4616-b1cb-44b659aeb08a", 6 | "metadata": {}, 7 | "source": [ 8 | "\"The
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "280cc0c6-2c18-46cd-8af7-3f19b64a6d7e", 14 | "metadata": {}, 15 | "source": [ 16 | "# Reinforcement Learning for Finance\n", 17 | "\n", 18 | "**Chapter 06 — Algorithmic Trading**\n", 19 | "\n", 20 | "© Dr. Yves J. Hilpisch\n", 21 | "\n", 22 | "https://tpq.io | @dyjh | team@tpq.io" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "d6be6f8b-e00e-402c-9df1-1d3f16e76c7e", 28 | "metadata": {}, 29 | "source": [ 30 | "## Prediction Game Revisited" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "be1564c8-e582-444c-a42b-c46119e612df", 37 | "metadata": { 38 | "tags": [] 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import math\n", 43 | "import random\n", 44 | "import numpy as np\n", 45 | "import pandas as pd\n", 46 | "from pylab import plt, mpl\n", 47 | "import torch" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "id": "a94ba4a3-69dd-4342-a682-e96eea349a84", 54 | "metadata": { 55 | "tags": [] 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "plt.style.use('seaborn-v0_8')\n", 60 | "mpl.rcParams['figure.dpi'] = 300\n", 61 | "mpl.rcParams['savefig.dpi'] = 300\n", 62 | "mpl.rcParams['font.family'] = 'serif'\n", 63 | "np.set_printoptions(suppress=True)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": "ec2f0944-16d0-498d-94b4-5356e1223a07", 70 | "metadata": { 71 | "tags": [] 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "from finance import *" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "5fd5a39b-3d6a-45f3-acdf-4fc0c5829e60", 82 | "metadata": { 83 | "tags": [] 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "finance = Finance('GLD', 'r', min_accuracy=47.5,\n", 88 | " n_features=8)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "6ac14464-cf63-485b-b3b0-813e6fb2be86", 95 | "metadata": { 96 
| "tags": [] 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "finance.data[finance.symbol].plot(title=finance.symbol,\n", 101 | " lw=1.0, c='b');" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "id": "d7454752-abbb-4c3e-8a5e-84307f02f5b9", 108 | "metadata": { 109 | "tags": [] 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "from dqlagent_pytorch import *" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "2c9b7437-a258-4a10-b491-da45927c62fd", 120 | "metadata": { 121 | "tags": [] 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "random.seed(100)\n", 126 | "np.random.seed(100)\n", 127 | "torch.manual_seed(100)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "id": "f25c7ff0-10fd-4fed-b91f-5c9e6a159afe", 134 | "metadata": { 135 | "tags": [] 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "dqlagent = DQLAgent(finance.symbol, finance.feature,\n", 140 | " finance.n_features, finance, lr=0.0001)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "07aea7cd-6c46-4108-a4a5-a5bbcecc99e4", 147 | "metadata": { 148 | "tags": [] 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "%time dqlagent.learn(500)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "id": "7e4c1dcd-fd70-4888-a23d-dee3f9584a83", 159 | "metadata": { 160 | "tags": [] 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "dqlagent.test(3)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "id": "10873024-0abc-4205-969d-37560a00cf4d", 171 | "metadata": { 172 | "tags": [] 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "from simulation import Simulation" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "7d421e30-ca8e-41ee-b894-405099ef5c63", 183 | "metadata": { 184 | "tags": [] 185 | }, 186 | 
"outputs": [], 187 | "source": [ 188 | "random.seed(500)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "id": "686fdb1d-8719-4a2f-9ffe-2a6170de7e08", 195 | "metadata": { 196 | "tags": [] 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "simulation = Simulation('SYMBOL', 'r', 4, '2025-1-1', '2027-1-1',\n", 201 | " 2 * 252, min_accuracy=0.5, x0=1, kappa=1,\n", 202 | " theta=0.75, sigma=0.1, new=True, normalize=True)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "id": "5f4aa03b-a660-4b37-8028-9c2ede4c289a", 209 | "metadata": { 210 | "tags": [] 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "for _ in range(5):\n", 215 | " simulation.reset()\n", 216 | " simulation.data[simulation.symbol].plot(title=simulation.symbol,\n", 217 | " lw=1.0, c='b');" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "31a9978b-0745-4ea4-a7da-b7b59466133a", 224 | "metadata": { 225 | "tags": [] 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "random.seed(100)\n", 230 | "np.random.seed(100)\n", 231 | "torch.manual_seed(100)\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "id": "e43037b2-4b3d-4eb9-8846-623cfb75ce4d", 238 | "metadata": { 239 | "tags": [] 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "agent = DQLAgent(simulation.symbol, simulation.feature,\n", 244 | " simulation.n_features, simulation)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "id": "755a2c76-484d-47fb-9fb4-65c3f1559dd5", 251 | "metadata": { 252 | "scrolled": true, 253 | "tags": [] 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "%time agent.learn(250)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "id": "90a0a7f3-e0e5-4877-89f1-5af861dd22d5", 264 | "metadata": { 265 | "tags": [] 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | 
"agent.test(5)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "id": "a65841de-5300-402d-b15f-c61c5b4d97af", 276 | "metadata": { 277 | "tags": [] 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "class ActionSpace:\n", 282 | " n = 2\n", 283 | " def sample(self):\n", 284 | " return random.randint(0, 1)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "80ff9999-3980-4dfb-96dd-4b549e85502f", 291 | "metadata": { 292 | "tags": [] 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "class Trading:\n", 297 | " def __init__(self, symbol, features, window, lags,\n", 298 | " start, end, periods,\n", 299 | " x0=100, kappa=1, theta=100, sigma=0.2,\n", 300 | " leverage=1, min_accuracy=0.5, min_performance=0.85,\n", 301 | " mu=None, std=None,\n", 302 | " new=True, normalize=True):\n", 303 | " self.symbol = symbol\n", 304 | " self.features = features\n", 305 | " self.n_features = len(features)\n", 306 | " self.window = window\n", 307 | " self.lags = lags\n", 308 | " self.start = start\n", 309 | " self.end = end\n", 310 | " self.periods = periods\n", 311 | " self.x0 = x0\n", 312 | " self.kappa = kappa\n", 313 | " self.theta = theta\n", 314 | " self.sigma = sigma\n", 315 | " self.leverage = leverage\n", 316 | " self.min_accuracy = min_accuracy\n", 317 | " self.min_performance = min_performance\n", 318 | " self.start = start\n", 319 | " self.end = end\n", 320 | " self.mu = mu\n", 321 | " self.std = std\n", 322 | " self.new = new\n", 323 | " self.normalize = normalize\n", 324 | " self.action_space = ActionSpace()\n", 325 | " self._simulate_data()\n", 326 | " self._prepare_data()" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "id": "83196771-01cd-482e-bb21-7f635594d4f9", 333 | "metadata": { 334 | "tags": [] 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "class Trading(Trading):\n", 339 | " def _simulate_data(self):\n", 340 | " index = 
pd.date_range(start=self.start,\n", 341 | " end=self.end, periods=self.periods)\n", 342 | " s = [self.x0]\n", 343 | " dt = (index[-1] - index[0]).days / 365 / self.periods\n", 344 | " for t in range(1, len(index)):\n", 345 | " s_ = (s[t - 1] + self.kappa * (self.theta - s[t - 1]) * dt +\n", 346 | " s[t - 1] * self.sigma * math.sqrt(dt) *\n", 347 | " random.gauss(0, 1))\n", 348 | " s.append(s_)\n", 349 | " self.data = pd.DataFrame(s, columns=[self.symbol], index=index)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "id": "3ee3dfca-e197-497c-8546-8cd8d8497e42", 356 | "metadata": { 357 | "tags": [] 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "class Trading(Trading):\n", 362 | " def _prepare_data(self):\n", 363 | " self.data['r'] = np.log(self.data / self.data.shift(1))\n", 364 | " self.data.dropna(inplace=True)\n", 365 | " # additional features\n", 366 | " if self.window > 0:\n", 367 | " self.data['SMA'] = self.data[\n", 368 | " self.symbol].rolling(self.window).mean()\n", 369 | " self.data['DEL'] = self.data[\n", 370 | " self.symbol] - self.data['SMA']\n", 371 | " self.data['MIN'] = self.data[\n", 372 | " self.symbol].rolling(self.window).min()\n", 373 | " self.data['MAX'] = self.data[\n", 374 | " self.symbol].rolling(self.window).max()\n", 375 | " self.data['MOM'] = self.data['r'].rolling(\n", 376 | " self.window).mean()\n", 377 | " # add more features here\n", 378 | " self.data.dropna(inplace=True)\n", 379 | " if self.normalize:\n", 380 | " if self.mu is None or self.std is None:\n", 381 | " self.mu = self.data.mean()\n", 382 | " self.std = self.data.std()\n", 383 | " self.data_ = (self.data - self.mu) / self.std\n", 384 | " else:\n", 385 | " self.data_ = self.data.copy()\n", 386 | " self.data['d'] = np.where(self.data['r'] > 0, 1, 0)\n", 387 | " self.data['d'] = self.data['d'].astype(int)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "id": 
"94c749e3-3cdf-48b7-8507-ea4f2e1b543d", 394 | "metadata": { 395 | "tags": [] 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "class Trading(Trading):\n", 400 | " def _get_state(self):\n", 401 | " return self.data_[self.features].iloc[self.bar -\n", 402 | " self.lags:self.bar]\n", 403 | " def seed(self, seed):\n", 404 | " random.seed(seed)\n", 405 | " np.random.seed(seed)\n", 406 | " torch.manual_seed(seed)\n", 407 | " def reset(self):\n", 408 | " if self.new:\n", 409 | " self._simulate_data()\n", 410 | " self._prepare_data()\n", 411 | " self.treward = 0\n", 412 | " self.accuracy = 0\n", 413 | " self.actions = list()\n", 414 | " self.returns = list()\n", 415 | " self.performance = 1\n", 416 | " self.bar = self.lags\n", 417 | " state = self._get_state()\n", 418 | " return state.values, {}" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "id": "29ee2c13-e43f-41eb-9711-c3b7a74d9e1e", 425 | "metadata": { 426 | "tags": [] 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "class Trading(Trading):\n", 431 | " def step(self, action):\n", 432 | " correct = action == self.data['d'].iloc[self.bar]\n", 433 | " ret = self.data['r'].iloc[self.bar] * self.leverage\n", 434 | " reward_ = 1 if correct else 0\n", 435 | " pl = abs(ret) if correct else -abs(ret)\n", 436 | " reward = reward_\n", 437 | " # alternative options:\n", 438 | " # reward = pl # only the P&L in log returns\n", 439 | " # reward = reward_ + 10 * pl # the reward + the scaled P&L\n", 440 | " self.treward += reward\n", 441 | " self.bar += 1\n", 442 | " self.accuracy = self.treward / (self.bar - self.lags) \n", 443 | " self.performance *= math.exp(pl)\n", 444 | " if self.bar >= len(self.data):\n", 445 | " done = True\n", 446 | " elif reward_ == 1:\n", 447 | " done = False\n", 448 | " elif (self.accuracy < self.min_accuracy and\n", 449 | " self.bar > self.lags + 15):\n", 450 | " done = True\n", 451 | " elif (self.performance < self.min_performance and\n", 452 | " 
self.bar > self.lags + 15):\n", 453 | " done = True\n", 454 | " else:\n", 455 | " done = False\n", 456 | " state = self._get_state()\n", 457 | " return state.values, reward, done, False, {}" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "id": "17cf270f-3740-45e6-8665-70268e1a70cc", 464 | "metadata": { 465 | "tags": [] 466 | }, 467 | "outputs": [], 468 | "source": [ 469 | "symbol = 'SYMBOL'" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "id": "18455398-c390-43ec-b210-2329194e1fb5", 476 | "metadata": { 477 | "tags": [] 478 | }, 479 | "outputs": [], 480 | "source": [ 481 | "trading = Trading(symbol, [symbol, 'r', 'DEL'], window=10, lags=5,\n", 482 | " start='2024-1-1', end='2026-1-1', periods=504,\n", 483 | " x0=100, kappa=2, theta=300, sigma=0.1, normalize=False)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "id": "d027e602-ef64-4c00-9c64-908a6b53b674", 490 | "metadata": { 491 | "tags": [] 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "random.seed(750)" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "id": "5e012f99-e964-4135-95d7-37b050be42f2", 502 | "metadata": { 503 | "tags": [] 504 | }, 505 | "outputs": [], 506 | "source": [ 507 | "trading.reset()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "id": "bde7344f-2a9b-4727-a94a-d3fc1bce1653", 514 | "metadata": { 515 | "tags": [] 516 | }, 517 | "outputs": [], 518 | "source": [ 519 | "trading.data.info()" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "id": "110f8cd3-72b1-4e80-8f63-802bb915492d", 526 | "metadata": { 527 | "tags": [] 528 | }, 529 | "outputs": [], 530 | "source": [ 531 | "trading.data.iloc[-200:][\n", 532 | " [trading.symbol, 'SMA', 'MIN', 'MAX']].plot(\n", 533 | " style=['b-', 'r--', 'g:', 'g:'], lw=1.0);" 534 | ] 535 | }, 536 | { 537 | 
"cell_type": "code", 538 | "execution_count": null, 539 | "id": "7aea1c26-48a3-47f1-969c-6c249c8241f1", 540 | "metadata": { 541 | "tags": [] 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "random.seed(100)\n", 546 | "np.random.seed(100)\n", 547 | "torch.manual_seed(100)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "id": "272afe67-2ff5-4b1c-998e-ec46e6457c88", 554 | "metadata": { 555 | "tags": [] 556 | }, 557 | "outputs": [], 558 | "source": [ 559 | "trading = Trading(symbol, ['r', 'DEL', 'MOM'], window=10, lags=8,\n", 560 | " start='2024-1-1', end='2026-1-1', periods=2 * 252,\n", 561 | " x0=100, kappa=2, theta=50, sigma=0.1,\n", 562 | " leverage=1, min_accuracy=0.5, min_performance=0.85,\n", 563 | " new=True, normalize=True)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "id": "b7e7600d-63f6-4aff-b7ef-044761a1ad07", 570 | "metadata": { 571 | "tags": [] 572 | }, 573 | "outputs": [], 574 | "source": [ 575 | "tradingagent = TradingAgent(trading.symbol, trading.features,\n", 576 | " trading.lags * trading.n_features, trading, hu=24, lr=0.0001)" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "id": "f86bdfb8-21b8-4806-89e4-d80bdd976978", 583 | "metadata": { 584 | "tags": [] 585 | }, 586 | "outputs": [], 587 | "source": [ 588 | "%%time\n", 589 | "tradingagent.test(100, min_accuracy=0.0,\n", 590 | " min_performance=0.0,\n", 591 | " verbose=True, full=False)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "id": "7052466e-5b5b-429f-8720-ab7fb3cba220", 598 | "metadata": { 599 | "tags": [] 600 | }, 601 | "outputs": [], 602 | "source": [ 603 | "random_performances = tradingagent.performances" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "id": "b56f0904-2ac7-432a-9730-1f426716a9e4", 610 | "metadata": { 611 | "tags": [] 612 | }, 613 | "outputs": [], 614 | 
"source": [ 615 | "sum(random_performances) / len(random_performances)" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": null, 621 | "id": "037ebe99-31a2-40d1-89ff-69c8e0c475ca", 622 | "metadata": { 623 | "tags": [] 624 | }, 625 | "outputs": [], 626 | "source": [ 627 | "plt.hist(random_performances, bins=50, color='b')\n", 628 | "plt.xlabel('gross performance')\n", 629 | "plt.ylabel('frequency');" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "id": "d5f9cd36-532e-417f-a80b-c019b8098d96", 636 | "metadata": { 637 | "scrolled": true, 638 | "tags": [] 639 | }, 640 | "outputs": [], 641 | "source": [ 642 | "%time tradingagent.learn(500)" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "id": "f9f71cf4-7c81-40e0-a847-a3f57fff69be", 649 | "metadata": { 650 | "tags": [] 651 | }, 652 | "outputs": [], 653 | "source": [ 654 | "%%time\n", 655 | "tradingagent.test(50, min_accuracy=0.0,\n", 656 | " min_performance=0.0,\n", 657 | " verbose=True, full=False)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "id": "6b12eb14-9702-4b9a-a572-c57db5de3ecd", 664 | "metadata": { 665 | "tags": [] 666 | }, 667 | "outputs": [], 668 | "source": [ 669 | "sum(tradingagent.performances) / len(tradingagent.performances)" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "id": "830a46d0-3bdf-4e29-9ea4-74b3bea98647", 676 | "metadata": { 677 | "tags": [] 678 | }, 679 | "outputs": [], 680 | "source": [ 681 | "plt.hist(random_performances, bins=30,\n", 682 | " color='b', label='random (left)')\n", 683 | "plt.hist(tradingagent.performances, bins=30,\n", 684 | " color='r', label='trained (right)')\n", 685 | "plt.xlabel('gross performance')\n", 686 | "plt.ylabel('frequency')\n", 687 | "plt.legend();" 688 | ] 689 | }, 690 | { 691 | "cell_type": "markdown", 692 | "id": "20e3eaa7-ac35-44e5-bffc-93662c2d2c55", 693 | 
"metadata": {}, 694 | "source": [ 695 | "\"The
\n", 696 | "\n", 697 | "https://tpq.io | @dyjh | team@tpq.io" 698 | ] 699 | } 700 | ], 701 | "metadata": { 702 | "kernelspec": { 703 | "display_name": "Python 3 (ipykernel)", 704 | "language": "python", 705 | "name": "python3" 706 | }, 707 | "language_info": { 708 | "codemirror_mode": { 709 | "name": "ipython", 710 | "version": 3 711 | }, 712 | "file_extension": ".py", 713 | "mimetype": "text/x-python", 714 | "name": "python", 715 | "nbconvert_exporter": "python", 716 | "pygments_lexer": "ipython3", 717 | "version": "3.11.12" 718 | } 719 | }, 720 | "nbformat": 4, 721 | "nbformat_minor": 5 722 | } 723 | -------------------------------------------------------------------------------- /pytorch/assetallocation_pytorch.py: -------------------------------------------------------------------------------- 1 | # 2 | # Investing Environment and Agent 3 | # Three Asset Case 4 | # 5 | # (c) Dr. Yves J. Hilpisch 6 | # Reinforcement Learning for Finance 7 | # 8 | 9 | import os 10 | import math 11 | import random 12 | import numpy as np 13 | import pandas as pd 14 | from scipy import stats 15 | from pylab import plt, mpl 16 | from scipy.optimize import minimize 17 | 18 | import torch 19 | from dqlagent_pytorch import * 20 | 21 | plt.style.use('seaborn-v0_8') 22 | mpl.rcParams['figure.dpi'] = 300 23 | mpl.rcParams['savefig.dpi'] = 300 24 | mpl.rcParams['font.family'] = 'serif' 25 | np.set_printoptions(suppress=True) 26 | 27 | 28 | 29 | class observation_space: 30 | def __init__(self, n): 31 | self.shape = (n,) 32 | 33 | 34 | class action_space: 35 | def __init__(self, n): 36 | self.n = n 37 | def seed(self, seed): 38 | random.seed(seed) 39 | def sample(self): 40 | rn = np.random.random(3) 41 | return rn / rn.sum() 42 | 43 | 44 | class Investing: 45 | def __init__(self, asset_one, asset_two, asset_three, 46 | steps=252, amount=1): 47 | self.asset_one = asset_one 48 | self.asset_two = asset_two 49 | self.asset_three = asset_three 50 | self.steps = steps 51 | self.initial_balance = 
# NOTE(review): the repo dump splits `Investing.__init__` across chunk
# boundaries; the class is reproduced here in full for coherence.

class Investing:
    """Investing environment for a three-asset allocation problem.

    State: three (rebased) asset prices plus the three current portfolio
    weights. Action: a vector of three weights that sum to one. Reward
    (after the first bar): a rolling Sharpe-like ratio of the P&L.
    """

    def __init__(self, asset_one, asset_two, asset_three,
                 steps=252, amount=1):
        self.asset_one = asset_one
        self.asset_two = asset_two
        self.asset_three = asset_three
        self.steps = steps                    # episode length in bars
        self.initial_balance = amount
        self.portfolio_value = amount
        self.portfolio_value_new = amount
        # NOTE(review): `_get_state` returns 6 values (3 prices +
        # 3 weights) while the observation space is declared with 4 —
        # looks stale; confirm against the notebook that consumes it.
        self.observation_space = observation_space(4)
        self.osn = self.observation_space.shape[0]
        self.action_space = action_space(3)
        self.retrieved = 0                    # flag: CSV downloaded yet?
        self._generate_data()
        self.portfolios = pd.DataFrame()      # per-step results log
        self.episode = 0

    def _generate_data(self):
        """Fetch the price data once, then sample a random episode window."""
        if not self.retrieved:
            url = 'https://certificate.tpq.io/rl4finance.csv'
            self.raw = pd.read_csv(url, index_col=0,
                                   parse_dates=True).dropna()
            # BUG FIX: the original had the bare expression
            # `self.retrieved` (a no-op), so the flag was never set and
            # the CSV was re-downloaded on every reset()/episode.
            self.retrieved = 1
        self.data = pd.DataFrame()
        self.data['X'] = self.raw[self.asset_one]
        self.data['Y'] = self.raw[self.asset_two]
        self.data['Z'] = self.raw[self.asset_three]
        # random window of length `steps`, rebased so all series start at 1.0
        s = random.randint(self.steps, len(self.data))
        self.data = self.data.iloc[s - self.steps:s]
        self.data = self.data / self.data.iloc[0]

    def _get_state(self):
        """Return the (prices, weights) state vector and the current date."""
        Xt = self.data['X'].iloc[self.bar]
        Yt = self.data['Y'].iloc[self.bar]
        Zt = self.data['Z'].iloc[self.bar]
        date = self.data.index[self.bar]
        return np.array(
            [Xt, Yt, Zt, self.xt, self.yt, self.zt]
        ), {'date': date}

    def seed(self, seed=None):
        if seed is not None:
            random.seed(seed)

    def reset(self):
        """Start a new episode with zero weights and a fresh data window."""
        self.xt = 0
        self.yt = 0
        self.zt = 0
        self.bar = 0
        self.treward = 0
        self.portfolio_value = self.initial_balance
        self.portfolio_value_new = self.initial_balance
        self.episode += 1
        self._generate_data()
        self.state, info = self._get_state()
        return self.state, info

    def add_results(self, pl):
        """Append the current portfolio snapshot to `self.portfolios`."""
        df = pd.DataFrame({
            'e': self.episode, 'date': self.date,
            'xt': self.xt, 'yt': self.yt, 'zt': self.zt,
            'pv': self.portfolio_value,
            'pv_new': self.portfolio_value_new, 'p&l[$]': pl,
            'p&l[%]': pl / self.portfolio_value_new * 100,
            'Xt': self.state[0], 'Yt': self.state[1],
            'Zt': self.state[2], 'Xt_new': self.new_state[0],
            'Yt_new': self.new_state[1],
            'Zt_new': self.new_state[2],
        }, index=[0])
        self.portfolios = pd.concat((self.portfolios, df),
                                    ignore_index=True)

    def step(self, action):
        """Rebalance to `action` weights; reward is a Sharpe-like ratio."""
        self.bar += 1
        self.new_state, info = self._get_state()
        self.date = info['date']
        if self.bar == 1:
            # first bar: take the initial allocation, no P&L yet
            self.xt = action[0]
            self.yt = action[1]
            self.zt = action[2]
            pl = 0.
            reward = 0.
            self.add_results(pl)
        else:
            # portfolio value after the assets' moves, before rebalancing
            self.portfolio_value_new = (
                self.xt * self.portfolio_value *
                self.new_state[0] / self.state[0] +
                self.yt * self.portfolio_value *
                self.new_state[1] / self.state[1] +
                self.zt * self.portfolio_value *
                self.new_state[2] / self.state[2]
            )
            pl = self.portfolio_value_new - self.portfolio_value
            self.xt = action[0]
            self.yt = action[1]
            self.zt = action[2]
            self.add_results(pl)
            # annualized last return over rolling 20-bar vol
            ret = self.portfolios['p&l[%]'].iloc[-1] / 100 * 252
            vol = self.portfolios['p&l[%]'].rolling(
                20, min_periods=1).std().iloc[-1] * math.sqrt(252)
            sharpe = ret / vol
            reward = sharpe
            self.portfolio_value = self.portfolio_value_new
        if self.bar == len(self.data) - 1:
            done = True
        else:
            done = False
        self.state = self.new_state
        return self.state, reward, done, False, {}


class InvestingAgent(DQLAgent):
    """DQL agent with a continuous action (three portfolio weights).

    The Q-network maps a (state, weights) input to a single Q-value;
    the best weights are found by constrained optimization (SLSQP).
    """

    def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
        super().__init__(symbol, feature, n_features, env, hu, lr)
        # Continuous action: override the model to output a scalar Q-value.
        self.model = QNetwork(self.n_features, 1, hu).to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def opt_action(self, state):
        """Return the weight vector maximizing Q (minus a churn penalty)."""
        bnds = 3 * [(0, 1)]  # three weights, each in [0, 1]
        cons = [{'type': 'eq', 'fun': lambda x: x.sum() - 1}]  # sum to 1
        def f_obj(x):
            s = state.copy()
            s[0, 3] = x[0]
            s[0, 4] = x[1]
            s[0, 5] = x[2]
            # penalty for deviating from the current weights
            pen = np.mean((state[0, 3:] - x) ** 2)
            s_tensor = torch.FloatTensor(s).to(device)
            with torch.no_grad():
                q_val = self.model(s_tensor)
            return q_val.cpu().numpy()[0, 0] - pen
        try:
            state = self._reshape(state)
            res = minimize(lambda x: -f_obj(x), 3 * [1 / 3],
                           bounds=bnds, constraints=cons,
                           options={'eps': 1e-4}, method='SLSQP')
            action = res['x']
        except Exception:
            # fall back to a random feasible allocation
            action = self.env.action_space.sample()
        return action

    def act(self, state):
        """Epsilon-greedy: random weights with prob. epsilon, else optimal."""
        if random.random() <= self.epsilon:
            return self.env.action_space.sample()
        return self.opt_action(state)

    def replay(self):
        """Update the Q-network from replayed (s, a, s', r, done) tuples."""
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        for state, action, next_state, reward, done in batch:
            target = torch.tensor([reward], dtype=torch.float32).to(device)
            if not done:
                # bootstrap with the Q-value of the optimal next action
                ns = next_state.copy()
                action_cont = self.opt_action(ns)
                ns[0, 3:] = action_cont
                ns_tensor = torch.FloatTensor(ns).to(device)
                with torch.no_grad():
                    future_q = self.model(ns_tensor)[0, 0]
                target = target + self.gamma * future_q
            state_tensor = torch.FloatTensor(state).to(device)
            self.optimizer.zero_grad()
            current_q = self.model(state_tensor)[0, 0]
            loss = self.criterion(current_q, target)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def test(self, episodes, verbose=True):
        """Run greedy episodes and report the total (Sharpe-based) reward."""
        for e in range(1, episodes + 1):
            state, _ = self.env.reset()
            state = self._reshape(state)
            treward = 0
            for _ in range(1, len(self.env.data) + 1):
                action = self.opt_action(state)
                state, reward, done, trunc, _ = self.env.step(action)
                state = self._reshape(state)
                treward += reward
                if done:
                    templ = f'episode={e} | total reward={treward:4.2f}'
                    if verbose:
                        print(templ, end='\r')
                    break
            print()
# (tail of `InvestingAgent.test` truncated here by the dump chunking)

# --- pytorch/bsm73.py ---
#
# Valuation of European call options
# in Black-Scholes-Merton (1973) model
#
# (c) Dr. Yves J. Hilpisch
# Reinforcement Learning for Finance
#

from math import log, sqrt, exp
from scipy import stats


def bsm_call_value(St, K, T, t, r, sigma):
    ''' Valuation of European call option in BSM model.
    Analytical formula.

    Parameters
    ==========
    St: float
        stock/index level at date/time t
    K: float
        fixed strike price
    T: float
        maturity date/time (in year fractions)
    t: float
        current date/time
    r: float
        constant risk-free short rate
    sigma: float
        volatility factor in diffusion term

    Returns
    =======
    value: float
        present value of the European call option
    '''
    St = float(St)
    tau = T - t  # time to maturity
    denom = sigma * sqrt(tau)
    d1 = (log(St / K) + (r + 0.5 * sigma ** 2) * tau) / denom
    d2 = (log(St / K) + (r - 0.5 * sigma ** 2) * tau) / denom
    # stats.norm.cdf --> cumulative distribution function of N(0, 1)
    value = (St * stats.norm.cdf(d1, 0, 1) -
             K * exp(-r * tau) * stats.norm.cdf(d2, 0, 1))
    return value


# --- pytorch/dqlagent_pytorch.py ---
import os
import random
import warnings
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import deque

warnings.simplefilter('ignore')
# NOTE(review): setting PYTHONHASHSEED after interpreter start does not
# affect hash randomization; kept only for parity with the book code.
os.environ['PYTHONHASHSEED'] = '0'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class QNetwork(nn.Module):
    """Small fully connected network mapping a state to action Q-values."""

    def __init__(self, state_dim, action_dim, hu=24):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_dim, hu)
        self.fc2 = nn.Linear(hu, hu)
        self.fc3 = nn.Linear(hu, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


class DQLAgent:
    """Deep Q-learning agent with epsilon-greedy exploration and replay."""

    def __init__(self, symbol, feature, n_features, env, hu=24, lr=0.001):
        self.epsilon = 1.0            # initial exploration rate
        self.epsilon_decay = 0.9975   # multiplicative decay per replay
        self.epsilon_min = 0.1        # exploration floor
        self.memory = deque(maxlen=2000)
        self.batch_size = 32
        self.gamma = 0.5              # discount factor
        self.trewards = []
        self.max_treward = -np.inf
        self.n_features = n_features
        self.env = env
        self.episodes = 0
        # Q-network and optimizer
        self.model = QNetwork(self.n_features,
                              self.env.action_space.n, hu).to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def _reshape(self, state):
        """Flatten a state and return it as a (1, n) row vector."""
        flat = state.flatten()
        return np.reshape(flat, [1, len(flat)])

    def act(self, state):
        """Epsilon-greedy action: random with prob. epsilon, else argmax-Q."""
        if random.random() < self.epsilon:
            return self.env.action_space.sample()
        x = torch.FloatTensor(state).to(device)
        if x.dim() == 1:
            x = x.unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(x)
        return int(torch.argmax(q_values[0]).item())

    def replay(self):
        """One minibatch Q-learning update from the replay memory."""
        if len(self.memory) < self.batch_size:
            return
        sample = random.sample(self.memory, self.batch_size)
        s_b, a_b, ns_b, r_b, d_b = zip(*sample)

        states = torch.FloatTensor(np.vstack(s_b)).to(device)
        next_states = torch.FloatTensor(np.vstack(ns_b)).to(device)
        actions = torch.LongTensor(np.array(a_b)).unsqueeze(1).to(device)
        rewards = torch.FloatTensor(
            np.array(r_b, dtype=np.float32)).to(device)
        dones = torch.BoolTensor(np.array(d_b, dtype=bool)).to(device)

        current_q = self.model(states).gather(1, actions).squeeze(1)
        next_q = self.model(next_states).max(1)[0]
        # zero out the bootstrap term for terminal transitions
        target_q = rewards + self.gamma * next_q * (~dones).float()

        loss = self.criterion(current_q, target_q.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def learn(self, episodes):
        """Train for `episodes` episodes, replaying after each step."""
        for e in range(1, episodes + 1):
            self.episodes += 1
            state, _ = self.env.reset()
            state = self._reshape(state)
            treward = 0
            for f in range(1, 5000):
                self.f = f  # step counter, kept as attribute for inspection
                action = self.act(state)
                next_state, reward, done, trunc, _ = self.env.step(action)
                treward += reward
                next_state = self._reshape(next_state)
                self.memory.append(
                    (state, action, next_state, reward, done))
                state = next_state
                if done:
                    self.trewards.append(treward)
                    self.max_treward = max(self.max_treward, treward)
                    templ = f'episode={self.episodes:4d} | '
                    templ += f'treward={treward:7.3f} | max={self.max_treward:7.3f}'
                    print(templ, end='\r')
                    break
                if len(self.memory) > self.batch_size:
                    self.replay()
        print()

    def test(self, episodes, min_accuracy=0.0, min_performance=0.0,
             verbose=True, full=True):
        """Run test episodes with temporarily relaxed env thresholds.

        The environment's `min_accuracy` / `min_performance` (when the
        attributes exist) are overridden for the run and restored after.
        """
        ma = getattr(self.env, 'min_accuracy', None)
        if hasattr(self.env, 'min_accuracy'):
            self.env.min_accuracy = min_accuracy
        mp = None
        if hasattr(self.env, 'min_performance'):
            mp = self.env.min_performance
            self.env.min_performance = min_performance
        self.performances = []
        for e in range(1, episodes + 1):
            state, _ = self.env.reset()
            state = self._reshape(state)
            for f in range(1, 5001):
                action = self.act(state)
                state, reward, done, trunc, _ = self.env.step(action)
                state = self._reshape(state)
                if done:
                    templ = f'total reward={f:4d} | accuracy={self.env.accuracy:.3f}'
                    if hasattr(self.env, 'min_performance'):
                        self.performances.append(self.env.performance)
                        templ += f' | performance={self.env.performance:.3f}'
                    if verbose:
                        if full:
                            print(templ)
                        else:
                            print(templ, end='\r')
                    break
        # restore the environment's original thresholds
        if hasattr(self.env, 'min_accuracy') and ma is not None:
            self.env.min_accuracy = ma
        if mp is not None:
            self.env.min_performance = mp
        print()


# --- pytorch/finance.py:
#
# Finance Environment with Historical Data
#
# (c) Dr. Yves J.
# Hilpisch
# Reinforcement Learning for Finance
#

import random
import numpy as np
import pandas as pd


class ActionSpace:
    """Binary action space: 0 (down) or 1 (up)."""
    n = 2

    def sample(self):
        return random.randint(0, 1)


class Finance:
    """Prediction environment backed by historical end-of-day data."""
    url = 'https://certificate.tpq.io/rl4finance.csv'

    def __init__(self, symbol, feature, min_accuracy=0.485, n_features=4):
        self.symbol = symbol
        self.feature = feature
        self.n_features = n_features
        self.action_space = ActionSpace()
        self.min_accuracy = min_accuracy  # early-stop threshold
        self._get_data()
        self._prepare_data()

    def _get_data(self):
        """Download the historical data set (once, at construction)."""
        self.raw = pd.read_csv(self.url, index_col=0, parse_dates=True)

    def _prepare_data(self):
        """Derive log returns, direction labels, and normalized features."""
        self.data = pd.DataFrame(self.raw[self.symbol]).dropna()
        self.data['r'] = np.log(self.data / self.data.shift(1))
        self.data['d'] = np.where(self.data['r'] > 0, 1, 0)
        self.data.dropna(inplace=True)
        # Gaussian-normalized copy used for the state representation.
        self.data_ = (self.data - self.data.mean()) / self.data.std()

    def reset(self):
        """Start a new episode at the first full feature window."""
        self.bar = self.n_features
        self.treward = 0
        window = self.data_[self.feature].iloc[
            self.bar - self.n_features:self.bar]
        return window.values, {}

    def step(self, action):
        """Advance one bar; reward 1 for a correct direction prediction."""
        correct = action == self.data['d'].iloc[self.bar]
        reward = 1 if correct else 0
        self.treward += reward
        self.bar += 1
        self.accuracy = self.treward / (self.bar - self.n_features)
        if self.bar >= len(self.data):
            done = True   # data exhausted
        elif reward == 1:
            done = False
        elif (self.accuracy < self.min_accuracy) and (self.bar > 15):
            done = True   # accuracy too low after warm-up period
        else:
            done = False
        next_state = self.data_[self.feature].iloc[
            self.bar - self.n_features:self.bar].values
        return next_state, reward, done, False, {}


# --- pytorch/simulation.py:
#
# Monte Carlo Simulation Environment
#
# (c) Dr. Yves J. Hilpisch
# Reinforcement Learning for Finance
#

import math
import random
import numpy as np
import pandas as pd
from numpy.random import default_rng

rng = default_rng()


class ActionSpace:
    """Binary action space: 0 (down) or 1 (up)."""
    n = 2

    def sample(self):
        return random.randint(0, 1)


class Simulation:
    """Prediction environment driven by a simulated mean-reverting path.

    Parameters
    ==========
    symbol: str
        name used for the simulated price column
    feature: str
        column used for the state representation (e.g. symbol or 'r')
    n_features: int
        number of lagged values per state
    start, end: str
        calendar span of the simulated index
    periods: int
        number of simulated bars
    min_accuracy: float
        early-stop threshold for the prediction accuracy
    x0, kappa, theta, sigma: float
        initial value, mean-reversion speed, long-run mean, volatility
    normalize: bool
        Gaussian-normalize the state data
    new: bool
        re-simulate a fresh path on every reset()
    """

    def __init__(self, symbol, feature, n_features,
                 start, end, periods,
                 min_accuracy=0.525, x0=100,
                 kappa=1, theta=100, sigma=0.2,
                 normalize=True, new=False):
        self.symbol = symbol
        self.feature = feature
        self.n_features = n_features
        self.start = start
        self.end = end
        self.periods = periods
        self.x0 = x0
        self.kappa = kappa
        self.theta = theta
        self.sigma = sigma
        self.min_accuracy = min_accuracy
        self.normalize = normalize
        self.new = new
        self.action_space = ActionSpace()
        self._simulate_data()
        self._prepare_data()

    def _simulate_data(self):
        """Simulate a mean-reverting price path via Euler discretization."""
        index = pd.date_range(start=self.start,
                              end=self.end, periods=self.periods)
        s = [self.x0]
        # time step in year fractions, derived from the calendar span
        dt = (index[-1] - index[0]).days / 365 / self.periods
        for t in range(1, len(index)):
            s_ = (s[t - 1] + self.kappa * (self.theta - s[t - 1]) * dt +
                  s[t - 1] * self.sigma * math.sqrt(dt) *
                  random.gauss(0, 1))
            s.append(s_)

        self.data = pd.DataFrame(s, columns=[self.symbol], index=index)

    def _prepare_data(self):
        """Add log returns and direction labels; optionally normalize."""
        self.data['r'] = np.log(self.data / self.data.shift(1))
        self.data.dropna(inplace=True)
        if self.normalize:
            self.mu = self.data.mean()
            self.std = self.data.std()
            self.data_ = (self.data - self.mu) / self.std
        else:
            self.data_ = self.data.copy()
        self.data['d'] = np.where(self.data['r'] > 0, 1, 0)
        self.data['d'] = self.data['d'].astype(int)

    def _get_state(self):
        """Return the current window of `n_features` lagged feature values."""
        return self.data_[self.feature].iloc[self.bar -
                                             self.n_features:self.bar]

    def seed(self, seed):
        """Seed the random number generators used by the environment.

        BUG FIX: the original called tf.random.set_random_seed(seed),
        but this PyTorch port does not import TensorFlow, so calling
        seed() raised NameError.
        NOTE(review): consider adding torch.manual_seed(seed) here for
        parity with the notebook's Trading.seed — confirm with callers.
        """
        random.seed(seed)
        np.random.seed(seed)

    def reset(self):
        """Start a new episode (optionally on a freshly simulated path)."""
        if self.new:
            self._simulate_data()
            self._prepare_data()
        self.treward = 0
        self.accuracy = 0
        self.bar = self.n_features
        state = self._get_state()
        return state.values, {}

    def step(self, action):
        """Advance one bar; reward 1 for a correct direction prediction."""
        if action == self.data['d'].iloc[self.bar]:
            correct = True
        else:
            correct = False
        reward = 1 if correct else 0
        self.treward += reward
        self.bar += 1
        self.accuracy = self.treward / (self.bar - self.n_features)
        if self.bar >= len(self.data):
            done = True   # data exhausted
        elif reward == 1:
            done = False
        elif (self.accuracy < self.min_accuracy and
              self.bar > self.n_features + 15):
            done = True   # accuracy too low after warm-up period
        else:
            done = False
        next_state = self.data_[self.feature].iloc[
            self.bar - self.n_features:self.bar].values
        return next_state, reward, done, False, {}


# --- rl4f_tf210.yaml:
# https://raw.githubusercontent.com/yhilpisch/rl4f/389650dd7127cb28cd12e65ef3d8af54cb3d400c/rl4f_tf210.yaml