├── Model_training.ipynb ├── README.md ├── Results ├── 1e-4_1e-9_trajectory.npy ├── 1e-4_optimal.npy ├── 1e-4_shortfall_list.npy ├── 1e-4_trajectory.npy ├── 1e-6_1e-6_competition_shortfall_list.npy ├── 1e-6_1e-6_competition_trajectory.npy ├── 1e-6_1e-6_competition_trajectory_1500.npy ├── 1e-6_1e-6_cooporation_shortfall_list.npy ├── 1e-6_1e-6_cooporation_trajectory.npy ├── 1e-6_optimal.npy ├── 1e-6_shortfall_list.npy ├── 1e-6_shortfall_list_0.3M.npy ├── 1e-6_shortfall_list_0.7M.npy ├── 1e-6_shortfall_list_fixed_competitor.npy ├── 1e-6_shortfall_list_new_fixed_competitor.npy ├── 1e-6_trajectory.npy ├── 1e-6_trajectory_fixed_competitor.npy ├── 1e-6_trajectory_new_fixed_competitor.npy ├── 1e-7_1e-7_shortfall_list.npy ├── 1e-7_1e-7_trajectory.npy ├── 1e-7_shortfall_list.npy ├── 1e-7_trajectory.npy ├── 1e-9_optimal.npy ├── 1e-9_shortfall_list.npy ├── 1e-9_trajectory.npy ├── README.md └── price_list.npy ├── Visualization.ipynb ├── ddpg_agent.py ├── img ├── Competition_and_Cooporation.png ├── Have_Competitor.png ├── Multi-agent.png ├── README.md ├── Theorem1.png └── theorem2.png ├── model.py ├── syntheticChrissAlmgren.py └── utils.py /Model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deep Reinforcement Learning for Optimal Execution of Portfolio Transactions " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import utils\n", 17 | "\n", 18 | "# Get the default financial and AC Model parameters\n", 19 | "financial_params, ac_params = utils.get_env_param()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "\n", 31 | "\n", 32 | "\n", 33 | " \n", 34 | "\n", 35 | "\n", 36 | " \n", 37 | "\n", 38 | "
Financial Parameters
Annual Volatility: 12% Bid-Ask Spread: 0.125
Daily Volatility: 0.8% Daily Trading Volume: 5,000,000
" 39 | ], 40 | "text/plain": [ 41 | "" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "financial_params" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/html": [ 61 | "\n", 62 | "\n", 63 | "\n", 64 | " \n", 65 | "\n", 66 | "\n", 67 | " \n", 68 | "\n", 69 | "\n", 70 | " \n", 71 | "\n", 72 | "\n", 73 | " \n", 74 | "\n", 75 | "\n", 76 | " \n", 77 | "\n", 78 | "\n", 79 | " \n", 80 | "\n", 81 | "
Almgren and Chriss Model Parameters
Total Number of Shares for Agent1 to Sell: 500,000 Fixed Cost of Selling per Share: $0.062
Total Number of Shares for Agent2 to Sell: 500,000 Trader's Risk Aversion for Agent 1: 1e-06
Starting Price per Share: $50.00 Trader's Risk Aversion for Agent 2: 0.0001
Price Impact for Each 1% of Daily Volume Traded: $2.5e-06 Permanent Impact Constant: 2.5e-07
Number of Days to Sell All the Shares: 60 Single Step Variance: 0.144
Number of Trades: 60 Time Interval between trades: 1.0
" 82 | ], 83 | "text/plain": [ 84 | "" 85 | ] 86 | }, 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "ac_params" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 2, 99 | "metadata": { 100 | "scrolled": false 101 | }, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Episode [100/1300]\tAverage Shortfall for Agent1: $1,172,575.47\n", 108 | "Episode [100/1300]\tAverage Shortfall for Agent2: $1,183,832.73\n", 109 | "Episode [200/1300]\tAverage Shortfall for Agent1: $1,281,148.59\n", 110 | "Episode [200/1300]\tAverage Shortfall for Agent2: $1,281,025.43\n", 111 | "Episode [300/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n", 112 | "Episode [300/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n", 113 | "Episode [400/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n", 114 | "Episode [400/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n", 115 | "Episode [500/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n", 116 | "Episode [500/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n", 117 | "Episode [600/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n", 118 | "Episode [600/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n", 119 | "Episode [700/1300]\tAverage Shortfall for Agent1: $1,227,339.91\n", 120 | "Episode [700/1300]\tAverage Shortfall for Agent2: $1,253,734.24\n", 121 | "Episode [800/1300]\tAverage Shortfall for Agent1: $415,623.02\n", 122 | "Episode [800/1300]\tAverage Shortfall for Agent2: $433,944.85\n", 123 | "Episode [900/1300]\tAverage Shortfall for Agent1: $314,968.49\n", 124 | "Episode [900/1300]\tAverage Shortfall for Agent2: $317,854.76\n", 125 | "Episode [1000/1300]\tAverage Shortfall for Agent1: $318,731.56\n", 126 | "Episode [1000/1300]\tAverage Shortfall for Agent2: $317,495.71\n", 127 | "Episode [1100/1300]\tAverage Shortfall for Agent1: $329,135.85\n", 128 | "Episode [1100/1300]\tAverage Shortfall for Agent2: $333,255.71\n", 129 | "Episode [1200/1300]\tAverage Shortfall for Agent1: $300,993.44\n", 130 | "Episode [1200/1300]\tAverage Shortfall for Agent2: $301,320.57\n", 131 | "Episode [1300/1300]\tAverage Shortfall for Agent1: $294,413.69\n", 132 | "Episode [1300/1300]\tAverage Shortfall for Agent2: $292,937.04\n", 133 | "\n", 134 | "Average Implementation Shortfall for Agent1: $829,225.39 \n", 135 | "\n", 136 | "\n", 137 | "Average Implementation Shortfall for Agent2: $833,877.00 \n", 138 | "\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "import numpy as np\n", 144 | "\n", 145 | "import syntheticChrissAlmgren as sca\n", 146 | "from ddpg_agent import Agent\n", 147 | "\n", 148 | "from collections import deque\n", 149 | "\n", 150 | "# Create simulation environment\n", 151 | "env = sca.MarketEnvironment()\n", 152 | "\n", 153 | "# Initialize Feed-forward DNNs for Actor and Critic models. 
\n", 154 | "agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)\n", 155 | "agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)\n", 156 | "# Set the liquidation time\n", 157 | "lqt = 60\n", 158 | "\n", 159 | "# Set the number of trades\n", 160 | "n_trades = 60\n", 161 | "\n", 162 | "# Set trader's risk aversion\n", 163 | "tr1 = 1e-6\n", 164 | "tr2 = 1e-6\n", 165 | "\n", 166 | "# Set the number of episodes to run the simulation\n", 167 | "episodes = 1300\n", 168 | "shortfall_list = []\n", 169 | "shortfall_hist1 = np.array([])\n", 170 | "shortfall_hist2 = np.array([])\n", 171 | "shortfall_deque1 = deque(maxlen=100)\n", 172 | "shortfall_deque2 = deque(maxlen=100)\n", 173 | "for episode in range(episodes): \n", 174 | " # Reset the enviroment\n", 175 | " cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n", 176 | "\n", 177 | " # set the environment to make transactions\n", 178 | " env.start_transactions()\n", 179 | "\n", 180 | " for i in range(n_trades + 1):\n", 181 | " \n", 182 | " # Predict the best action for the current state. \n", 183 | " cur_state1 = np.delete(cur_state,8)\n", 184 | " cur_state2 = np.delete(cur_state,7)\n", 185 | " #print(cur_state[5:])\n", 186 | " action1 = agent1.act(cur_state1, add_noise = True)\n", 187 | " action2 = agent2.act(cur_state2, add_noise = True)\n", 188 | " #print(action1,action2)\n", 189 | " # Action is performed and new state, reward, info are received. \n", 190 | " new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n", 191 | " \n", 192 | " # current state, action, reward, new state are stored in the experience replay\n", 193 | " new_state1 = np.delete(new_state,8)\n", 194 | " new_state2 = np.delete(new_state,7)\n", 195 | " agent1.step(cur_state1, action1, reward1, new_state1, done1)\n", 196 | " agent2.step(cur_state2, action2, reward2, new_state2, done2)\n", 197 | " # roll over new state\n", 198 | " cur_state = new_state\n", 199 | "\n", 200 | " if info.done1 and info.done2:\n", 201 | " shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)\n", 202 | " shortfall_deque1.append(info.implementation_shortfall1)\n", 203 | " \n", 204 | " shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)\n", 205 | " shortfall_deque2.append(info.implementation_shortfall2)\n", 206 | " break\n", 207 | " \n", 208 | " if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes\n", 209 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1))) \n", 210 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))\n", 211 | " shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])\n", 212 | "print('\\nAverage Implementation Shortfall for Agent1: ${:,.2f} \\n'.format(np.mean(shortfall_hist1)))\n", 213 | "print('\\nAverage Implementation Shortfall for Agent2: ${:,.2f} \\n'.format(np.mean(shortfall_hist2)))" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 5, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "shortfall = np.array(shortfall_list)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | }, 232 | { 
233 | "cell_type": "code", 234 | "execution_count": 6, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "np.save('1e-6_1e-6_cooporation_shorfall_list.npy',shortfall)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 3, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "1e-06 1e-06\n", 251 | "[1. 1.]\n", 252 | "[0.761694 0.656324]\n", 253 | "[0.603648 0.454928]\n", 254 | "[0.44365 0.334226]\n", 255 | "[0.305346 0.25539 ]\n", 256 | "[0.20247 0.202642]\n", 257 | "[0.13316 0.148788]\n", 258 | "[0.09197 0.10399]\n", 259 | "[0.064072 0.074902]\n", 260 | "[0.044238 0.052522]\n", 261 | "[0.03257 0.036602]\n", 262 | "[0.02397 0.024466]\n", 263 | "[0.018556 0.01732 ]\n", 264 | "[0.013314 0.011942]\n", 265 | "[0.009696 0.008204]\n", 266 | "[0.006774 0.005622]\n", 267 | "[0.004728 0.003898]\n", 268 | "[0.003236 0.002704]\n", 269 | "[0.00228 0.001762]\n", 270 | "[0.00165 0.00114]\n", 271 | "[0.001234 0.000778]\n", 272 | "[0.000932 0.000566]\n", 273 | "[0.000674 0.000392]\n", 274 | "[0.000506 0.00029 ]\n", 275 | "[0.000376 0.000212]\n", 276 | "[0.000294 0.00015 ]\n", 277 | "[0.000224 0.000108]\n", 278 | "[1.66e-04 7.40e-05]\n", 279 | "[1.14e-04 5.40e-05]\n", 280 | "[8.2e-05 4.2e-05]\n", 281 | "[5.8e-05 3.4e-05]\n", 282 | "[4.0e-05 2.8e-05]\n", 283 | "[2.6e-05 2.2e-05]\n", 284 | "[1.8e-05 1.8e-05]\n", 285 | "[1.2e-05 1.4e-05]\n", 286 | "[8.e-06 1.e-05]\n", 287 | "[6.e-06 8.e-06]\n", 288 | "[4.e-06 6.e-06]\n", 289 | "[2.e-06 4.e-06]\n", 290 | "[2.e-06 2.e-06]\n", 291 | "[2.e-06 2.e-06]\n", 292 | "[2.e-06 2.e-06]\n", 293 | "[2.e-06 2.e-06]\n", 294 | "[2.e-06 2.e-06]\n", 295 | "[2.e-06 2.e-06]\n", 296 | "[2.e-06 2.e-06]\n", 297 | "[2.e-06 2.e-06]\n", 298 | "[2.e-06 2.e-06]\n", 299 | "[2.e-06 2.e-06]\n", 300 | "[2.e-06 2.e-06]\n", 301 | "[2.e-06 2.e-06]\n", 302 | "[2.e-06 2.e-06]\n", 303 | "[2.e-06 2.e-06]\n", 304 | "[2.e-06 2.e-06]\n", 305 | "[2.e-06 2.e-06]\n", 306 | "[2.e-06 2.e-06]\n", 307 | "[2.e-06 2.e-06]\n", 308 | "[2.e-06 2.e-06]\n", 309 | "[2.e-06 2.e-06]\n", 310 | "[2.e-06 2.e-06]\n", 311 | "Episode [1300/1300]\tAverage Shortfall for Agent1: $298,868.72\n", 312 | "Episode [1300/1300]\tAverage Shortfall for Agent2: $296,739.36\n" 313 | ] 314 | } 315 | ], 316 | "source": [ 317 | "print(tr1,tr2)\n", 318 | "cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n", 319 | "\n", 320 | " # set the environment to make transactions\n", 321 | "env.start_transactions()\n", 322 | "\n", 323 | "trajectory = np.zeros([n_trades+1,2])\n", 324 | "for i in range(n_trades + 1):\n", 325 | " trajectory[i] = cur_state[7:]\n", 326 | " \n", 327 | " print(cur_state[7:])\n", 328 | " # Predict the best action for the current state. \n", 329 | " cur_state1 = np.delete(cur_state,8)\n", 330 | " cur_state2 = np.delete(cur_state,7)\n", 331 | " #print(cur_state[5:])\n", 332 | " action1 = agent1.act(cur_state1, add_noise = True)\n", 333 | " action2 = agent2.act(cur_state2, add_noise = True)\n", 334 | " #print(action1,action2)\n", 335 | " # Action is performed and new state, reward, info are received. 
\n", 336 | " new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n", 337 | " \n", 338 | " # current state, action, reward, new state are stored in the experience replay\n", 339 | " new_state1 = np.delete(new_state,8)\n", 340 | " new_state2 = np.delete(new_state,7)\n", 341 | " agent1.step(cur_state1, action1, reward1, new_state1, done1)\n", 342 | " agent2.step(cur_state2, action2, reward2, new_state2, done2)\n", 343 | " # roll over new state\n", 344 | " cur_state = new_state\n", 345 | "\n", 346 | " if info.done1 and info.done2:\n", 347 | " shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)\n", 348 | " shortfall_deque1.append(info.implementation_shortfall1)\n", 349 | " \n", 350 | " shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)\n", 351 | " shortfall_deque2.append(info.implementation_shortfall2)\n", 352 | " break\n", 353 | " \n", 354 | "if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes\n", 355 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1))) \n", 356 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))\n" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 5, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "np.save('1e-6_1e-6_competition_trajectory_1500.npy',trajectory)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 5, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "ename": "TypeError", 375 | "evalue": "reset() got an unexpected keyword argument 'lamb'", 376 | "output_type": "error", 377 | "traceback": [ 378 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 379 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 380 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# Plot the trading list and trading trajectory. 
If show_trl = True, the data frame containing the values of the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# trading list and trading trajectory is printed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot_trade_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlq_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ml_time\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnm_trades\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_trades\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtr_risk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mt_risk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_trl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 381 | "\u001b[0;32m~/Kaggle/finance/utils.py\u001b[0m in \u001b[0;36mplot_trade_list\u001b[0;34m(lq_time, nm_trades, tr_risk, show_trl)\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[0;31m# Reset the environment with the given parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 315\u001b[0;31m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mliquid_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlq_time\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_trades\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnm_trades\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlamb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtr_risk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 316\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0;31m# Get the trading list from the environment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 382 | "\u001b[0;31mTypeError\u001b[0m: reset() got an unexpected keyword argument 'lamb'" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "%matplotlib inline\n", 388 | "\n", 389 | "import matplotlib.pyplot as plt\n", 390 | "\n", 391 | "import utils\n", 392 | "\n", 393 | "# We set the default figure size\n", 394 | "plt.rcParams['figure.figsize'] = [17.0, 7.0]\n", 395 | "\n", 396 | "\n", 397 | "# Set the number of days to sell all shares (i.e. the liquidation time)\n", 398 | "l_time = 60\n", 399 | "\n", 400 | "# Set the number of trades\n", 401 | "n_trades = 60\n", 402 | "\n", 403 | "# Set the trader's risk aversion\n", 404 | "t_risk = 1e-6\n", 405 | "\n", 406 | "# Plot the trading list and trading trajectory. 
If show_trl = True, the data frame containing the values of the\n",
408 |     "# trading list and trading trajectory is printed\n",
409 |     "utils.plot_trade_list(lq_time = l_time, nm_trades = n_trades, tr_risk = t_risk, show_trl = True)"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": null,
415 |    "metadata": {},
416 |    "outputs": [],
417 |    "source": []
418 |   }
419 |  ],
420 |  "metadata": {
421 |   "kernelspec": {
422 |    "display_name": "Python 3",
423 |    "language": "python",
424 |    "name": "python3"
425 |   },
426 |   "language_info": {
427 |    "codemirror_mode": {
428 |     "name": "ipython",
429 |     "version": 3
430 |    },
431 |    "file_extension": ".py",
432 |    "mimetype": "text/x-python",
433 |    "name": "python",
434 |    "nbconvert_exporter": "python",
435 |    "pygments_lexer": "ipython3",
436 |    "version": "3.6.8"
437 |   }
438 |  },
439 |  "nbformat": 4,
440 |  "nbformat_minor": 2
441 | }
442 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Multi-Agent Reinforcement Learning for Liquidation Strategy Analysis
2 | Source code for the paper "Multi-agent reinforcement learning for liquidation strategy analysis", accepted at ICML 2019 AI in Finance: Applications and Infrastructure for Multi-Agent Learning (https://arxiv.org/abs/1906.11046).
3 | 
4 | ## Abstract
5 | 
6 | Liquidation is the process of selling a large number of shares of one stock sequentially within a given time frame, taking into consideration the costs arising from market impact and a trader's risk aversion. The main challenge in optimizing liquidation is to find an appropriate modeling system that can incorporate the complexities of the stock market and generate practical trading strategies. In this paper, we propose to use a multi-agent deep reinforcement learning model, which captures high-level complexities better than various other machine learning methods, so that agents can learn how to make the best selling decisions.
7 | 
8 | ## Proposed Methods
9 | 
10 | * We theoretically analyze the Almgren and Chriss model and extend its fundamental mechanism so that it can be used as a multi-agent trading environment. Our work lays the foundation for future multi-agent trading analysis.
11 | * We analyze the cooperative and competitive behaviors between agents by adjusting the reward function for each agent, which overcomes the limitations of single-agent reinforcement learning algorithms.
12 | * We simulate trading and develop an optimal trading strategy with practical constraints using reinforcement learning, which shows the capability of reinforcement learning methods to solve realistic liquidation problems.
13 | 
14 | ## Dependencies
15 | 
16 | The script has been tested running under Python 3.7.0, with the following packages installed:
17 | 
18 | * `numpy==1.14.5`
19 | * `torch` (`model.py` and `ddpg_agent.py` are implemented in PyTorch), plus `pandas`, `matplotlib`, and `statsmodels` for `utils.py`
20 | 
21 | ## Experiments
22 | 
23 | ### 1. Environment
24 | 
25 | The problem of finding an optimal liquidation strategy is investigated using the Almgren-Chriss market impact model, under the assumption that the agents must liquidate their assets completely within a given time frame. The market impact on the stock price is divided into three components: the unaffected price process, a permanent impact, and a temporary impact. The price process has a stochastic component, but it drops out of the mean-variance analysis. Both the permanent and the temporary impact are linear functions of the trading volume. The model therefore serves as the trading environment: when the agents make selling decisions, the environment returns the updated price information.
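For reference, the two-agent training loop in `Model_training.ipynb` condenses to the sketch below (the seeds and risk-aversion values mirror the notebook's defaults; each agent observes the shared 9-dimensional state with the *other* agent's remaining inventory removed via `np.delete`):

```python
import numpy as np
import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

# Multi-agent Almgren-Chriss market environment, one DDPG agent per trader
env = sca.MarketEnvironment()
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(), random_seed=1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(), random_seed=108)

# 60-day liquidation horizon, 60 trades, risk aversion 1e-6 for both agents
cur_state = env.reset(seed=0, liquid_time=60, num_trades=60, lamb1=1e-6, lamb2=1e-6)
env.start_transactions()

for _ in range(61):
    # Each agent sees the common state minus the other agent's inventory
    state1, state2 = np.delete(cur_state, 8), np.delete(cur_state, 7)
    action1 = agent1.act(state1, add_noise=True)  # fraction of remaining shares to sell
    action2 = agent2.act(state2, add_noise=True)
    cur_state, reward1, reward2, done1, done2, info = env.step(action1, action2)
    agent1.step(state1, action1, reward1, np.delete(cur_state, 8), done1)
    agent2.step(state2, action2, reward2, np.delete(cur_state, 7), done2)
    if info.done1 and info.done2:
        print(info.implementation_shortfall1, info.implementation_shortfall2)
        break
```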
26 | 
27 | ### 2. Results
28 | 
29 | ![theorem1](img/Theorem1.png)
30 | 
31 | Comparison of expected implementation shortfalls: there are three agents, $A$, $B_1$ and $B_2$. The expected shortfall of agent $A$ is higher than the sum of the two expected shortfalls of $B_1$ and $B_2$.
32 | 
33 | ![theorem2](img/theorem2.png)
34 | 
35 | Trading trajectory: compared with their original trading trajectories, the agents' trading trajectories are closer to each other when they are trained in a multi-agent environment.
36 | 
37 | 
38 | ![graph3](img/Competition_and_Cooporation.png)
39 | 
40 | Cooperative and competitive relationships: if the two agents are in a cooperative relationship, the total expected shortfall is no better than when they are trained with independent reward functions. If the two agents are in a competitive relationship, they first learn to minimize the expected shortfall, but malignant competition then leads to a significant increase in implementation shortfall.
41 | 
42 | 
43 | ![graph4](img/Have_Competitor.png)
44 | 
45 | Trading trajectory: compared with independent training, introducing a competitor makes the host agent learn to adapt to the new environment and sell all of its shares within the first two days.
46 | 
47 | 
48 | ## Authors
49 | 
50 | Wenhang Bao & Xiao-Yang Liu
51 | 
52 | 
--------------------------------------------------------------------------------
/Results/1e-4_1e-9_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_1e-9_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-4_optimal.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_optimal.npy
--------------------------------------------------------------------------------
/Results/1e-4_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-4_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_competition_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_competition_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_competition_trajectory.npy:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_competition_trajectory.npy -------------------------------------------------------------------------------- /Results/1e-6_1e-6_competition_trajectory_1500.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_competition_trajectory_1500.npy -------------------------------------------------------------------------------- /Results/1e-6_1e-6_cooporation_shortfall_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_cooporation_shortfall_list.npy -------------------------------------------------------------------------------- /Results/1e-6_1e-6_cooporation_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_cooporation_trajectory.npy -------------------------------------------------------------------------------- /Results/1e-6_optimal.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_optimal.npy -------------------------------------------------------------------------------- /Results/1e-6_shortfall_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list.npy -------------------------------------------------------------------------------- /Results/1e-6_shortfall_list_0.3M.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_0.3M.npy -------------------------------------------------------------------------------- /Results/1e-6_shortfall_list_0.7M.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_0.7M.npy -------------------------------------------------------------------------------- /Results/1e-6_shortfall_list_fixed_competitor.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_fixed_competitor.npy -------------------------------------------------------------------------------- /Results/1e-6_shortfall_list_new_fixed_competitor.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_new_fixed_competitor.npy -------------------------------------------------------------------------------- /Results/1e-6_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_trajectory.npy -------------------------------------------------------------------------------- /Results/1e-6_trajectory_fixed_competitor.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_trajectory_fixed_competitor.npy -------------------------------------------------------------------------------- /Results/1e-6_trajectory_new_fixed_competitor.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_trajectory_new_fixed_competitor.npy -------------------------------------------------------------------------------- /Results/1e-7_1e-7_shortfall_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_1e-7_shortfall_list.npy -------------------------------------------------------------------------------- /Results/1e-7_1e-7_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_1e-7_trajectory.npy -------------------------------------------------------------------------------- /Results/1e-7_shortfall_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_shortfall_list.npy -------------------------------------------------------------------------------- /Results/1e-7_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_trajectory.npy 
-------------------------------------------------------------------------------- /Results/1e-9_optimal.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-9_optimal.npy -------------------------------------------------------------------------------- /Results/1e-9_shortfall_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-9_shortfall_list.npy -------------------------------------------------------------------------------- /Results/1e-9_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-9_trajectory.npy -------------------------------------------------------------------------------- /Results/README.md: -------------------------------------------------------------------------------- 1 | README 2 | -------------------------------------------------------------------------------- /Results/price_list.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/price_list.npy -------------------------------------------------------------------------------- /ddpg_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | from collections import namedtuple, deque 5 | 6 | from model import Actor, Critic 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | 12 | BUFFER_SIZE = int(1e4) # replay buffer size 13 | BATCH_SIZE = 128 # minibatch size 14 | GAMMA = 0.99 # discount factor 15 | TAU = 1e-3 # for soft update of target parameters 16 | LR_ACTOR = 1e-4 # learning rate of the actor 17 | LR_CRITIC = 1e-3 # learning rate of the critic 18 | WEIGHT_DECAY = 0 # L2 weight decay 19 | 20 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 21 | 22 | class Agent(): 23 | """Interacts with and learns from the environment.""" 24 | 25 | def __init__(self, state_size, action_size, random_seed): 26 | """Initialize an Agent object. 
27 | 28 | Params 29 | ====== 30 | state_size (int): dimension of each state 31 | action_size (int): dimension of each action 32 | random_seed (int): random seed 33 | """ 34 | self.state_size = state_size 35 | self.action_size = action_size 36 | self.seed = random.seed(random_seed) 37 | 38 | # Actor Network (w/ Target Network) 39 | self.actor_local = Actor(state_size, action_size, random_seed).to(device) 40 | self.actor_target = Actor(state_size, action_size, random_seed).to(device) 41 | self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) 42 | 43 | # Critic Network (w/ Target Network) 44 | self.critic_local = Critic(state_size, action_size, random_seed).to(device) 45 | self.critic_target = Critic(state_size, action_size, random_seed).to(device) 46 | self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) 47 | 48 | # Noise process 49 | self.noise = OUNoise(action_size, random_seed) 50 | 51 | # Replay memory 52 | self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed) 53 | 54 | def step(self, state, action, reward, next_state, done): 55 | """Save experience in replay memory, and use random sample from buffer to learn.""" 56 | # Save experience / reward 57 | self.memory.add(state, action, reward, next_state, done) 58 | 59 | # Learn, if enough samples are available in memory 60 | if len(self.memory) > BATCH_SIZE: 61 | experiences = self.memory.sample() 62 | self.learn(experiences, GAMMA) 63 | 64 | def act(self, state, add_noise=True): 65 | """Returns actions for given state as per current policy.""" 66 | state = torch.from_numpy(state).float().to(device) 67 | self.actor_local.eval() 68 | with torch.no_grad(): 69 | action = self.actor_local(state).cpu().data.numpy() 70 | self.actor_local.train() 71 | if add_noise: 72 | action += self.noise.sample() 73 | action = (action + 1.0) / 2.0 74 | return np.clip(action, 0, 1) 75 | 76 | 77 | def reset(self): 78 | self.noise.reset() 79 | 80 | def learn(self, experiences, gamma): 81 | """Update policy and value parameters using given batch of experience tuples. 
82 | Q_targets = r + γ * critic_target(next_state, actor_target(next_state)) 83 | where: 84 | actor_target(state) -> action 85 | critic_target(state, action) -> Q-value 86 | 87 | Params 88 | ====== 89 | experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples 90 | gamma (float): discount factor 91 | """ 92 | states, actions, rewards, next_states, dones = experiences 93 | 94 | # ---------------------------- update critic ---------------------------- # 95 | # Get predicted next-state actions and Q values from target models 96 | actions_next = self.actor_target(next_states) 97 | Q_targets_next = self.critic_target(next_states, actions_next) 98 | # Compute Q targets for current states (y_i) 99 | Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) 100 | # Compute critic loss 101 | Q_expected = self.critic_local(states, actions) 102 | critic_loss = F.mse_loss(Q_expected, Q_targets) 103 | # Minimize the loss 104 | self.critic_optimizer.zero_grad() 105 | critic_loss.backward() 106 | self.critic_optimizer.step() 107 | 108 | # ---------------------------- update actor ---------------------------- # 109 | # Compute actor loss 110 | actions_pred = self.actor_local(states) 111 | actor_loss = -self.critic_local(states, actions_pred).mean() 112 | # Minimize the loss 113 | self.actor_optimizer.zero_grad() 114 | actor_loss.backward() 115 | self.actor_optimizer.step() 116 | 117 | # ----------------------- update target networks ----------------------- # 118 | self.soft_update(self.critic_local, self.critic_target, TAU) 119 | self.soft_update(self.actor_local, self.actor_target, TAU) 120 | 121 | def soft_update(self, local_model, target_model, tau): 122 | """Soft update model parameters. 123 | θ_target = τ*θ_local + (1 - τ)*θ_target 124 | 125 | Params 126 | ====== 127 | local_model: PyTorch model (weights will be copied from) 128 | target_model: PyTorch model (weights will be copied to) 129 | tau (float): interpolation parameter 130 | """ 131 | for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): 132 | target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data) 133 | 134 | class OUNoise: 135 | """Ornstein-Uhlenbeck process.""" 136 | 137 | def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.2): 138 | """Initialize parameters and noise process.""" 139 | self.mu = mu * np.ones(size) 140 | self.theta = theta 141 | self.sigma = sigma 142 | self.seed = random.seed(seed) 143 | self.reset() 144 | 145 | def reset(self): 146 | """Reset the internal state (= noise) to mean (mu).""" 147 | self.state = copy.copy(self.mu) 148 | 149 | def sample(self): 150 | """Update internal state and return it as a noise sample.""" 151 | x = self.state 152 | dx = self.theta * (self.mu - x) + self.sigma * np.array([random.random() for i in range(len(x))]) 153 | self.state = x + dx 154 | return self.state 155 | 156 | class ReplayBuffer: 157 | """Fixed-size buffer to store experience tuples.""" 158 | 159 | def __init__(self, action_size, buffer_size, batch_size, seed): 160 | """Initialize a ReplayBuffer object. 
161 | Params 162 | ====== 163 | buffer_size (int): maximum size of buffer 164 | batch_size (int): size of each training batch 165 | """ 166 | self.action_size = action_size 167 | self.memory = deque(maxlen=buffer_size) # internal memory (deque) 168 | self.batch_size = batch_size 169 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) 170 | self.seed = random.seed(seed) 171 | 172 | def add(self, state, action, reward, next_state, done): 173 | """Add a new experience to memory.""" 174 | e = self.experience(state, action, reward, next_state, done) 175 | self.memory.append(e) 176 | 177 | def sample(self): 178 | """Randomly sample a batch of experiences from memory.""" 179 | experiences = random.sample(self.memory, k=self.batch_size) 180 | 181 | states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device) 182 | actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device) 183 | rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device) 184 | next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device) 185 | dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device) 186 | 187 | return (states, actions, rewards, next_states, dones) 188 | 189 | def __len__(self): 190 | """Return the current size of internal memory.""" 191 | return len(self.memory) -------------------------------------------------------------------------------- /img/Competition_and_Cooporation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Competition_and_Cooporation.png -------------------------------------------------------------------------------- /img/Have_Competitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Have_Competitor.png -------------------------------------------------------------------------------- /img/Multi-agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Multi-agent.png -------------------------------------------------------------------------------- /img/README.md: -------------------------------------------------------------------------------- 1 | README 2 | -------------------------------------------------------------------------------- /img/Theorem1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Theorem1.png -------------------------------------------------------------------------------- /img/theorem2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/theorem2.png -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | def hidden_init(layer): 9 | fan_in = layer.weight.data.size()[0] 10 | lim = 1. / np.sqrt(fan_in) 11 | return (-lim, lim) 12 | 13 | class Actor(nn.Module): 14 | """Actor (Policy) Model.""" 15 | 16 | def __init__(self, state_size, action_size, seed, fc1_units=24, fc2_units=48): 17 | """Initialize parameters and build model. 18 | Params 19 | ====== 20 | state_size (int): Dimension of each state 21 | action_size (int): Dimension of each action 22 | seed (int): Random seed 23 | fc1_units (int): Number of nodes in first hidden layer 24 | fc2_units (int): Number of nodes in second hidden layer 25 | """ 26 | super(Actor, self).__init__() 27 | self.seed = torch.manual_seed(seed) 28 | self.fc1 = nn.Linear(state_size, fc1_units) 29 | self.fc2 = nn.Linear(fc1_units, fc2_units) 30 | self.fc3 = nn.Linear(fc2_units, action_size) 31 | self.reset_parameters() 32 | 33 | def reset_parameters(self): 34 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1)) 35 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 36 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 37 | 38 | def forward(self, state): 39 | """Build an actor (policy) network that maps states -> actions.""" 40 | x = F.relu(self.fc1(state)) 41 | x = F.relu(self.fc2(x)) 42 | return torch.tanh(self.fc3(x)) 43 | 44 | 45 | class Critic(nn.Module): 46 | """Critic (Value) Model.""" 47 | 48 | def __init__(self, state_size, action_size, seed, fcs1_units=24, fc2_units=48): 49 | """Initialize parameters and build model. 
50 | Params 51 | ====== 52 | state_size (int): Dimension of each state 53 | action_size (int): Dimension of each action 54 | seed (int): Random seed 55 | fcs1_units (int): Number of nodes in the first hidden layer 56 | fc2_units (int): Number of nodes in the second hidden layer 57 | """ 58 | super(Critic, self).__init__() 59 | self.seed = torch.manual_seed(seed) 60 | self.fcs1 = nn.Linear(state_size, fcs1_units) 61 | self.fc2 = nn.Linear(fcs1_units+action_size, fc2_units) 62 | self.fc3 = nn.Linear(fc2_units, 1) 63 | self.reset_parameters() 64 | 65 | def reset_parameters(self): 66 | self.fcs1.weight.data.uniform_(*hidden_init(self.fcs1)) 67 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2)) 68 | self.fc3.weight.data.uniform_(-3e-3, 3e-3) 69 | 70 | def forward(self, state, action): 71 | """Build a critic (value) network that maps (state, action) pairs -> Q-values.""" 72 | xs = F.relu(self.fcs1(state)) 73 | x = torch.cat((xs, action), dim=1) 74 | x = F.relu(self.fc2(x)) 75 | return self.fc3(x) 76 | -------------------------------------------------------------------------------- /syntheticChrissAlmgren.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import collections 4 | 5 | 6 | # ------------------------------------------------ Financial Parameters --------------------------------------------------- # 7 | 8 | ANNUAL_VOLAT = 0.12 # Annual volatility in stock price 9 | BID_ASK_SP = 1 / 8 # Bid-ask spread 10 | DAILY_TRADE_VOL = 5e6 # Average Daily trading volume 11 | TRAD_DAYS = 250 # Number of trading days in a year 12 | DAILY_VOLAT = ANNUAL_VOLAT / np.sqrt(TRAD_DAYS) # Daily volatility in stock price 13 | 14 | 15 | # ----------------------------- Parameters for the Almgren and Chriss Optimal Execution Model ----------------------------- # 16 | 17 | TOTAL_SHARES1 = 500000 # Total number of shares to sell 18 | TOTAL_SHARES2 = 500000 # Total number of shares to sell 19 | STARTING_PRICE = 50 # Starting price per share 20 | LLAMBDA1 = 1e-6 # Trader's risk aversion 21 | LLAMBDA2 = 1e-4 22 | LIQUIDATION_TIME = 60 # How many days to sell all the shares. 23 | NUM_N = 60 # Number of trades 24 | EPSILON = BID_ASK_SP / 2 # Fixed Cost of Selling. 
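# With the defaults above, EPSILON = 0.125 / 2 = 0.0625 dollars per share, which is
# the "Fixed Cost of Selling per Share: $0.062" reported in the notebook's ac_params
# table; likewise ETA and GAMMA below evaluate to 2.5e-06 and 2.5e-07 respectively.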
25 | SINGLE_STEP_VARIANCE = (DAILY_VOLAT * STARTING_PRICE) ** 2 # Calculate single step variance 26 | ETA = BID_ASK_SP / (0.01 * DAILY_TRADE_VOL) # Price Impact for Each 1% of Daily Volume Traded 27 | GAMMA = BID_ASK_SP / (0.1 * DAILY_TRADE_VOL) # Permanent Impact Constant 28 | 29 | # ----------------------------------------------------------------------------------------------------------------------- # 30 | 31 | 32 | # Simulation Environment 33 | 34 | class MarketEnvironment(): 35 | 36 | def __init__(self, randomSeed = 0, 37 | lqd_time = LIQUIDATION_TIME, 38 | num_tr = NUM_N, 39 | lambd1 = LLAMBDA1, 40 | lambd2 = LLAMBDA2): 41 | 42 | # Set the random seed 43 | random.seed(randomSeed) 44 | 45 | # Initialize the financial parameters so we can access them later 46 | self.anv = ANNUAL_VOLAT 47 | self.basp = BID_ASK_SP 48 | self.dtv = DAILY_TRADE_VOL 49 | self.dpv = DAILY_VOLAT 50 | 51 | # Initialize the Almgren-Chriss parameters so we can access them later 52 | self.total_shares1 = TOTAL_SHARES1 53 | self.total_shares2 = TOTAL_SHARES2 54 | self.startingPrice = STARTING_PRICE 55 | self.llambda1 = lambd1 56 | self.llambda2 = lambd2 57 | self.liquidation_time = lqd_time 58 | self.num_n = num_tr 59 | self.epsilon = EPSILON 60 | self.singleStepVariance = SINGLE_STEP_VARIANCE 61 | self.eta = ETA 62 | self.gamma = GAMMA 63 | 64 | # Calculate some Almgren-Chriss parameters 65 | self.tau = self.liquidation_time / self.num_n 66 | self.eta_hat = self.eta - (0.5 * self.gamma * self.tau) 67 | self.kappa_hat1 = np.sqrt((self.llambda1 * self.singleStepVariance) / self.eta_hat) 68 | self.kappa_hat2 = np.sqrt((self.llambda2 * self.singleStepVariance) / self.eta_hat) 69 | self.kappa1 = np.arccosh((((self.kappa_hat1 ** 2) * (self.tau ** 2)) / 2) + 1) / self.tau 70 | self.kappa2 = np.arccosh((((self.kappa_hat2 ** 2) * (self.tau ** 2)) / 2) + 1) / self.tau 71 | 72 | # Set the variables for the initial state 73 | self.shares_remaining1 = self.total_shares1 74 | self.shares_remaining2 = self.total_shares2 75 | self.timeHorizon = self.num_n 76 | self.logReturns = collections.deque(np.zeros(6)) 77 | 78 | # Set the initial impacted price to the starting price 79 | self.prevImpactedPrice = self.startingPrice 80 | 81 | # Set the initial transaction state to False 82 | self.transacting1 = False 83 | self.transacting2 = False 84 | 85 | # Set a variable to keep trak of the trade number 86 | self.k = 0 87 | 88 | 89 | def reset(self, seed = 0, liquid_time = LIQUIDATION_TIME, num_trades = NUM_N, lamb1 = LLAMBDA1,lamb2 = LLAMBDA2): 90 | 91 | # Initialize the environment with the given parameters 92 | self.__init__(randomSeed = seed, lqd_time = liquid_time, num_tr = num_trades, lambd1 = lamb1,lambd2 = lamb2) 93 | 94 | # Set the initial state to [0,0,0,0,0,0,1,1] 95 | self.initial_state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, \ 96 | self.shares_remaining1 / self.total_shares1, \ 97 | self.shares_remaining2 / self.total_shares2]) 98 | return self.initial_state 99 | 100 | 101 | def start_transactions(self): 102 | 103 | # Set transactions on 104 | self.transacting1 = True 105 | self.transacting2 = True 106 | 107 | # Set the minimum number of stocks one can sell 108 | self.tolerance = 1 109 | 110 | # Set the initial capture to zero 111 | self.totalCapture1 = 0 112 | self.totalCapture2 = 0 113 | 114 | # Set the initial previous price to the starting price 115 | self.prevPrice = self.startingPrice 116 | 117 | # Set the initial square of the shares to sell to zero 118 | self.totalSSSQ1 = 0 119 | 
self.totalSSSQ2 = 0
120 |         # Set the initial square of the remaining shares to sell to zero
121 |         self.totalSRSQ1 = 0
122 |         self.totalSRSQ2 = 0
123 |         # Set the initial AC utility
124 |         self.prevUtility1 = self.compute_AC_utility(self.total_shares1,self.kappa1,self.llambda1)
125 |         self.prevUtility2 = self.compute_AC_utility(self.total_shares2,self.kappa2,self.llambda2)
126 | 
127 |     def step(self, action1, action2):
128 | 
129 |         # Create a class that will be used to keep track of information about the transaction
130 |         class Info(object):
131 |             pass
132 |         info = Info()
133 | 
134 |         # Set the done flags to False. This indicates that we haven't sold all the shares yet.
135 |         info.done1 = False
136 |         info.done2 = False
137 | 
138 |         # During training, if the DDPG agent fails to sell all the stocks before the given
139 |         # number of trades, or if the total number of shares remaining is less than 1, then stop transacting,
140 |         # set the done flag to True, and return the current implementation shortfall.
141 |         # Once neither agent is transacting, the reward is zero (see the else branch below).
142 |         if self.transacting1 and (self.timeHorizon == 0 or (abs(self.shares_remaining1) < self.tolerance)):
143 |             self.transacting1 = False
144 |             info.done1 = True
145 |             info.implementation_shortfall1 = self.total_shares1 * self.startingPrice - self.totalCapture1
146 |             info.expected_shortfall1 = self.get_expected_shortfall(self.total_shares1,self.totalSSSQ1)
147 |             info.expected_variance1 = self.singleStepVariance * self.tau * self.totalSRSQ1
148 |             info.utility1 = info.expected_shortfall1 + self.llambda1 * info.expected_variance1
149 | 
150 |         if self.transacting2 and (self.timeHorizon == 0 or (abs(self.shares_remaining2) < self.tolerance)):
151 |             self.transacting2 = False
152 |             info.done2 = True
153 |             info.implementation_shortfall2 = self.total_shares2 * self.startingPrice - self.totalCapture2
154 |             info.expected_shortfall2 = self.get_expected_shortfall(self.total_shares2,self.totalSSSQ2)
155 |             info.expected_variance2 = self.singleStepVariance * self.tau * self.totalSRSQ2
156 |             info.utility2 = info.expected_shortfall2 + self.llambda2 * info.expected_variance2
157 | 
158 |         # We don't add noise before the first trade
159 |         if self.k == 0:
160 |             info.price = self.prevImpactedPrice
161 |         else:
162 |             # Calculate the current stock price using arithmetic Brownian motion
163 |             info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)
164 | 
165 |         # If we are transacting, the stock price is affected by the number of shares we sell. The price evolves
166 |         # according to the Almgren and Chriss price dynamics model.
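        # In equation form, writing n_k for the total number of shares both agents
        # sell at step k (cf. permanentImpact() and temporaryImpact() below):
        #     price:           P_k = P_{k-1} - gamma * n_{k-1} + sqrt(singleStepVariance * tau) * xi_k,   xi_k ~ N(0, 1)
        #     execution price: P_exec_k = P_k - (epsilon * sign(n_k) + (eta / tau) * n_k)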
        # An agent that is no longer transacting sells nothing this step
        sharesToSellNow1 = 0
        sharesToSellNow2 = 0
167 |         if self.transacting1:
168 | 
169 |             # If action is an ndarray then extract the number from the array
170 |             if isinstance(action1, np.ndarray):
171 |                 action1 = action1.item()
172 | 
173 |             # Convert the action to the number of shares to sell in the current step
174 |             sharesToSellNow1 = self.shares_remaining1 * action1
175 | 
176 |             # Force liquidation of all remaining shares on the final step
177 |             if self.timeHorizon < 2:
178 |                 sharesToSellNow1 = self.shares_remaining1
179 | 
180 | #             sharesToSellNow = min(self.shares_remaining * action, self.shares_remaining)
181 |         if self.transacting2:
182 | 
183 |             # If action is an ndarray then extract the number from the array
184 |             if isinstance(action2, np.ndarray):
185 |                 action2 = action2.item()
186 | 
187 |             # Convert the action to the number of shares to sell in the current step
188 |             sharesToSellNow2 = self.shares_remaining2 * action2
189 | 
190 |             # Force liquidation of all remaining shares on the final step
191 |             if self.timeHorizon < 2:
192 |                 sharesToSellNow2 = self.shares_remaining2
193 | 
194 | 
195 |         if self.transacting1 or self.transacting2:
196 | 
197 |             # Since we are not selling fractions of shares, round the total number of shares to sell to the nearest integer.
198 |             info.share_to_sell_now1 = np.around(sharesToSellNow1)
199 |             info.share_to_sell_now2 = np.around(sharesToSellNow2)
200 |             # Calculate the permanent and temporary impact on the stock price according to the AC price dynamics model
201 |             info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now1+info.share_to_sell_now2)
202 |             info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now1+info.share_to_sell_now2)
203 | 
204 |             # Apply the temporary impact on the current stock price
205 |             info.exec_price = info.price - info.currentTemporaryImpact
206 | 
207 |             # Calculate the current total capture
208 |             self.totalCapture1 += info.share_to_sell_now1 * info.exec_price
209 |             self.totalCapture2 += info.share_to_sell_now2 * info.exec_price
210 | 
211 |             # Calculate the log return for the current step and save it in the logReturn deque
212 |             self.logReturns.append(np.log(info.price/self.prevPrice))
213 |             self.logReturns.popleft()
214 | 
215 |             # Update the number of shares remaining
216 |             self.shares_remaining1 -= info.share_to_sell_now1
217 |             self.shares_remaining2 -= info.share_to_sell_now2
218 | 
219 |             # Calculate the running total of the squares of shares sold and shares remaining
220 |             self.totalSSSQ1 += info.share_to_sell_now1 ** 2
221 |             self.totalSRSQ1 += self.shares_remaining1 ** 2
222 | 
223 |             self.totalSSSQ2 += info.share_to_sell_now2 ** 2
224 |             self.totalSRSQ2 += self.shares_remaining2 ** 2
225 | 
226 |             # Update the variables required for the next step
227 |             self.timeHorizon -= 1
228 |             self.prevPrice = info.price
229 |             self.prevImpactedPrice = info.price - info.currentPermanentImpact
230 | 
231 |             # Calculate the reward as the relative improvement in each agent's AC utility
232 |             currentUtility1 = self.compute_AC_utility(self.shares_remaining1,self.kappa1,self.llambda1)
233 |             currentUtility2 = self.compute_AC_utility(self.shares_remaining2,self.kappa2,self.llambda2)
234 |             if self.prevUtility1 == 0:
235 |                 reward1 = 0
236 |             else:
237 |                 reward1 = (abs(self.prevUtility1) - abs(currentUtility1)) / abs(self.prevUtility1)
238 |             if self.prevUtility2 == 0:
239 |                 reward2 = 0
240 |             else:
241 |                 reward2 = (abs(self.prevUtility2) - abs(currentUtility2)) / abs(self.prevUtility2)
242 | 
            # Reward shaping for the competitive setting: the agent whose utility
            # improved less is penalized by the other agent's reward; the
            # commented-out lines are the cooperative variant, in which the two
            # rewards are summed and shared instead.
243 |             if reward1 > reward2:
244 |                 reward2 -= reward1
245 |                 #reward2 += reward1
246 |                 #reward2 *= 0.5
247 |                 reward2 *= 0.5
248 |             else:
249 |                 #reward1 += reward2
250 |                 #reward1 *= 0.5
251 |                 reward1 -= reward2
252 |                 reward1 *= 0.5
            self.prevUtility1 = currentUtility1
            self.prevUtility2 = currentUtility2

            # If an agent has sold all of its shares, calculate its implementation shortfall
            # and set its done flag to indicate that it has finished liquidating
            if self.shares_remaining1 <= 0:
                info.implementation_shortfall1 = self.total_shares1 * self.startingPrice - self.totalCapture1
                info.done1 = True

            if self.shares_remaining2 <= 0:
                info.implementation_shortfall2 = self.total_shares2 * self.startingPrice - self.totalCapture2
                info.done2 = True
        else:
            reward1 = 0.0
            reward2 = 0.0

        self.k += 1

        # Set the new state
        state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n,
                                                  self.shares_remaining1 / self.total_shares1,
                                                  self.shares_remaining2 / self.total_shares2])

        return (state, np.array([reward1]), np.array([reward2]), info.done1, info.done2, info)


    def permanentImpact(self, sharesToSell):
        # Calculate the permanent impact according to equations (6) and (1) of the AC paper
        pi = self.gamma * sharesToSell
        return pi


    def temporaryImpact(self, sharesToSell):
        # Calculate the temporary impact according to equation (7) of the AC paper
        ti = (self.epsilon * np.sign(sharesToSell)) + ((self.eta / self.tau) * sharesToSell)
        return ti
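    # Worked example (illustrative, using the default parameters from the notebook's
    # tables: epsilon = 0.0625, i.e. half the 0.125 bid-ask spread, displayed rounded
    # as $0.062; eta = 2.5e-6; gamma = 2.5e-7; tau = 1). Selling n = 10,000 shares in
    # one step gives
    #     temporary impact = 0.0625 + (2.5e-6 / 1) * 10,000 = $0.0875 per share,
    #     permanent impact = 2.5e-7 * 10,000              = $0.0025 per share,
    # so that trade executes roughly 9 cents below the mid-price, while the mid-price
    # itself is permanently lowered by a quarter of a cent.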
    def get_expected_shortfall(self, sharesToSell, totalSSSQ):
        # Calculate the expected shortfall according to equation (8) of the AC paper
        ft = 0.5 * self.gamma * (sharesToSell ** 2)
        st = self.epsilon * sharesToSell
        tt = (self.eta_hat / self.tau) * totalSSSQ
        return ft + st + tt


    def get_AC_expected_shortfall(self, sharesToSell, kappa):
        # Calculate the expected shortfall for the optimal strategy according to equation (20) of the AC paper
        ft = 0.5 * self.gamma * (sharesToSell ** 2)
        st = self.epsilon * sharesToSell
        tt = self.eta_hat * (sharesToSell ** 2)
        nft = np.tanh(0.5 * kappa * self.tau) * (self.tau * np.sinh(2 * kappa * self.liquidation_time) \
              + 2 * self.liquidation_time * np.sinh(kappa * self.tau))
        dft = 2 * (self.tau ** 2) * (np.sinh(kappa * self.liquidation_time) ** 2)
        fot = nft / dft
        return ft + st + (tt * fot)


    def get_AC_variance(self, sharesToSell, kappa):
        # Calculate the variance for the optimal strategy according to equation (20) of the AC paper
        ft = 0.5 * self.singleStepVariance * (sharesToSell ** 2)
        nst = self.tau * np.sinh(kappa * self.liquidation_time) * np.cosh(kappa * (self.liquidation_time - self.tau)) \
              - self.liquidation_time * np.sinh(kappa * self.tau)
        dst = (np.sinh(kappa * self.liquidation_time) ** 2) * np.sinh(kappa * self.tau)
        st = nst / dst
        return ft * st


    def compute_AC_utility(self, sharesToSell, kappa, llambda):
        # Calculate the AC utility U(x) = E(x) + lambda * V(x) according to pg. 13 of the AC paper
        if self.liquidation_time == 0:
            return 0
        E = self.get_AC_expected_shortfall(sharesToSell, kappa)
        V = self.get_AC_variance(sharesToSell, kappa)
        return E + llambda * V


    def get_trade_list(self, kappa):
        # Calculate the trade list for the optimal strategy according to equation (18) of the AC paper
        trade_list = np.zeros(self.num_n)
        ftn = 2 * np.sinh(0.5 * kappa * self.tau)
        ftd = np.sinh(kappa * self.liquidation_time)
        ft = (ftn / ftd) * self.total_shares1
        for i in range(1, self.num_n + 1):
            st = np.cosh(kappa * (self.liquidation_time - (i - 0.5) * self.tau))
            trade_list[i - 1] = st
        trade_list *= ft
        return trade_list


    def observation_space_dimension(self):
        # Return the dimension of the state
        return 8


    def action_space_dimension(self):
        # Return the dimension of the action
        return 1


    def stop_transactions(self):
        # Stop transacting for both agents
        self.transacting1 = False
        self.transacting2 = False
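# Minimal usage sketch (illustrative, not part of the original module): drive the
# two-agent environment with random actions. reset(), start_transactions() and the
# 6-tuple returned by step() follow the calls made in utils.py and the training
# notebook; the two-argument step(action1, action2) signature is inferred from the
# body of step() above, and the reset() keywords are defined earlier in this file.
if __name__ == '__main__':
    env = MarketEnvironment()
    env.reset(seed=0)
    env.start_transactions()
    done1 = done2 = False
    while not (done1 and done2):
        # One fraction-of-remaining-shares action per agent
        state, r1, r2, done1, done2, info = env.step(np.random.rand(), np.random.rand())
    print('Shortfall for Agent 1: ${:,.2f}'.format(info.implementation_shortfall1))
    print('Shortfall for Agent 2: ${:,.2f}'.format(info.implementation_shortfall2))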
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

import syntheticChrissAlmgren as sca

from statsmodels.iolib.table import SimpleTable
from statsmodels.compat.python import zip_longest
from statsmodels.iolib.tableformatting import fmt_2cols


def generate_table(left_col, right_col, table_title):

    # Do not use column headers
    col_headers = None

    # Generate the right table
    if right_col:
        # Pad the shorter column so both columns have the same number of rows
        if len(right_col) < len(left_col):
            right_col += [(' ', ' ')] * (len(left_col) - len(right_col))
        elif len(right_col) > len(left_col):
            left_col += [(' ', ' ')] * (len(right_col) - len(left_col))
        right_col = [('%-21s' % (' ' + k), v) for k, v in right_col]

        gen_stubs_right, gen_data_right = zip_longest(*right_col)
        gen_table_right = SimpleTable(gen_data_right,
                                      col_headers,
                                      gen_stubs_right,
                                      title = table_title,
                                      txt_fmt = fmt_2cols)
    else:
        # If there is no right column, set the right table to empty
        gen_table_right = []

    # Generate the left table
    gen_stubs_left, gen_data_left = zip_longest(*left_col)
    gen_table_left = SimpleTable(gen_data_left,
                                 col_headers,
                                 gen_stubs_left,
                                 title = table_title,
                                 txt_fmt = fmt_2cols)

    # Merge the left and right tables into a single table
    gen_table_left.extend_right(gen_table_right)
    general_table = gen_table_left

    return general_table


def get_env_param():

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Set the title for the financial parameters table
    fp_title = 'Financial Parameters'

    # Get the default financial parameters from the simulation environment
    fp_left_col = [('Annual Volatility:', ['{:.0f}%'.format(env.anv * 100)]),
                   ('Daily Volatility:', ['{:.1f}%'.format(env.dpv * 100)])]

    fp_right_col = [('Bid-Ask Spread:', ['{:.3f}'.format(env.basp)]),
                    ('Daily Trading Volume:', ['{:,.0f}'.format(env.dtv)])]

    # Set the title for the Almgren and Chriss model parameters table
    acp_title = 'Almgren and Chriss Model Parameters'

    # Get the default Almgren and Chriss model parameters from the simulation environment
    acp_left_col = [('Total Number of Shares for Agent1 to Sell:', ['{:,}'.format(env.total_shares1)]),
                    ('Total Number of Shares for Agent2 to Sell:', ['{:,}'.format(env.total_shares2)]),
                    ('Starting Price per Share:', ['${:.2f}'.format(env.startingPrice)]),
                    ('Price Impact for Each 1% of Daily Volume Traded:', ['${}'.format(env.eta)]),
                    ('Number of Days to Sell All the Shares:', ['{}'.format(env.liquidation_time)]),
                    ('Number of Trades:', ['{}'.format(env.num_n)])]

    acp_right_col = [('Fixed Cost of Selling per Share:', ['${:.3f}'.format(env.epsilon)]),
                     ('Trader\'s Risk Aversion for Agent 1:', ['{}'.format(env.llambda1)]),
                     ('Trader\'s Risk Aversion for Agent 2:', ['{}'.format(env.llambda2)]),
                     ('Permanent Impact Constant:', ['{}'.format(env.gamma)]),
                     ('Single Step Variance:', ['{:.3f}'.format(env.singleStepVariance)]),
                     ('Time Interval between trades:', ['{}'.format(env.tau)])]

    # Generate tables with the default financial and AC model parameters
    fp_table = generate_table(fp_left_col, fp_right_col, fp_title)
    acp_table = generate_table(acp_left_col, acp_right_col, acp_title)

    return fp_table, acp_table
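# Example (illustrative): the tables above are plain statsmodels SimpleTable objects,
# so they render as text when printed and as HTML in a notebook. A minimal call with
# hypothetical rows, matching the (label, [value]) format used above:
#
#     tbl = generate_table([('Rows:', ['2'])], [('Cols:', ['2'])], 'Shape')
#     print(tbl)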
def plot_price_model(seed = 0, num_days = 1000):

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Reset the environment with the given seed
    env.reset(seed)

    # Create an array to hold the daily stock price for the given number of days
    price_hist = np.zeros(num_days)

    # Get the simulated stock price movement from the environment.
    # The two-agent environment returns (state, reward1, reward2, done1, done2, info);
    # the action values are irrelevant here because transactions have not been started.
    for i in range(num_days):
        _, _, _, _, _, info = env.step(i, i)
        price_hist[i] = info.price

    # Print the average and standard deviation of the stock price
    print('Average Stock Price: ${:,.2f}'.format(price_hist.mean()))
    print('Standard Deviation in Stock Price: ${:,.2f}'.format(price_hist.std()))
    # print('Standard Deviation of Random Noise: {:,.5f}'.format(np.sqrt(env.singleStepVariance * env.tau)))

    # Plot the price history for the given number of days
    price_df = pd.DataFrame(data = price_hist, columns = ['Stock'], dtype = 'float64')
    ax = price_df.plot(colormap = 'cool', grid = False)
    ax.set_facecolor(color = 'k')
    ax = plt.gca()
    yNumFmt = mticker.StrMethodFormatter('${x:,.2f}')
    ax.yaxis.set_major_formatter(yNumFmt)
    plt.ylabel('Stock Price')
    plt.xlabel('days')
    plt.show()


def get_optimal_vals(lq_time = 60, nm_trades = 60, tr_risk = 1e-6, title = ''):

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Reset the environment with the given parameters
    env.reset(liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)

    # Set the title for the AC optimal strategy table
    if title == '':
        title = 'AC Optimal Strategy'
    else:
        title = 'AC Optimal Strategy for ' + title

    # Get the AC optimal values from the environment. The two-agent environment keeps
    # per-agent parameters, so agent 1's kappa and risk aversion are used here.
    E = env.get_AC_expected_shortfall(env.total_shares1, env.kappa1)
    V = env.get_AC_variance(env.total_shares1, env.kappa1)
    U = env.compute_AC_utility(env.total_shares1, env.kappa1, env.llambda1)

    left_col = [('Number of Days to Sell All the Shares:', ['{}'.format(env.liquidation_time)]),
                ('Half-Life of The Trade:', ['{:,.1f}'.format(1 / env.kappa1)]),
                ('Utility:', ['${:,.2f}'.format(U)])]

    right_col = [('Initial Portfolio Value:', ['${:,.2f}'.format(env.total_shares1 * env.startingPrice)]),
                 ('Expected Shortfall:', ['${:,.2f}'.format(E)]),
                 ('Standard Deviation of Shortfall:', ['${:,.2f}'.format(np.sqrt(V))])]

    # Generate the table with the AC optimal values
    val_table = generate_table(left_col, right_col, title)

    return val_table


def get_min_param():

    # Get the minimum-impact AC strategy (trade very slowly with near-zero risk aversion)
    min_impact = get_optimal_vals(lq_time = 250, nm_trades = 250, tr_risk = 1e-17, title = 'Minimum Impact')

    # Get the minimum-variance AC strategy (sell everything in a single trade)
    min_var = get_optimal_vals(lq_time = 1, nm_trades = 1, tr_risk = 0.0058, title = 'Minimum Variance')

    return min_impact, min_var
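# Note on the two extremes above (restating the AC formulas implemented in
# syntheticChrissAlmgren.py, for reference): as the risk aversion lambda -> 0 the
# optimal trajectory approaches a straight-line sell-off, which minimizes expected
# impact cost but leaves maximal exposure to price risk. With a single trade
# (lq_time = nm_trades = 1) the whole position X is sold immediately, the variance
# of the shortfall is zero, and the expected shortfall reduces to
#     0.5 * gamma * X**2 + epsilon * X + (eta_hat / tau) * X**2,
# which is get_expected_shortfall with totalSSSQ = X**2.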
def get_crfs(trisk):

    # Create the annotation label, e.g. 1e-06 -> '$\lambda = 1 \times 10^{-6}$'
    tr_st = '{:.0e}'.format(trisk)
    lnum = tr_st.split('e')[0]
    lexp = tr_st.split('e')[1]
    if np.abs(int(lexp)) < 10:
        lexp = lexp.replace('0', '', 1)
    an_st = '$\\lambda = ' + lnum + ' \\times 10^{' + lexp + '}$'

    # Set the correction factors for positioning the annotation label in the plot
    if 1e-7 <= trisk <= 4e-7:
        xcrf = 0.94
        ycrf = 2.5
        scrf = 0.1
    elif 4e-7 < trisk <= 9e-7:
        xcrf = 0.9
        ycrf = 2.5
        scrf = 0.06
    elif 9e-7 < trisk <= 1e-6:
        xcrf = 0.85
        ycrf = 2.5
        scrf = 0.06
    elif 1e-6 < trisk < 2e-6:
        xcrf = 1.2
        ycrf = 2.5
        scrf = 0.06
    elif 2e-6 <= trisk < 3e-6:
        xcrf = 0.8
        ycrf = 2.5
        scrf = 0.06
    elif 3e-6 <= trisk < 4e-6:
        xcrf = 0.7
        ycrf = 2.5
        scrf = 0.08
    elif 4e-6 <= trisk < 7e-6:
        xcrf = 1.4
        ycrf = 2.0
        scrf = 0.08
    elif 7e-6 <= trisk <= 1e-5:
        xcrf = 4.5
        ycrf = 1.5
        scrf = 0.08
    elif 1e-5 < trisk <= 2e-5:
        xcrf = 7.0
        ycrf = 1.1
        scrf = 0.08
    elif 2e-5 < trisk <= 5e-5:
        xcrf = 12.
        ycrf = 1.1
        scrf = 0.08
    elif 5e-5 < trisk <= 1e-4:
        xcrf = 30
        ycrf = 0.99
        scrf = 0.08
    else:
        xcrf = 1
        ycrf = 1
        scrf = 0.08

    return an_st, xcrf, ycrf, scrf


def plot_efficient_frontier(tr_risk = 1e-6):

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Reset the environment with the given trader's risk aversion
    env.reset(lamb = tr_risk)

    # Get the expected shortfall and corresponding variance for the given trader's risk aversion
    # (agent 1's parameters are used, as elsewhere in this module)
    tr_E = env.get_AC_expected_shortfall(env.total_shares1, env.kappa1)
    tr_V = env.get_AC_variance(env.total_shares1, env.kappa1)

    # Create empty arrays to hold our values of E, V, and U
    E = np.array([])
    V = np.array([])
    U = np.array([])

    # Set the number of plot points for our frontier
    num_points = 7000

    # Set the values of the trader's risk aversion to plot
    lambdas = np.linspace(1e-7, 1e-4, num_points)

    # Calculate E, V, U for each value of llambda
    for llambda in lambdas:
        env.reset(lamb = llambda)
        E = np.append(E, env.get_AC_expected_shortfall(env.total_shares1, env.kappa1))
        V = np.append(V, env.get_AC_variance(env.total_shares1, env.kappa1))
        U = np.append(U, env.compute_AC_utility(env.total_shares1, env.kappa1, env.llambda1))

    # Plot E vs V and use U for the colorbar
    cm = plt.get_cmap('gist_rainbow')
    sc = plt.scatter(V, E, s = 20, c = U, cmap = cm)
    plt.colorbar(sc, label = 'AC Utility', format = mticker.StrMethodFormatter('${x:,.0f}'))
    ax = plt.gca()
    ax.set_facecolor('k')
    ymin = E.min() * 0.7
    ymax = E.max() * 1.1
    plt.ylim(ymin, ymax)
    yNumFmt = mticker.StrMethodFormatter('${x:,.0f}')
    xNumFmt = mticker.StrMethodFormatter('{x:,.0f}')
    ax.yaxis.set_major_formatter(yNumFmt)
    ax.xaxis.set_major_formatter(xNumFmt)
    plt.xlabel('Variance of Shortfall')
    plt.ylabel('Expected Shortfall')

    # Get the annotation label and the correction factors
    an_st, xcrf, ycrf, scrf = get_crfs(tr_risk)

    # Plot the annotation in the above plot
    plt.annotate(an_st, xy = (tr_V, tr_E), xytext = (tr_V * xcrf, tr_E * ycrf), color = 'w', size = 'large',
                 arrowprops = dict(facecolor = 'cyan', shrink = scrf, width = 3, headwidth = 10))
    plt.show()
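# Note on the frontier above: each plotted point is the (V, E) pair of the strategy
# that minimizes E + lambda * V for one value of lambda, so in the Lagrange-multiplier
# reading of the AC paper, lambda is (minus) the slope dE/dV of the frontier at that
# point. Larger risk aversion trades higher expected cost for lower variance.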
def round_trade_list(trl):

    # Round the shares in the trading list to whole shares
    trl_rd = np.around(trl)

    # Rounding the number of shares in the trading list can result in selling more or
    # fewer shares than we have available. We calculate the difference between the total
    # number of shares sold in the original trading list and in the rounded list, and
    # use it to correct for the rounding error.
    res = np.around(trl.sum() - trl_rd.sum())

    # Correct the number of shares sold in the last nonzero trade if necessary
    if res != 0:
        idx = trl_rd.nonzero()[0][-1]
        trl_rd[idx] += res

    return trl_rd
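# Worked example (illustrative): trl = [3333.4, 3333.4, 3333.2] sums to 10,000 shares
# but rounds to [3333, 3333, 3333], i.e. 9,999 shares, so res = 1 and the last nonzero
# trade is bumped to 3,334, restoring the full position.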
def plot_trade_list(lq_time = 60, nm_trades = 60, tr_risk = 1e-6, show_trl = False):

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Reset the environment with the given parameters
    env.reset(liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)

    # Get the trading list from the environment, using agent 1's optimal kappa
    trade_list = env.get_trade_list(env.kappa1)

    # Add a zero at the beginning of the trade list to indicate that at time 0 we don't sell any stock
    new_trl = np.insert(trade_list, 0, 0)

    # Create a dataframe with the trading list and trading trajectory
    df = pd.DataFrame(data = list(range(nm_trades + 1)), columns = ['Trade Number'], dtype = 'float64')
    df['Stocks Sold'] = new_trl
    df['Stocks Remaining'] = (np.ones(nm_trades + 1) * env.total_shares1) - np.cumsum(new_trl)

    # Create a figure with 2 plots in 1 row
    fig, axes = plt.subplots(nrows = 1, ncols = 2)

    # Make a scatter plot of the trade list
    df.iloc[1:].plot.scatter(x = 'Trade Number', y = 'Stocks Sold', c = 'Stocks Sold', colormap = 'gist_rainbow',
                             alpha = 1, sharex = False, s = 50, colorbar = False, ax = axes[0])

    # Plot a line through the points of the scatter plot of the trade list
    axes[0].plot(df['Trade Number'].iloc[1:], df['Stocks Sold'].iloc[1:], linewidth = 2.0, alpha = 0.5)
    axes[0].set_facecolor(color = 'k')
    yNumFmt = mticker.StrMethodFormatter('{x:,.0f}')
    axes[0].yaxis.set_major_formatter(yNumFmt)
    axes[0].set_title('Trading List')

    # Make a scatter plot of the number of stocks remaining after each trade
    df.plot.scatter(x = 'Trade Number', y = 'Stocks Remaining', c = 'Stocks Remaining', colormap = 'gist_rainbow',
                    alpha = 1, sharex = False, s = 50, colorbar = False, ax = axes[1])

    # Plot a line through the points of the scatter plot of the number of stocks remaining after each trade
    axes[1].plot(df['Trade Number'], df['Stocks Remaining'], linewidth = 2.0, alpha = 0.5)
    axes[1].set_facecolor(color = 'k')
    yNumFmt = mticker.StrMethodFormatter('{x:,.0f}')
    axes[1].yaxis.set_major_formatter(yNumFmt)
    axes[1].set_title('Trading Trajectory')

    # Set the spacing between plots
    plt.subplots_adjust(wspace = 0.4)
    plt.show()

    print('\nNumber of Shares Sold: {:,.0f}\n'.format(new_trl.sum()))

    if show_trl:

        # Since we are not selling fractional shares, round the shares in the trading list
        rd_trl = round_trade_list(new_trl)

        # Create a dataframe with the rounded trading list and trading trajectory
        df2 = pd.DataFrame(data = list(range(nm_trades + 1)), columns = ['Trade Number'], dtype = 'float64')
        df2['Stocks Sold'] = rd_trl
        df2['Stocks Remaining'] = (np.ones(nm_trades + 1) * env.total_shares1) - np.cumsum(rd_trl)

        return df2.style.hide(axis = 'index').format({'Trade Number': '{:.0f}', 'Stocks Sold': '{:,.0f}', 'Stocks Remaining': '{:,.0f}'})


def implement_trade_list(seed = 0, lq_time = 60, nm_trades = 60, tr_risk = 1e-6):

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Reset the environment with the given parameters
    env.reset(seed = seed, liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)

    # Get the trading list from the environment
    trl = env.get_trade_list(env.kappa1)

    # Since we are not selling fractional shares, round the shares in the trading list
    trade_list = round_trade_list(trl)

    # Set the environment to make transactions
    env.start_transactions()

    # Create an array to hold the impacted stock price
    price_hist = np.array([])

    # Implement the trading list in our simulation environment
    for trade in trade_list:

        # Convert the number of shares to sell in each trade into an action
        action = trade / env.shares_remaining1

        # Take a step in the environment by selling the number of shares in the current trade.
        # The two-agent environment expects an action per agent; the same AC action is
        # passed for both symmetric agents here.
        _, _, _, _, _, info = env.step(action, action)

        # Get the impacted price from the environment
        price_hist = np.append(price_hist, info.exec_price)

        # If all shares have been sold, stop making transactions and report the implementation shortfall
        if info.done1:
            print('Implementation Shortfall: ${:,.2f} \n'.format(info.implementation_shortfall1))
            break

    # Plot the impacted price
    price_df = pd.DataFrame(data = price_hist, columns = ['Stock'], dtype = 'float64')
    ax = price_df.plot(colormap = 'cool', grid = False)
    ax.set_facecolor(color = 'k')
    ax.set_title('Impacted Stock Price')
    ax = plt.gca()
    yNumFmt = mticker.StrMethodFormatter('${x:,.2f}')
    ax.yaxis.set_major_formatter(yNumFmt)
    plt.plot(price_hist, 'o')
    plt.ylabel('Stock Price')
    plt.xlabel('Trade Number')
    plt.show()
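# Worked example of the implementation shortfall printed above (illustrative numbers):
# with the default 500,000 shares and a $50.00 starting price, the initial position is
# worth $25,000,000; if the rounded AC trade list captures $24,700,000 once impact is
# paid, the implementation shortfall is 25,000,000 - 24,700,000 = $300,000.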
def get_av_std(lq_time = 60, nm_trades = 60, tr_risk = 1e-6, trs = 100):

    # Create a simulation environment
    env = sca.MarketEnvironment()

    # Reset the environment
    env.reset(liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)

    # Get the trading list
    trl = env.get_trade_list(env.kappa1)

    # Since we are not selling fractional shares, round the shares in the trading list
    trade_list = round_trade_list(trl)

    # Create an array to hold the implementation shortfall of every episode
    shortfall_hist = np.array([])

    for episode in range(trs):

        # Print the current episode every 100 episodes
        if (episode + 1) % 100 == 0:
            print('Episode [{}/{}]'.format(episode + 1, trs), end = '\r', flush = True)

        # Reset the environment
        env.reset(seed = episode, liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)

        # Set the environment to make transactions
        env.start_transactions()

        for trade in trade_list:
            action = trade / env.shares_remaining1
            _, _, _, _, _, info = env.step(action, action)

            if info.done1:
                shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall1)
                break

    print('Average Implementation Shortfall: ${:,.2f}'.format(shortfall_hist.mean()))
    print('Standard Deviation of the Implementation Shortfall: ${:,.2f}'.format(shortfall_hist.std()))

    # Plot the shortfall of every episode together with its average
    plt.plot(shortfall_hist, 'cyan', label = 'Implementation Shortfall')
    plt.xlim(0, trs)
    ax = plt.gca()
    ax.set_facecolor('k')
    ax.set_xlabel('Episode', fontsize = 15)
    ax.set_ylabel('Implementation Shortfall (US $)', fontsize = 15)
    ax.axhline(shortfall_hist.mean(), 0, 1, color = 'm', label = 'Average')
    yNumFmt = mticker.StrMethodFormatter('${x:,.0f}')
    ax.yaxis.set_major_formatter(yNumFmt)
    plt.legend()
    plt.show()
--------------------------------------------------------------------------------