├── Model_training.ipynb
├── README.md
├── Results
│   ├── 1e-4_1e-9_trajectory.npy
│   ├── 1e-4_optimal.npy
│   ├── 1e-4_shortfall_list.npy
│   ├── 1e-4_trajectory.npy
│   ├── 1e-6_1e-6_competition_shortfall_list.npy
│   ├── 1e-6_1e-6_competition_trajectory.npy
│   ├── 1e-6_1e-6_competition_trajectory_1500.npy
│   ├── 1e-6_1e-6_cooporation_shortfall_list.npy
│   ├── 1e-6_1e-6_cooporation_trajectory.npy
│   ├── 1e-6_optimal.npy
│   ├── 1e-6_shortfall_list.npy
│   ├── 1e-6_shortfall_list_0.3M.npy
│   ├── 1e-6_shortfall_list_0.7M.npy
│   ├── 1e-6_shortfall_list_fixed_competitor.npy
│   ├── 1e-6_shortfall_list_new_fixed_competitor.npy
│   ├── 1e-6_trajectory.npy
│   ├── 1e-6_trajectory_fixed_competitor.npy
│   ├── 1e-6_trajectory_new_fixed_competitor.npy
│   ├── 1e-7_1e-7_shortfall_list.npy
│   ├── 1e-7_1e-7_trajectory.npy
│   ├── 1e-7_shortfall_list.npy
│   ├── 1e-7_trajectory.npy
│   ├── 1e-9_optimal.npy
│   ├── 1e-9_shortfall_list.npy
│   ├── 1e-9_trajectory.npy
│   ├── README.md
│   └── price_list.npy
├── Visualization.ipynb
├── ddpg_agent.py
├── img
│   ├── Competition_and_Cooporation.png
│   ├── Have_Competitor.png
│   ├── Multi-agent.png
│   ├── README.md
│   ├── Theorem1.png
│   └── theorem2.png
├── model.py
├── syntheticChrissAlmgren.py
└── utils.py
/Model_training.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Reinforcement Learning for Optimal Execution of Portfolio Transactions "
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import utils\n",
17 | "\n",
18 | "# Get the default financial and AC Model parameters\n",
19 | "financial_params, ac_params = utils.get_env_param()"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 | "
\n",
31 | "Financial Parameters\n",
32 | "\n",
33 | " Annual Volatility: | 12% | Bid-Ask Spread: | 0.125 | \n",
34 | "
\n",
35 | "\n",
36 | " Daily Volatility: | 0.8% | Daily Trading Volume: | 5,000,000 | \n",
37 | "
\n",
38 | "
"
39 | ],
40 | "text/plain": [
41 | ""
42 | ]
43 | },
44 | "execution_count": 2,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "financial_params"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "metadata": {},
57 | "outputs": [
58 | {
59 | "data": {
60 | "text/html": [
61 | "\n",
62 | "Almgren and Chriss Model Parameters\n",
63 | "\n",
64 | " Total Number of Shares for Agent1 to Sell: | 500,000 | Fixed Cost of Selling per Share: | $0.062 | \n",
65 | "
\n",
66 | "\n",
67 | " Total Number of Shares for Agent2 to Sell: | 500,000 | Trader's Risk Aversion for Agent 1: | 1e-06 | \n",
68 | "
\n",
69 | "\n",
70 | " Starting Price per Share: | $50.00 | Trader's Risk Aversion for Agent 2: | 0.0001 | \n",
71 | "
\n",
72 | "\n",
73 | " Price Impact for Each 1% of Daily Volume Traded: | $2.5e-06 | Permanent Impact Constant: | 2.5e-07 | \n",
74 | "
\n",
75 | "\n",
76 | " Number of Days to Sell All the Shares: | 60 | Single Step Variance: | 0.144 | \n",
77 | "
\n",
78 | "\n",
79 | " Number of Trades: | 60 | Time Interval between trades: | 1.0 | \n",
80 | "
\n",
81 | "
"
82 | ],
83 | "text/plain": [
84 | ""
85 | ]
86 | },
87 | "execution_count": 3,
88 | "metadata": {},
89 | "output_type": "execute_result"
90 | }
91 | ],
92 | "source": [
93 | "ac_params"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 2,
99 | "metadata": {
100 | "scrolled": false
101 | },
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "Episode [100/1300]\tAverage Shortfall for Agent1: $1,172,575.47\n",
108 | "Episode [100/1300]\tAverage Shortfall for Agent2: $1,183,832.73\n",
109 | "Episode [200/1300]\tAverage Shortfall for Agent1: $1,281,148.59\n",
110 | "Episode [200/1300]\tAverage Shortfall for Agent2: $1,281,025.43\n",
111 | "Episode [300/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
112 | "Episode [300/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
113 | "Episode [400/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
114 | "Episode [400/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
115 | "Episode [500/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
116 | "Episode [500/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
117 | "Episode [600/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
118 | "Episode [600/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
119 | "Episode [700/1300]\tAverage Shortfall for Agent1: $1,227,339.91\n",
120 | "Episode [700/1300]\tAverage Shortfall for Agent2: $1,253,734.24\n",
121 | "Episode [800/1300]\tAverage Shortfall for Agent1: $415,623.02\n",
122 | "Episode [800/1300]\tAverage Shortfall for Agent2: $433,944.85\n",
123 | "Episode [900/1300]\tAverage Shortfall for Agent1: $314,968.49\n",
124 | "Episode [900/1300]\tAverage Shortfall for Agent2: $317,854.76\n",
125 | "Episode [1000/1300]\tAverage Shortfall for Agent1: $318,731.56\n",
126 | "Episode [1000/1300]\tAverage Shortfall for Agent2: $317,495.71\n",
127 | "Episode [1100/1300]\tAverage Shortfall for Agent1: $329,135.85\n",
128 | "Episode [1100/1300]\tAverage Shortfall for Agent2: $333,255.71\n",
129 | "Episode [1200/1300]\tAverage Shortfall for Agent1: $300,993.44\n",
130 | "Episode [1200/1300]\tAverage Shortfall for Agent2: $301,320.57\n",
131 | "Episode [1300/1300]\tAverage Shortfall for Agent1: $294,413.69\n",
132 | "Episode [1300/1300]\tAverage Shortfall for Agent2: $292,937.04\n",
133 | "\n",
134 | "Average Implementation Shortfall for Agent1: $829,225.39 \n",
135 | "\n",
136 | "\n",
137 | "Average Implementation Shortfall for Agent2: $833,877.00 \n",
138 | "\n"
139 | ]
140 | }
141 | ],
142 | "source": [
143 | "import numpy as np\n",
144 | "\n",
145 | "import syntheticChrissAlmgren as sca\n",
146 | "from ddpg_agent import Agent\n",
147 | "\n",
148 | "from collections import deque\n",
149 | "\n",
150 | "# Create simulation environment\n",
151 | "env = sca.MarketEnvironment()\n",
152 | "\n",
153 | "# Initialize Feed-forward DNNs for Actor and Critic models. \n",
154 | "agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)\n",
155 | "agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)\n",
156 | "# Set the liquidation time\n",
157 | "lqt = 60\n",
158 | "\n",
159 | "# Set the number of trades\n",
160 | "n_trades = 60\n",
161 | "\n",
162 | "# Set trader's risk aversion\n",
163 | "tr1 = 1e-6\n",
164 | "tr2 = 1e-6\n",
165 | "\n",
166 | "# Set the number of episodes to run the simulation\n",
167 | "episodes = 1300\n",
168 | "shortfall_list = []\n",
169 | "shortfall_hist1 = np.array([])\n",
170 | "shortfall_hist2 = np.array([])\n",
171 | "shortfall_deque1 = deque(maxlen=100)\n",
172 | "shortfall_deque2 = deque(maxlen=100)\n",
173 | "for episode in range(episodes): \n",
174 | " # Reset the enviroment\n",
175 | " cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n",
176 | "\n",
177 | " # set the environment to make transactions\n",
178 | " env.start_transactions()\n",
179 | "\n",
180 | " for i in range(n_trades + 1):\n",
181 | " \n",
182 | " # Predict the best action for the current state. \n",
183 | " cur_state1 = np.delete(cur_state,8)\n",
184 | " cur_state2 = np.delete(cur_state,7)\n",
185 | " #print(cur_state[5:])\n",
186 | " action1 = agent1.act(cur_state1, add_noise = True)\n",
187 | " action2 = agent2.act(cur_state2, add_noise = True)\n",
188 | " #print(action1,action2)\n",
189 | " # Action is performed and new state, reward, info are received. \n",
190 | " new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n",
191 | " \n",
192 | " # current state, action, reward, new state are stored in the experience replay\n",
193 | " new_state1 = np.delete(new_state,8)\n",
194 | " new_state2 = np.delete(new_state,7)\n",
195 | " agent1.step(cur_state1, action1, reward1, new_state1, done1)\n",
196 | " agent2.step(cur_state2, action2, reward2, new_state2, done2)\n",
197 | " # roll over new state\n",
198 | " cur_state = new_state\n",
199 | "\n",
200 | " if info.done1 and info.done2:\n",
201 | " shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)\n",
202 | " shortfall_deque1.append(info.implementation_shortfall1)\n",
203 | " \n",
204 | " shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)\n",
205 | " shortfall_deque2.append(info.implementation_shortfall2)\n",
206 | " break\n",
207 | " \n",
208 | " if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes\n",
209 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1))) \n",
210 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))\n",
211 | " shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])\n",
212 | "print('\\nAverage Implementation Shortfall for Agent1: ${:,.2f} \\n'.format(np.mean(shortfall_hist1)))\n",
213 | "print('\\nAverage Implementation Shortfall for Agent2: ${:,.2f} \\n'.format(np.mean(shortfall_hist2)))"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": 5,
219 | "metadata": {},
220 | "outputs": [],
221 | "source": [
222 | "shortfall = np.array(shortfall_list)"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {},
229 | "outputs": [],
230 | "source": []
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 6,
235 | "metadata": {},
236 | "outputs": [],
237 | "source": [
238 | "np.save('1e-6_1e-6_cooporation_shorfall_list.npy',shortfall)"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 3,
244 | "metadata": {},
245 | "outputs": [
246 | {
247 | "name": "stdout",
248 | "output_type": "stream",
249 | "text": [
250 | "1e-06 1e-06\n",
251 | "[1. 1.]\n",
252 | "[0.761694 0.656324]\n",
253 | "[0.603648 0.454928]\n",
254 | "[0.44365 0.334226]\n",
255 | "[0.305346 0.25539 ]\n",
256 | "[0.20247 0.202642]\n",
257 | "[0.13316 0.148788]\n",
258 | "[0.09197 0.10399]\n",
259 | "[0.064072 0.074902]\n",
260 | "[0.044238 0.052522]\n",
261 | "[0.03257 0.036602]\n",
262 | "[0.02397 0.024466]\n",
263 | "[0.018556 0.01732 ]\n",
264 | "[0.013314 0.011942]\n",
265 | "[0.009696 0.008204]\n",
266 | "[0.006774 0.005622]\n",
267 | "[0.004728 0.003898]\n",
268 | "[0.003236 0.002704]\n",
269 | "[0.00228 0.001762]\n",
270 | "[0.00165 0.00114]\n",
271 | "[0.001234 0.000778]\n",
272 | "[0.000932 0.000566]\n",
273 | "[0.000674 0.000392]\n",
274 | "[0.000506 0.00029 ]\n",
275 | "[0.000376 0.000212]\n",
276 | "[0.000294 0.00015 ]\n",
277 | "[0.000224 0.000108]\n",
278 | "[1.66e-04 7.40e-05]\n",
279 | "[1.14e-04 5.40e-05]\n",
280 | "[8.2e-05 4.2e-05]\n",
281 | "[5.8e-05 3.4e-05]\n",
282 | "[4.0e-05 2.8e-05]\n",
283 | "[2.6e-05 2.2e-05]\n",
284 | "[1.8e-05 1.8e-05]\n",
285 | "[1.2e-05 1.4e-05]\n",
286 | "[8.e-06 1.e-05]\n",
287 | "[6.e-06 8.e-06]\n",
288 | "[4.e-06 6.e-06]\n",
289 | "[2.e-06 4.e-06]\n",
290 | "[2.e-06 2.e-06]\n",
291 | "[2.e-06 2.e-06]\n",
292 | "[2.e-06 2.e-06]\n",
293 | "[2.e-06 2.e-06]\n",
294 | "[2.e-06 2.e-06]\n",
295 | "[2.e-06 2.e-06]\n",
296 | "[2.e-06 2.e-06]\n",
297 | "[2.e-06 2.e-06]\n",
298 | "[2.e-06 2.e-06]\n",
299 | "[2.e-06 2.e-06]\n",
300 | "[2.e-06 2.e-06]\n",
301 | "[2.e-06 2.e-06]\n",
302 | "[2.e-06 2.e-06]\n",
303 | "[2.e-06 2.e-06]\n",
304 | "[2.e-06 2.e-06]\n",
305 | "[2.e-06 2.e-06]\n",
306 | "[2.e-06 2.e-06]\n",
307 | "[2.e-06 2.e-06]\n",
308 | "[2.e-06 2.e-06]\n",
309 | "[2.e-06 2.e-06]\n",
310 | "[2.e-06 2.e-06]\n",
311 | "Episode [1300/1300]\tAverage Shortfall for Agent1: $298,868.72\n",
312 | "Episode [1300/1300]\tAverage Shortfall for Agent2: $296,739.36\n"
313 | ]
314 | }
315 | ],
316 | "source": [
317 | "print(tr1,tr2)\n",
318 | "cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n",
319 | "\n",
320 | " # set the environment to make transactions\n",
321 | "env.start_transactions()\n",
322 | "\n",
323 | "trajectory = np.zeros([n_trades+1,2])\n",
324 | "for i in range(n_trades + 1):\n",
325 | " trajectory[i] = cur_state[7:]\n",
326 | " \n",
327 | " print(cur_state[7:])\n",
328 | " # Predict the best action for the current state. \n",
329 | " cur_state1 = np.delete(cur_state,8)\n",
330 | " cur_state2 = np.delete(cur_state,7)\n",
331 | " #print(cur_state[5:])\n",
332 | " action1 = agent1.act(cur_state1, add_noise = True)\n",
333 | " action2 = agent2.act(cur_state2, add_noise = True)\n",
334 | " #print(action1,action2)\n",
335 | " # Action is performed and new state, reward, info are received. \n",
336 | " new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n",
337 | " \n",
338 | " # current state, action, reward, new state are stored in the experience replay\n",
339 | " new_state1 = np.delete(new_state,8)\n",
340 | " new_state2 = np.delete(new_state,7)\n",
341 | " agent1.step(cur_state1, action1, reward1, new_state1, done1)\n",
342 | " agent2.step(cur_state2, action2, reward2, new_state2, done2)\n",
343 | " # roll over new state\n",
344 | " cur_state = new_state\n",
345 | "\n",
346 | " if info.done1 and info.done2:\n",
347 | " shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)\n",
348 | " shortfall_deque1.append(info.implementation_shortfall1)\n",
349 | " \n",
350 | " shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)\n",
351 | " shortfall_deque2.append(info.implementation_shortfall2)\n",
352 | " break\n",
353 | " \n",
354 | "if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes\n",
355 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1))) \n",
356 | " print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))\n"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 5,
362 | "metadata": {},
363 | "outputs": [],
364 | "source": [
365 | "np.save('1e-6_1e-6_competition_trajectory_1500.npy',trajectory)"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 5,
371 | "metadata": {},
372 | "outputs": [
373 | {
374 | "ename": "TypeError",
375 | "evalue": "reset() got an unexpected keyword argument 'lamb'",
376 | "output_type": "error",
377 | "traceback": [
378 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
379 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
380 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# Plot the trading list and trading trajectory. If show_trl = True, the data frame containing the values of the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# trading list and trading trajectory is printed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot_trade_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlq_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ml_time\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnm_trades\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_trades\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtr_risk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mt_risk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_trl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
381 | "\u001b[0;32m~/Kaggle/finance/utils.py\u001b[0m in \u001b[0;36mplot_trade_list\u001b[0;34m(lq_time, nm_trades, tr_risk, show_trl)\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[0;31m# Reset the environment with the given parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 315\u001b[0;31m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mliquid_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlq_time\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_trades\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnm_trades\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlamb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtr_risk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 316\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0;31m# Get the trading list from the environment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
382 | "\u001b[0;31mTypeError\u001b[0m: reset() got an unexpected keyword argument 'lamb'"
383 | ]
384 | }
385 | ],
386 | "source": [
387 | "%matplotlib inline\n",
388 | "\n",
389 | "import matplotlib.pyplot as plt\n",
390 | "\n",
391 | "import utils\n",
392 | "\n",
393 | "# We set the default figure size\n",
394 | "plt.rcParams['figure.figsize'] = [17.0, 7.0]\n",
395 | "\n",
396 | "\n",
397 | "# Set the number of days to sell all shares (i.e. the liquidation time)\n",
398 | "l_time = 60\n",
399 | "\n",
400 | "# Set the number of trades\n",
401 | "n_trades = 60\n",
402 | "\n",
403 | "# Set the trader's risk aversion\n",
404 | "t_risk = 1e-6\n",
405 | "\n",
406 | "# Plot the trading list and trading trajectory. If show_trl = True, the data frame containing the values of the\n",
407 | "# trading list and trading trajectory is printed\n",
408 | "utils.plot_trade_list(lq_time = l_time, nm_trades = n_trades, tr_risk = t_risk, show_trl = True)"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {},
415 | "outputs": [],
416 | "source": []
417 | }
418 | ],
419 | "metadata": {
420 | "kernelspec": {
421 | "display_name": "Python 3",
422 | "language": "python",
423 | "name": "python3"
424 | },
425 | "language_info": {
426 | "codemirror_mode": {
427 | "name": "ipython",
428 | "version": 3
429 | },
430 | "file_extension": ".py",
431 | "mimetype": "text/x-python",
432 | "name": "python",
433 | "nbconvert_exporter": "python",
434 | "pygments_lexer": "ipython3",
435 | "version": "3.6.8"
436 | }
437 | },
438 | "nbformat": 4,
439 | "nbformat_minor": 2
440 | }
441 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Multi-Agent Reinforcement Learning for Liquidation Strategy Analysis
2 | Source code for the paper "Multi-Agent Reinforcement Learning for Liquidation Strategy Analysis", accepted at the ICML 2019 workshop on AI in Finance: Applications and Infrastructure for Multi-Agent Learning. (https://arxiv.org/abs/1906.11046)
3 |
4 | ## Abstract
5 |
6 | Liquidation is the process of selling a large number of shares of one stock sequentially within a given time frame, taking into consideration the costs arising from market impact and a trader's risk aversion. The main challenge in optimizing liquidation is to find an appropriate modeling system that can incorporate the complexities of the stock market and generate practical trading strategies. In this paper, we propose a multi-agent deep reinforcement learning model, which captures the high-level complexities of the market better than various other machine learning methods, so that agents can learn how to make the best selling decisions.
7 |
8 | ## Proposed Methods
9 |
10 | * We theoretically analyze the Almgren and Chriss model and extend its fundamental mechanism so that it can be used as a multi-agent trading environment. Our work lays the foundation for future analysis of multi-agent trading environments.
11 | * We analyze cooperative and competitive behavior between agents by adjusting each agent's reward function, which overcomes the limitations of single-agent reinforcement learning algorithms.
12 | * We simulate trading and develop an optimal trading strategy under practical constraints using reinforcement learning, which demonstrates the capability of reinforcement learning methods to solve realistic liquidation problems.
13 |
14 | ## Dependencies
15 |
16 | The scripts have been tested under Python 3.7.0, with the following packages installed:
17 |
18 | * `numpy==1.14.5`
19 | * `torch` (the DDPG agent and the actor/critic networks are implemented in PyTorch)
20 |
21 | ## Experiments
22 |
23 | ### 1. Environment
24 |
25 | The problem of finding an optimal liquidation strategy is investigated using the Almgren-Chriss market impact model, under the constraint that the agents must liquidate their assets completely within a given time frame. The model decomposes the stock price dynamics into three components: an unaffected price process, a permanent impact, and a temporary impact. The stochastic component of the price process has zero mean, so it drops out of the mean-variance analysis. Permanent and temporary impact enter the price process as linear functions of the amount traded. The model therefore serves as the trading environment: when agents make selling decisions, the environment returns the resulting price information.
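For concreteness, here is a minimal sketch of one step of these price dynamics, mirroring `permanentImpact`, `temporaryImpact`, and the price update in `syntheticChrissAlmgren.py`. The helper name `ac_price_step` is illustrative and not part of the repo; the constants follow the values defined there, and in the two-agent environment `shares_sold` is the combined amount sold by both agents in the step:

```python
import numpy as np

# Constants as defined in syntheticChrissAlmgren.py
DAILY_VOLAT = 0.12 / np.sqrt(250)          # annual volatility over 250 trading days
STARTING_PRICE = 50
SINGLE_STEP_VARIANCE = (DAILY_VOLAT * STARTING_PRICE) ** 2
BID_ASK_SP = 1 / 8
EPSILON = BID_ASK_SP / 2                   # fixed cost of selling per share
ETA = BID_ASK_SP / (0.01 * 5e6)            # temporary impact coefficient (2.5e-06)
GAMMA = BID_ASK_SP / (0.1 * 5e6)           # permanent impact coefficient (2.5e-07)
TAU = 1.0                                  # time between trades (60 days / 60 trades)

def ac_price_step(prev_impacted_price, shares_sold, rng=np.random):
    """One step of the Almgren-Chriss price dynamics."""
    # Unaffected price: one arithmetic-Brownian-motion step with zero-mean noise
    price = prev_impacted_price + np.sqrt(SINGLE_STEP_VARIANCE * TAU) * rng.standard_normal()
    # Temporary impact only affects the execution price of this trade
    exec_price = price - (EPSILON * np.sign(shares_sold) + (ETA / TAU) * shares_sold)
    # Permanent impact is carried forward into all future prices
    next_impacted_price = price - GAMMA * shares_sold
    return exec_price, next_impacted_price

exec_price, impacted = ac_price_step(STARTING_PRICE, shares_sold=10_000)
```

Because the noise term has zero mean, only the impact terms survive in the expected shortfall.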
26 |
27 | ### 2. Results
28 |
29 | 
30 |
31 | Comparison of expected implementation shortfalls: there are three agents, $A$, $B_1$ and $B_2$. The expected shortfall of agent $A$ is higher than the sum of the expected shortfalls of $B_1$ and $B_2$.
32 |
33 | 
34 |
35 | Trading trajectories: compared to their original trading trajectories, the two agents' trajectories are closer to each other when they are trained in a multi-agent environment.
36 |
37 |
38 | 
39 |
40 | Cooperative and competitive relationships: if two agents are in a cooperative relationship, the total expected shortfall is no better than when they are trained with independent reward functions. If two agents are in a competitive relationship, they first learn to minimize the expected shortfall, after which destructive competition leads to a significant increase in implementation shortfall.
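The reward shaping behind these two regimes lives inline in `MarketEnvironment.step()` in `syntheticChrissAlmgren.py` (the cooperative branch is the commented-out code there). Each agent's raw per-step reward is the relative decrease in the magnitude of its Almgren-Chriss utility, and only the agent with the smaller raw reward is adjusted. A condensed sketch, with `shape_rewards` and `mode` as illustrative names:

```python
def shape_rewards(reward1, reward2, mode="competition"):
    """Mix the two agents' raw utility-improvement rewards
    (cf. the reward logic in MarketEnvironment.step)."""
    sign = -1.0 if mode == "competition" else 1.0
    if reward1 > reward2:
        # Competition: agent 2 is penalized by agent 1's gain;
        # cooperation: agent 2 shares in agent 1's gain.
        reward2 = 0.5 * (reward2 + sign * reward1)
    else:
        reward1 = 0.5 * (reward1 + sign * reward2)
    return reward1, reward2

# Agent 1 improved its utility more than agent 2 in this step:
shape_rewards(0.4, 0.1)                      # competition  -> (0.4, -0.15)
shape_rewards(0.4, 0.1, mode="cooperation")  # cooperation  -> (0.4, 0.25)
```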
41 |
42 |
43 | 
44 |
45 | Trading trajectory: compared to independent training, introducing a competitor makes the host agent learn to adapt to the new environment and sell all of its shares within the first two days.
46 |
47 |
48 | ## Authors
49 |
50 | Wenhang Bao & Xiao-Yang Liu
51 |
52 |
--------------------------------------------------------------------------------
/Results/1e-4_1e-9_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_1e-9_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-4_optimal.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_optimal.npy
--------------------------------------------------------------------------------
/Results/1e-4_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-4_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-4_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_competition_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_competition_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_competition_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_competition_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_competition_trajectory_1500.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_competition_trajectory_1500.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_cooporation_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_cooporation_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-6_1e-6_cooporation_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_1e-6_cooporation_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-6_optimal.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_optimal.npy
--------------------------------------------------------------------------------
/Results/1e-6_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-6_shortfall_list_0.3M.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_0.3M.npy
--------------------------------------------------------------------------------
/Results/1e-6_shortfall_list_0.7M.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_0.7M.npy
--------------------------------------------------------------------------------
/Results/1e-6_shortfall_list_fixed_competitor.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_fixed_competitor.npy
--------------------------------------------------------------------------------
/Results/1e-6_shortfall_list_new_fixed_competitor.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_shortfall_list_new_fixed_competitor.npy
--------------------------------------------------------------------------------
/Results/1e-6_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-6_trajectory_fixed_competitor.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_trajectory_fixed_competitor.npy
--------------------------------------------------------------------------------
/Results/1e-6_trajectory_new_fixed_competitor.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-6_trajectory_new_fixed_competitor.npy
--------------------------------------------------------------------------------
/Results/1e-7_1e-7_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_1e-7_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-7_1e-7_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_1e-7_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-7_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-7_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-7_trajectory.npy
--------------------------------------------------------------------------------
/Results/1e-9_optimal.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-9_optimal.npy
--------------------------------------------------------------------------------
/Results/1e-9_shortfall_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-9_shortfall_list.npy
--------------------------------------------------------------------------------
/Results/1e-9_trajectory.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/1e-9_trajectory.npy
--------------------------------------------------------------------------------
/Results/README.md:
--------------------------------------------------------------------------------
1 | README
2 |
--------------------------------------------------------------------------------
/Results/price_list.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/Results/price_list.npy
--------------------------------------------------------------------------------
/ddpg_agent.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import copy
4 | from collections import namedtuple, deque
5 |
6 | from model import Actor, Critic
7 |
8 | import torch
9 | import torch.nn.functional as F
10 | import torch.optim as optim
11 |
12 | BUFFER_SIZE = int(1e4) # replay buffer size
13 | BATCH_SIZE = 128 # minibatch size
14 | GAMMA = 0.99 # discount factor
15 | TAU = 1e-3 # for soft update of target parameters
16 | LR_ACTOR = 1e-4 # learning rate of the actor
17 | LR_CRITIC = 1e-3 # learning rate of the critic
18 | WEIGHT_DECAY = 0 # L2 weight decay
19 |
20 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
21 |
22 | class Agent():
23 | """Interacts with and learns from the environment."""
24 |
25 | def __init__(self, state_size, action_size, random_seed):
26 | """Initialize an Agent object.
27 |
28 | Params
29 | ======
30 | state_size (int): dimension of each state
31 | action_size (int): dimension of each action
32 | random_seed (int): random seed
33 | """
34 | self.state_size = state_size
35 | self.action_size = action_size
36 | self.seed = random.seed(random_seed)
37 |
38 | # Actor Network (w/ Target Network)
39 | self.actor_local = Actor(state_size, action_size, random_seed).to(device)
40 | self.actor_target = Actor(state_size, action_size, random_seed).to(device)
41 | self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)
42 |
43 | # Critic Network (w/ Target Network)
44 | self.critic_local = Critic(state_size, action_size, random_seed).to(device)
45 | self.critic_target = Critic(state_size, action_size, random_seed).to(device)
46 | self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)
47 |
48 | # Noise process
49 | self.noise = OUNoise(action_size, random_seed)
50 |
51 | # Replay memory
52 | self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
53 |
54 | def step(self, state, action, reward, next_state, done):
55 | """Save experience in replay memory, and use random sample from buffer to learn."""
56 | # Save experience / reward
57 | self.memory.add(state, action, reward, next_state, done)
58 |
59 | # Learn, if enough samples are available in memory
60 | if len(self.memory) > BATCH_SIZE:
61 | experiences = self.memory.sample()
62 | self.learn(experiences, GAMMA)
63 |
64 | def act(self, state, add_noise=True):
65 | """Returns actions for given state as per current policy."""
66 | state = torch.from_numpy(state).float().to(device)
67 | self.actor_local.eval()
68 | with torch.no_grad():
69 | action = self.actor_local(state).cpu().data.numpy()
70 | self.actor_local.train()
71 | if add_noise:
72 | action += self.noise.sample()
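# Map the tanh policy output from [-1, 1] to [0, 1]; the environment interprets
# the action as the fraction of remaining shares to sell in this step.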
73 | action = (action + 1.0) / 2.0
74 | return np.clip(action, 0, 1)
75 |
76 |
77 | def reset(self):
78 | self.noise.reset()
79 |
80 | def learn(self, experiences, gamma):
81 | """Update policy and value parameters using given batch of experience tuples.
82 | Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
83 | where:
84 | actor_target(state) -> action
85 | critic_target(state, action) -> Q-value
86 |
87 | Params
88 | ======
89 | experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
90 | gamma (float): discount factor
91 | """
92 | states, actions, rewards, next_states, dones = experiences
93 |
94 | # ---------------------------- update critic ---------------------------- #
95 | # Get predicted next-state actions and Q values from target models
96 | actions_next = self.actor_target(next_states)
97 | Q_targets_next = self.critic_target(next_states, actions_next)
98 | # Compute Q targets for current states (y_i)
99 | Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
100 | # Compute critic loss
101 | Q_expected = self.critic_local(states, actions)
102 | critic_loss = F.mse_loss(Q_expected, Q_targets)
103 | # Minimize the loss
104 | self.critic_optimizer.zero_grad()
105 | critic_loss.backward()
106 | self.critic_optimizer.step()
107 |
108 | # ---------------------------- update actor ---------------------------- #
109 | # Compute actor loss
110 | actions_pred = self.actor_local(states)
111 | actor_loss = -self.critic_local(states, actions_pred).mean()
112 | # Minimize the loss
113 | self.actor_optimizer.zero_grad()
114 | actor_loss.backward()
115 | self.actor_optimizer.step()
116 |
117 | # ----------------------- update target networks ----------------------- #
118 | self.soft_update(self.critic_local, self.critic_target, TAU)
119 | self.soft_update(self.actor_local, self.actor_target, TAU)
120 |
121 | def soft_update(self, local_model, target_model, tau):
122 | """Soft update model parameters.
123 | θ_target = τ*θ_local + (1 - τ)*θ_target
124 |
125 | Params
126 | ======
127 | local_model: PyTorch model (weights will be copied from)
128 | target_model: PyTorch model (weights will be copied to)
129 | tau (float): interpolation parameter
130 | """
131 | for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
132 | target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data)
133 |
134 | class OUNoise:
135 | """Ornstein-Uhlenbeck process."""
136 |
137 | def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.2):
138 | """Initialize parameters and noise process."""
139 | self.mu = mu * np.ones(size)
140 | self.theta = theta
141 | self.sigma = sigma
142 | self.seed = random.seed(seed)
143 | self.reset()
144 |
145 | def reset(self):
146 | """Reset the internal state (= noise) to mean (mu)."""
147 | self.state = copy.copy(self.mu)
148 |
149 | def sample(self):
150 | """Update internal state and return it as a noise sample."""
151 | x = self.state
152 | dx = self.theta * (self.mu - x) + self.sigma * np.array([random.gauss(0, 1) for i in range(len(x))])  # zero-mean Gaussian increments; random.random() is uniform on [0, 1) and would bias the noise upward
153 | self.state = x + dx
154 | return self.state
155 |
156 | class ReplayBuffer:
157 | """Fixed-size buffer to store experience tuples."""
158 |
159 | def __init__(self, action_size, buffer_size, batch_size, seed):
160 | """Initialize a ReplayBuffer object.
161 | Params
162 | ======
163 | buffer_size (int): maximum size of buffer
164 | batch_size (int): size of each training batch
165 | """
166 | self.action_size = action_size
167 | self.memory = deque(maxlen=buffer_size) # internal memory (deque)
168 | self.batch_size = batch_size
169 | self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
170 | self.seed = random.seed(seed)
171 |
172 | def add(self, state, action, reward, next_state, done):
173 | """Add a new experience to memory."""
174 | e = self.experience(state, action, reward, next_state, done)
175 | self.memory.append(e)
176 |
177 | def sample(self):
178 | """Randomly sample a batch of experiences from memory."""
179 | experiences = random.sample(self.memory, k=self.batch_size)
180 |
181 | states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
182 | actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device)
183 | rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
184 | next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
185 | dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device)
186 |
187 | return (states, actions, rewards, next_states, dones)
188 |
189 | def __len__(self):
190 | """Return the current size of internal memory."""
191 | return len(self.memory)
--------------------------------------------------------------------------------
/img/Competition_and_Cooporation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Competition_and_Cooporation.png
--------------------------------------------------------------------------------
/img/Have_Competitor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Have_Competitor.png
--------------------------------------------------------------------------------
/img/Multi-agent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Multi-agent.png
--------------------------------------------------------------------------------
/img/README.md:
--------------------------------------------------------------------------------
1 | README
2 |
--------------------------------------------------------------------------------
/img/Theorem1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/Theorem1.png
--------------------------------------------------------------------------------
/img/theorem2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/cf21bbc3a57bc644b31c2b36eec4132713fb12d5/img/theorem2.png
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | def hidden_init(layer):
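# Uniform initialization range +/- 1/sqrt(n), with n read from the layer's weight
# matrix, following the layer-initialization heuristic of the DDPG paper.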
9 | fan_in = layer.weight.data.size()[0]
10 | lim = 1. / np.sqrt(fan_in)
11 | return (-lim, lim)
12 |
13 | class Actor(nn.Module):
14 | """Actor (Policy) Model."""
15 |
16 | def __init__(self, state_size, action_size, seed, fc1_units=24, fc2_units=48):
17 | """Initialize parameters and build model.
18 | Params
19 | ======
20 | state_size (int): Dimension of each state
21 | action_size (int): Dimension of each action
22 | seed (int): Random seed
23 | fc1_units (int): Number of nodes in first hidden layer
24 | fc2_units (int): Number of nodes in second hidden layer
25 | """
26 | super(Actor, self).__init__()
27 | self.seed = torch.manual_seed(seed)
28 | self.fc1 = nn.Linear(state_size, fc1_units)
29 | self.fc2 = nn.Linear(fc1_units, fc2_units)
30 | self.fc3 = nn.Linear(fc2_units, action_size)
31 | self.reset_parameters()
32 |
33 | def reset_parameters(self):
34 | self.fc1.weight.data.uniform_(*hidden_init(self.fc1))
35 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2))
36 | self.fc3.weight.data.uniform_(-3e-3, 3e-3)
37 |
38 | def forward(self, state):
39 | """Build an actor (policy) network that maps states -> actions."""
40 | x = F.relu(self.fc1(state))
41 | x = F.relu(self.fc2(x))
42 | return torch.tanh(self.fc3(x))
43 |
44 |
45 | class Critic(nn.Module):
46 | """Critic (Value) Model."""
47 |
48 | def __init__(self, state_size, action_size, seed, fcs1_units=24, fc2_units=48):
49 | """Initialize parameters and build model.
50 | Params
51 | ======
52 | state_size (int): Dimension of each state
53 | action_size (int): Dimension of each action
54 | seed (int): Random seed
55 | fcs1_units (int): Number of nodes in the first hidden layer
56 | fc2_units (int): Number of nodes in the second hidden layer
57 | """
58 | super(Critic, self).__init__()
59 | self.seed = torch.manual_seed(seed)
60 | self.fcs1 = nn.Linear(state_size, fcs1_units)
61 | self.fc2 = nn.Linear(fcs1_units+action_size, fc2_units)
62 | self.fc3 = nn.Linear(fc2_units, 1)
63 | self.reset_parameters()
64 |
65 | def reset_parameters(self):
66 | self.fcs1.weight.data.uniform_(*hidden_init(self.fcs1))
67 | self.fc2.weight.data.uniform_(*hidden_init(self.fc2))
68 | self.fc3.weight.data.uniform_(-3e-3, 3e-3)
69 |
70 | def forward(self, state, action):
71 | """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
72 | xs = F.relu(self.fcs1(state))
73 | x = torch.cat((xs, action), dim=1)
74 | x = F.relu(self.fc2(x))
75 | return self.fc3(x)
76 |
--------------------------------------------------------------------------------
/syntheticChrissAlmgren.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import collections
4 |
5 |
6 | # ------------------------------------------------ Financial Parameters --------------------------------------------------- #
7 |
8 | ANNUAL_VOLAT = 0.12 # Annual volatility in stock price
9 | BID_ASK_SP = 1 / 8 # Bid-ask spread
10 | DAILY_TRADE_VOL = 5e6 # Average Daily trading volume
11 | TRAD_DAYS = 250 # Number of trading days in a year
12 | DAILY_VOLAT = ANNUAL_VOLAT / np.sqrt(TRAD_DAYS) # Daily volatility in stock price
13 |
14 |
15 | # ----------------------------- Parameters for the Almgren and Chriss Optimal Execution Model ----------------------------- #
16 |
17 | TOTAL_SHARES1 = 500000 # Total number of shares for agent 1 to sell
18 | TOTAL_SHARES2 = 500000 # Total number of shares for agent 2 to sell
19 | STARTING_PRICE = 50 # Starting price per share
20 | LLAMBDA1 = 1e-6 # Agent 1's risk aversion
21 | LLAMBDA2 = 1e-4 # Agent 2's risk aversion
22 | LIQUIDATION_TIME = 60 # How many days to sell all the shares.
23 | NUM_N = 60 # Number of trades
24 | EPSILON = BID_ASK_SP / 2 # Fixed Cost of Selling.
25 | SINGLE_STEP_VARIANCE = (DAILY_VOLAT * STARTING_PRICE) ** 2 # Calculate single step variance
26 | ETA = BID_ASK_SP / (0.01 * DAILY_TRADE_VOL) # Price Impact for Each 1% of Daily Volume Traded
27 | GAMMA = BID_ASK_SP / (0.1 * DAILY_TRADE_VOL) # Permanent Impact Constant
28 |
29 | # ----------------------------------------------------------------------------------------------------------------------- #
30 |
31 |
32 | # Simulation Environment
33 |
34 | class MarketEnvironment():
35 |
36 | def __init__(self, randomSeed = 0,
37 | lqd_time = LIQUIDATION_TIME,
38 | num_tr = NUM_N,
39 | lambd1 = LLAMBDA1,
40 | lambd2 = LLAMBDA2):
41 |
42 | # Set the random seed
43 | random.seed(randomSeed)
44 |
45 | # Initialize the financial parameters so we can access them later
46 | self.anv = ANNUAL_VOLAT
47 | self.basp = BID_ASK_SP
48 | self.dtv = DAILY_TRADE_VOL
49 | self.dpv = DAILY_VOLAT
50 |
51 | # Initialize the Almgren-Chriss parameters so we can access them later
52 | self.total_shares1 = TOTAL_SHARES1
53 | self.total_shares2 = TOTAL_SHARES2
54 | self.startingPrice = STARTING_PRICE
55 | self.llambda1 = lambd1
56 | self.llambda2 = lambd2
57 | self.liquidation_time = lqd_time
58 | self.num_n = num_tr
59 | self.epsilon = EPSILON
60 | self.singleStepVariance = SINGLE_STEP_VARIANCE
61 | self.eta = ETA
62 | self.gamma = GAMMA
63 |
64 | # Calculate some Almgren-Chriss parameters
65 | self.tau = self.liquidation_time / self.num_n
66 | self.eta_hat = self.eta - (0.5 * self.gamma * self.tau)
67 | self.kappa_hat1 = np.sqrt((self.llambda1 * self.singleStepVariance) / self.eta_hat)
68 | self.kappa_hat2 = np.sqrt((self.llambda2 * self.singleStepVariance) / self.eta_hat)
69 | self.kappa1 = np.arccosh((((self.kappa_hat1 ** 2) * (self.tau ** 2)) / 2) + 1) / self.tau
70 | self.kappa2 = np.arccosh((((self.kappa_hat2 ** 2) * (self.tau ** 2)) / 2) + 1) / self.tau
71 |
72 | # Set the variables for the initial state
73 | self.shares_remaining1 = self.total_shares1
74 | self.shares_remaining2 = self.total_shares2
75 | self.timeHorizon = self.num_n
76 | self.logReturns = collections.deque(np.zeros(6))
77 |
78 | # Set the initial impacted price to the starting price
79 | self.prevImpactedPrice = self.startingPrice
80 |
81 | # Set the initial transaction state to False
82 | self.transacting1 = False
83 | self.transacting2 = False
84 |
85 | # Set a variable to keep track of the trade number
86 | self.k = 0
87 |
88 |
89 | def reset(self, seed = 0, liquid_time = LIQUIDATION_TIME, num_trades = NUM_N, lamb1 = LLAMBDA1,lamb2 = LLAMBDA2):
90 |
91 | # Initialize the environment with the given parameters
92 | self.__init__(randomSeed = seed, lqd_time = liquid_time, num_tr = num_trades, lambd1 = lamb1,lambd2 = lamb2)
93 |
94 | # Set the initial state to [0,0,0,0,0,0,1,1]
95 | self.initial_state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, \
96 | self.shares_remaining1 / self.total_shares1, \
97 | self.shares_remaining2 / self.total_shares2])
98 | return self.initial_state
99 |
100 |
101 | def start_transactions(self):
102 |
103 | # Set transactions on
104 | self.transacting1 = True
105 | self.transacting2 = True
106 |
107 | # Set the minimum number of stocks one can sell
108 | self.tolerance = 1
109 |
110 | # Set the initial capture to zero
111 | self.totalCapture1 = 0
112 | self.totalCapture2 = 0
113 |
114 | # Set the initial previous price to the starting price
115 | self.prevPrice = self.startingPrice
116 |
117 | # Set the initial square of the shares to sell to zero
118 | self.totalSSSQ1 = 0
119 | self.totalSSSQ2 = 0
120 | # Set the initial square of the remaining shares to sell to zero
121 | self.totalSRSQ1 = 0
122 | self.totalSRSQ2 = 0
123 | # Set the initial AC utility
124 | self.prevUtility1 = self.compute_AC_utility(self.total_shares1,self.kappa1,self.llambda1)
125 | self.prevUtility2 = self.compute_AC_utility(self.total_shares2,self.kappa2,self.llambda2)
126 |
127 | def step(self, action1,action2):
128 |
129 | # Create a class that will be used to keep track of information about the transaction
130 | class Info(object):
131 | pass
132 | info = Info()
133 |
134 | # Set the done flag to False. This indicates that we haven't sold all the shares yet.
135 | info.done1 = False
136 | info.done2 = False
137 |
138 | # During training, if the DDPG fails to sell all the stocks before the given
139 | # number of trades or if the total number of shares remaining is less than 1, then stop transacting,
140 | # set the done Flag to True, return the current implementation shortfall, and give a negative reward.
141 | # The negative reward is given in the else statement below.
142 | if self.transacting1 and (self.timeHorizon == 0 or (abs(self.shares_remaining1) < self.tolerance)):
143 | self.transacting1 = False
144 | info.done1 = True
145 | info.implementation_shortfall1 = self.total_shares1 * self.startingPrice - self.totalCapture1
146 | info.expected_shortfall1 = self.get_expected_shortfall(self.total_shares1,self.totalSSSQ1)
147 | info.expected_variance1 = self.singleStepVariance * self.tau * self.totalSRSQ1
148 | info.utility1 = info.expected_shortfall1 + self.llambda1 * info.expected_variance1
149 |
150 | if self.transacting2 and (self.timeHorizon == 0 or (abs(self.shares_remaining2) < self.tolerance)):
151 | self.transacting2 = False
152 | info.done2 = True
153 | info.implementation_shortfall2 = self.total_shares2 * self.startingPrice - self.totalCapture2
154 | info.expected_shortfall2 = self.get_expected_shortfall(self.total_shares2,self.totalSSSQ2)
155 | info.expected_variance2 = self.singleStepVariance * self.tau * self.totalSRSQ2
156 | info.utility2 = info.expected_shortfall2 + self.llambda2 * info.expected_variance2
157 |
158 | # We don't add noise before the first trade
159 | if self.k == 0:
160 | info.price = self.prevImpactedPrice
161 | else:
162 | # Calculate the current stock price using arithmetic Brownian motion
163 | info.price = self.prevImpactedPrice + np.sqrt(self.singleStepVariance * self.tau) * random.normalvariate(0, 1)
164 |
165 | # If we are transacting, the stock price is affected by the number of shares we sell. The price evolves
166 | # according to the Almgren and Chriss price dynamics model.
167 | if self.transacting1:
168 |
169 | # If action is an ndarray then extract the number from the array
170 | if isinstance(action1, np.ndarray):
171 | action1 = action1.item()
172 |
173 | # Convert the action to the number of shares to sell in the current step
174 | sharesToSellNow1 = self.shares_remaining1 * action1
175 |
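# NOTE: the else branch below overrides the policy action: agent 1 sells nothing
# until the final two periods and then dumps all remaining shares, apparently a
# leftover from the fixed-strategy competitor experiments.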
176 | if self.timeHorizon < 2:
177 | sharesToSellNow1 = self.shares_remaining1
178 | else:
179 | sharesToSellNow1 = 0
180 | # sharesToSellNow = min(self.shares_remaining * action, self.shares_remaining)
181 | if self.transacting2:
182 |
183 | # If action is an ndarray then extract the number from the array
184 | if isinstance(action2, np.ndarray):
185 | action2 = action2.item()
186 |
187 | # Convert the action to the number of shares to sell in the current step
188 | sharesToSellNow2 = self.shares_remaining2 * action2
189 |
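# NOTE: as above, the else branch overrides agent 2's action (sell nothing until
# the final two periods, then sell everything).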
190 | if self.timeHorizon < 2:
191 | sharesToSellNow2 = self.shares_remaining2
192 | else:
193 | sharesToSellNow2 = 0
194 |
195 | if self.transacting1 or self.transacting2:
196 |
197 | # Since we are not selling fractions of shares, round the total number of shares to sell to the nearest integer.
198 | info.share_to_sell_now1 = np.around(sharesToSellNow1)
199 | info.share_to_sell_now2 = np.around(sharesToSellNow2)
200 | # Calculate the permanent and temporary impact on the stock price according to the AC price dynamics model
201 | info.currentPermanentImpact = self.permanentImpact(info.share_to_sell_now1+info.share_to_sell_now2)
202 | info.currentTemporaryImpact = self.temporaryImpact(info.share_to_sell_now1+info.share_to_sell_now2)
203 |
204 | # Apply the temporary impact on the current stock price
205 | info.exec_price = info.price - info.currentTemporaryImpact
206 |
207 | # Calculate the current total capture
208 | self.totalCapture1 += info.share_to_sell_now1 * info.exec_price
209 | self.totalCapture2 += info.share_to_sell_now2 * info.exec_price
210 |
211 | # Calculate the log return for the current step and save it in the logReturn deque
212 | self.logReturns.append(np.log(info.price/self.prevPrice))
213 | self.logReturns.popleft()
214 |
215 | # Update the number of shares remaining
216 | self.shares_remaining1 -= info.share_to_sell_now1
217 | self.shares_remaining2 -= info.share_to_sell_now2
218 |
219 | # Calculate the running total of the squares of shares sold and shares remaining
220 | self.totalSSSQ1 += info.share_to_sell_now1 ** 2
221 | self.totalSRSQ1 += self.shares_remaining1 ** 2
222 |
223 | self.totalSSSQ2 += info.share_to_sell_now2 ** 2
224 | self.totalSRSQ2 += self.shares_remaining2 ** 2
225 |
226 | # Update the variables required for the next step
227 | self.timeHorizon -= 1
228 | self.prevPrice = info.price
229 | self.prevImpactedPrice = info.price - info.currentPermanentImpact
230 |
231 | # Calculate the reward
232 | currentUtility1 = self.compute_AC_utility(self.shares_remaining1,self.kappa1,self.llambda1)
233 | currentUtility2 = self.compute_AC_utility(self.shares_remaining2,self.kappa2,self.llambda2)
234 | if self.prevUtility1 == 0:
235 | reward1 = 0
236 | else:
237 | reward1 = (abs(self.prevUtility1) - abs(currentUtility1)) / abs(self.prevUtility1)
238 | if self.prevUtility2 == 0:
239 | reward2 = 0
240 | else:
241 | reward2 = (abs(self.prevUtility2) - abs(currentUtility2)) / abs(self.prevUtility2)
242 |
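# Competitive reward shaping: the agent with the smaller raw reward is penalized
# by the other agent's reward and the result is halved; the commented-out lines
# implement the cooperative variant, which averages the two rewards instead.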
243 |             # Competition: the agent with the smaller utility gain is additionally
244 |             # penalized by the leading agent's gain, then scaled by 0.5. In the
245 |             # cooperation variant each agent instead receives the average reward,
246 |             # i.e. reward_i = 0.5 * (reward1 + reward2).
247 |             if reward1 > reward2:
248 |                 reward2 -= reward1
249 |                 reward2 *= 0.5
250 |             else:
251 |                 reward1 -= reward2
252 |                 reward1 *= 0.5
253 |             # Another variant that was tried:
254 |             # reward_i = max(reward_i - reward_j, 0)
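    |             # Illustrative check of the shaping above: if agent 1's normalized
    |             # utility gain is 0.04 and agent 2's is 0.01, agent 2's shaped reward
    |             # becomes 0.5 * (0.01 - 0.04) = -0.015 while agent 1 keeps its 0.04,
    |             # so trailing the leading agent is actively penalized.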
255 |
256 | self.prevUtility1 = currentUtility1
257 | self.prevUtility2 = currentUtility2
258 |
259 |             # If an agent has sold all of its shares, compute its implementation shortfall and mark it done
260 | if self.shares_remaining1 <= 0:
261 |
262 | # Calculate the implementation shortfall
263 | info.implementation_shortfall1 = self.total_shares1 * self.startingPrice - self.totalCapture1
264 | info.done1 = True
265 |
266 | if self.shares_remaining2 <= 0:
267 |
268 | # Calculate the implementation shortfall
269 | info.implementation_shortfall2 = self.total_shares2 * self.startingPrice - self.totalCapture2
270 | info.done2 = True
271 |
272 |         # If neither agent is transacting, there is nothing to sell and no reward
273 |         else:
274 | reward1 = 0.0
275 | reward2 = 0.0
276 |
277 | self.k += 1
278 |
279 | # Set the new state
280 | state = np.array(list(self.logReturns) + [self.timeHorizon / self.num_n, self.shares_remaining1 / self.total_shares1, self.shares_remaining2 / self.total_shares2])
281 |
282 |         return (state, np.array([reward1]), np.array([reward2]), info.done1, info.done2, info)
283 |
284 |
285 | def permanentImpact(self, sharesToSell):
286 | # Calculate the permanent impact according to equations (6) and (1) of the AC paper
287 | pi = self.gamma * sharesToSell
288 | return pi
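    |         # Rough numeric sketch (default parameters assumed): with gamma = 2.5e-7,
    |         # selling 10,000 shares lowers the fair price by 2.5e-7 * 10,000 = $0.0025
    |         # for every subsequent trade.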
289 |
290 |
291 | def temporaryImpact(self, sharesToSell):
292 | # Calculate the temporary impact according to equation (7) of the AC paper
293 | ti = (self.epsilon * np.sign(sharesToSell)) + ((self.eta / self.tau) * sharesToSell)
294 | return ti
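    |         # Rough numeric sketch (default parameters assumed): with epsilon = 0.0625
    |         # (half the bid-ask spread) and eta / tau = 2.5e-6, selling 10,000 shares
    |         # in one interval costs 0.0625 + 2.5e-6 * 10,000 = $0.0875 per share.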
295 |
296 |     def get_expected_shortfall(self, sharesToSell, totalSSSQ):
297 | # Calculate the expected shortfall according to equation (8) of the AC paper
298 | ft = 0.5 * self.gamma * (sharesToSell ** 2)
299 | st = self.epsilon * sharesToSell
300 | tt = (self.eta_hat / self.tau) * totalSSSQ
301 | return ft + st + tt
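    |         # In symbols: E[IS] = 0.5 * gamma * X**2 + epsilon * X + (eta_hat / tau) * sum(n_k**2),
    |         # where X is the total number of shares to sell and n_k the shares sold in interval k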
302 |
303 |
304 |     def get_AC_expected_shortfall(self, sharesToSell, kappa):
305 | # Calculate the expected shortfall for the optimal strategy according to equation (20) of the AC paper
306 | ft = 0.5 * self.gamma * (sharesToSell ** 2)
307 | st = self.epsilon * sharesToSell
308 | tt = self.eta_hat * (sharesToSell ** 2)
309 | nft = np.tanh(0.5 * kappa * self.tau) * (self.tau * np.sinh(2 * kappa * self.liquidation_time) \
310 | + 2 * self.liquidation_time * np.sinh(kappa * self.tau))
311 | dft = 2 * (self.tau ** 2) * (np.sinh(kappa * self.liquidation_time) ** 2)
312 | fot = nft / dft
313 | return ft + st + (tt * fot)
314 |
315 |
316 |     def get_AC_variance(self, sharesToSell, kappa):
317 | # Calculate the variance for the optimal strategy according to equation (20) of the AC paper
318 | ft = 0.5 * (self.singleStepVariance) * (sharesToSell ** 2)
319 | nst = self.tau * np.sinh(kappa * self.liquidation_time) * np.cosh(kappa * (self.liquidation_time - self.tau)) \
320 | - self.liquidation_time * np.sinh(kappa * self.tau)
321 | dst = (np.sinh(kappa * self.liquidation_time) ** 2) * np.sinh(kappa * self.tau)
322 | st = nst / dst
323 | return ft * st
324 |
325 |
326 |     def compute_AC_utility(self, sharesToSell, kappa, llambda):
327 | # Calculate the AC Utility according to pg. 13 of the AC paper
328 | if self.liquidation_time == 0:
329 | return 0
330 | E = self.get_AC_expected_shortfall(sharesToSell,kappa)
331 | V = self.get_AC_variance(sharesToSell,kappa)
332 | return E + llambda * V
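    |         # U(x) = E(x) + lambda * V(x): a larger risk aversion lambda penalizes
    |         # variance more heavily, pushing the optimal schedule to sell faster.
    |         # Purely illustrative numbers: E = $100,000, V = 1e10 and lambda = 1e-6
    |         # give U = 100,000 + 1e-6 * 1e10 = $110,000.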
333 |
334 |
335 |     def get_trade_list(self, kappa):
336 | # Calculate the trade list for the optimal strategy according to equation (18) of the AC paper
337 | trade_list = np.zeros(self.num_n)
338 | ftn = 2 * np.sinh(0.5 * kappa * self.tau)
339 | ftd = np.sinh(kappa * self.liquidation_time)
340 | ft = (ftn / ftd) * self.total_shares1
341 | for i in range(1, self.num_n + 1):
342 | st = np.cosh(kappa * (self.liquidation_time - (i - 0.5) * self.tau))
343 | trade_list[i - 1] = st
344 | trade_list *= ft
345 | return trade_list
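    |         # Minimal sanity-check sketch (assumes a freshly reset environment; by
    |         # construction the AC schedule liquidates agent 1's full position):
    |         #
    |         #     env = MarketEnvironment()
    |         #     tl = env.get_trade_list(env.kappa1)
    |         #     assert np.isclose(tl.sum(), env.total_shares1)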
346 |
347 |
348 | def observation_space_dimension(self):
349 | # Return the dimension of the state
350 | return 8
351 |
352 |
353 | def action_space_dimension(self):
354 | # Return the dimension of the action
355 | return 1
356 |
357 |
358 |     def stop_transactions(self):
359 |         # Stop transacting for both agents
360 |         self.transacting1 = False
361 |         self.transacting2 = False
362 |
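    | # A minimal interaction sketch for this two-agent environment (assumed usage,
    | # not part of the original file; both agents sell a fixed 5% of their
    | # remaining shares every step):
    | #
    | #     env = MarketEnvironment()
    | #     env.reset(seed = 0)
    | #     env.start_transactions()
    | #     while True:
    | #         state, r1, r2, done1, done2, info = env.step(0.05, 0.05)
    | #         if done1 and done2:
    | #             break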
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 | import matplotlib.ticker as mticker
5 |
6 | import syntheticChrissAlmgren as sca
7 |
8 | from statsmodels.iolib.table import SimpleTable
9 | from statsmodels.compat.python import zip_longest
10 | from statsmodels.iolib.tableformatting import fmt_2cols
11 |
12 |
13 | def generate_table(left_col, right_col, table_title):
14 |
15 | # Do not use column headers
16 | col_headers = None
17 |
18 | # Generate the right table
19 | if right_col:
20 | # Add padding
21 | if len(right_col) < len(left_col):
22 | right_col += [(' ', ' ')] * (len(left_col) - len(right_col))
23 | elif len(right_col) > len(left_col):
24 | left_col += [(' ', ' ')] * (len(right_col) - len(left_col))
25 | right_col = [('%-21s' % (' '+k), v) for k,v in right_col]
26 |
27 | # Generate the right table
28 | gen_stubs_right, gen_data_right = zip_longest(*right_col)
29 | gen_table_right = SimpleTable(gen_data_right,
30 | col_headers,
31 | gen_stubs_right,
32 | title = table_title,
33 | txt_fmt = fmt_2cols)
34 | else:
35 | # If there is no right table set the right table to empty
36 | gen_table_right = []
37 |
38 | # Generate the left table
39 | gen_stubs_left, gen_data_left = zip_longest(*left_col)
40 | gen_table_left = SimpleTable(gen_data_left,
41 | col_headers,
42 | gen_stubs_left,
43 | title = table_title,
44 | txt_fmt = fmt_2cols)
45 |
46 |
47 | # Merge the left and right tables to make a single table
48 | gen_table_left.extend_right(gen_table_right)
49 | general_table = gen_table_left
50 |
51 | return general_table
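    |     # Minimal usage sketch (hypothetical values):
    |     #
    |     #     tbl = generate_table([('Shares:', ['500,000'])],
    |     #                          [('Price:', ['$50.00'])], 'Example')
    |     #     print(tbl)   # renders the two column pairs side by side as text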
52 |
53 |
54 | def get_env_param():
55 |
56 | # Create a simulation environment
57 | env = sca.MarketEnvironment()
58 |
59 | # Set the title for the financial parameters table
60 | fp_title = 'Financial Parameters'
61 |
62 | # Get the default financial parameters from the simulation environment
63 | fp_left_col = [('Annual Volatility:', ['{:.0f}%'.format(env.anv * 100)]),
64 | ('Daily Volatility:', ['{:.1f}%'.format(env.dpv * 100)])]
65 |
66 | fp_right_col = [('Bid-Ask Spread:', ['{:.3f}'.format(env.basp)]),
67 | ('Daily Trading Volume:', ['{:,.0f}'.format(env.dtv)])]
68 |
69 | # Set the title for the Almgren and Chriss Model parameters table
70 | acp_title = 'Almgren and Chriss Model Parameters'
71 |
72 | # Get the default Almgren and Chriss Model Parameters from the simulation environment
73 | acp_left_col = [('Total Number of Shares for Agent1 to Sell:', ['{:,}'.format(env.total_shares1)]),
74 | ('Total Number of Shares for Agent2 to Sell:', ['{:,}'.format(env.total_shares2)]),
75 | ('Starting Price per Share:', ['${:.2f}'.format(env.startingPrice)]),
76 | ('Price Impact for Each 1% of Daily Volume Traded:', ['${}'.format(env.eta)]),
77 | ('Number of Days to Sell All the Shares:', ['{}'.format(env.liquidation_time)]),
78 | ('Number of Trades:', ['{}'.format(env.num_n)])]
79 |
80 | acp_right_col = [('Fixed Cost of Selling per Share:', ['${:.3f}'.format(env.epsilon)]),
81 | ('Trader\'s Risk Aversion for Agent 1:', ['{}'.format(env.llambda1)]),
82 | ('Trader\'s Risk Aversion for Agent 2:', ['{}'.format(env.llambda2)]),
83 | ('Permanent Impact Constant:', ['{}'.format(env.gamma)]),
84 | ('Single Step Variance:', ['{:.3f}'.format(env.singleStepVariance)]),
85 | ('Time Interval between trades:', ['{}'.format(env.tau)])]
86 |
87 | # Generate tables with the default financial and AC Model parameters
88 | fp_table = generate_table(fp_left_col, fp_right_col, fp_title)
89 | acp_table = generate_table(acp_left_col, acp_right_col, acp_title)
90 |
91 | return fp_table, acp_table
92 |
93 |
94 | def plot_price_model(seed = 0, num_days = 1000):
95 |
96 | # Create a simulation environment
97 | env = sca.MarketEnvironment()
98 |
99 |     # Reset the environment with the given seed
100 | env.reset(seed)
101 |
102 | # Create an array to hold the daily stock price for the given number of days
103 | price_hist = np.zeros(num_days)
104 |
105 | # Get the simulated stock price movement from the environment
106 | for i in range(num_days):
107 |         _, _, _, _, _, info = env.step(i, i)  # two-agent step returns (state, r1, r2, done1, done2, info)
108 | price_hist[i] = info.price
109 |
110 | # Print Average and Standard Deviation in Stock Price
111 | print('Average Stock Price: ${:,.2f}'.format(price_hist.mean()))
112 | print('Standard Deviation in Stock Price: ${:,.2f}'.format(price_hist.std()))
113 | # print('Standard Deviation of Random Noise: {:,.5f}'.format(np.sqrt(env.singleStepVariance * env.tau)))
114 |
115 | # Plot the price history for the given number of days
116 | price_df = pd.DataFrame(data = price_hist, columns = ['Stock'], dtype = 'float64')
117 | ax = price_df.plot(colormap = 'cool', grid = False)
118 | ax.set_facecolor(color = 'k')
119 | ax = plt.gca()
120 | yNumFmt = mticker.StrMethodFormatter('${x:,.2f}')
121 | ax.yaxis.set_major_formatter(yNumFmt)
122 | plt.ylabel('Stock Price')
123 | plt.xlabel('days')
124 | plt.show()
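    |     # Usage sketch: plot_price_model(seed = 0, num_days = 252) simulates and
    |     # plots roughly one trading year of the unimpacted daily price path.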
125 |
126 |
127 |
128 | def get_optimal_vals(lq_time = 60, nm_trades = 60, tr_risk = 1e-6, title = ''):
129 |
130 | # Create a simulation environment
131 | env = sca.MarketEnvironment()
132 |
133 |     # Reset the environment with the given parameters
134 | env.reset(liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)
135 |
136 | # Set the title for the AC Optimal Strategy table
137 | if title == '':
138 | title = 'AC Optimal Strategy'
139 | else:
140 | title = 'AC Optimal Strategy for ' + title
141 |
142 |     # Get the AC optimal values from the environment (using agent 1's parameters)
143 |     E = env.get_AC_expected_shortfall(env.total_shares1, env.kappa1)
144 |     V = env.get_AC_variance(env.total_shares1, env.kappa1)
145 |     U = env.compute_AC_utility(env.total_shares1, env.kappa1, env.llambda1)
146 |
147 |     left_col = [('Number of Days to Sell All the Shares:', ['{}'.format(env.liquidation_time)]),
148 |                 ('Half-Life of The Trade:', ['{:,.1f}'.format(1 / env.kappa1)]),
149 |                 ('Utility:', ['${:,.2f}'.format(U)])]
150 | 
151 |     right_col = [('Initial Portfolio Value:', ['${:,.2f}'.format(env.total_shares1 * env.startingPrice)]),
152 | ('Expected Shortfall:', ['${:,.2f}'.format(E)]),
153 | ('Standard Deviation of Shortfall:', ['${:,.2f}'.format(np.sqrt(V))])]
154 |
155 | # Generate the table with the AC optimal values
156 | val_table = generate_table(left_col, right_col, title)
157 |
158 | return val_table
159 |
160 |
161 | def get_min_param():
162 |
163 | # Get the minimum impact AC parameters
164 | min_impact = get_optimal_vals(lq_time = 250, nm_trades = 250, tr_risk = 1e-17, title = 'Minimum Impact')
165 |
166 | # Get the minimum variance AC parameters
167 | min_var = get_optimal_vals(lq_time = 1, nm_trades = 1, tr_risk = 0.0058, title = 'Minimum Variance')
168 |
169 | return min_impact, min_var
170 |
171 |
172 | def get_crfs(trisk):
173 |
174 | # Create the annotation label
175 | tr_st = '{:.0e}'.format(trisk)
176 | lnum = tr_st.split('e')[0]
177 | lexp = tr_st.split('e')[1]
178 |     if abs(int(lexp)) < 10:
179 |         lexp = lexp.replace('0', '', 1)
180 |     an_st = r'$\lambda = ' + lnum + r' \times 10^{' + lexp + '}$'
181 |
182 | # Set the correction factors for the annotation label
183 | if trisk >= 1e-7 and trisk <= 4e-7:
184 | xcrf = 0.94
185 | ycrf = 2.5
186 | scrf = 0.1
187 | elif trisk > 4e-7 and trisk <= 9e-7:
188 | xcrf = 0.9
189 | ycrf = 2.5
190 | scrf = 0.06
191 | elif trisk > 9e-7 and trisk <= 1e-6:
192 | xcrf = 0.85
193 | ycrf = 2.5
194 | scrf = 0.06
195 | elif trisk > 1e-6 and trisk < 2e-6:
196 | xcrf = 1.2
197 | ycrf = 2.5
198 | scrf = 0.06
199 | elif trisk >= 2e-6 and trisk < 3e-6:
200 | xcrf = 0.8
201 | ycrf = 2.5
202 | scrf = 0.06
203 | elif trisk >= 3e-6 and trisk < 4e-6:
204 | xcrf = 0.7
205 | ycrf = 2.5
206 | scrf = 0.08
207 | elif trisk >= 4e-6 and trisk < 7e-6:
208 | xcrf = 1.4
209 | ycrf = 2.0
210 | scrf = 0.08
211 | elif trisk >= 7e-6 and trisk <= 1e-5:
212 | xcrf = 4.5
213 | ycrf = 1.5
214 | scrf = 0.08
215 | elif trisk > 1e-5 and trisk <= 2e-5:
216 | xcrf = 7.0
217 | ycrf = 1.1
218 | scrf = 0.08
219 | elif trisk > 2e-5 and trisk <= 5e-5:
220 | xcrf = 12.
221 | ycrf = 1.1
222 | scrf = 0.08
223 | elif trisk > 5e-5 and trisk <= 1e-4:
224 | xcrf = 30
225 | ycrf = 0.99
226 | scrf = 0.08
227 | else:
228 | xcrf = 1
229 | ycrf = 1
230 | scrf = 0.08
231 |
232 | return an_st, xcrf, ycrf, scrf
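    |     # Example of the label construction above: trisk = 1e-6 formats as '1e-06',
    |     # so lnum = '1', lexp = '-06' -> '-6', and the annotation renders as
    |     # '$\lambda = 1 \times 10^{-6}$' in matplotlib.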
233 |
234 |
235 | def plot_efficient_frontier(tr_risk = 1e-6):
236 |
237 | # Create a simulation environment
238 | env = sca.MarketEnvironment()
239 |
240 |     # Reset the environment with the given trader's risk aversion
241 |     env.reset(lamb = tr_risk)
242 | 
243 |     # Get the expected shortfall and corresponding variance for the given trader's risk aversion
244 |     tr_E = env.get_AC_expected_shortfall(env.total_shares1, env.kappa1)
245 |     tr_V = env.get_AC_variance(env.total_shares1, env.kappa1)
246 |
247 | # Create empty arrays to hold our values of E, V, and U
248 | E = np.array([])
249 | V = np.array([])
250 | U = np.array([])
251 |
252 | # Set the number of plot points for our frontier
253 | num_points = 7000
254 |
255 | # Set the values of the trader's risk aversion to plot
256 | lambdas = np.linspace(1e-7, 1e-4, num_points)
257 |
258 |     # Calculate E, V, U for each value of llambda
259 |     for llambda in lambdas:
260 |         env.reset(lamb = llambda)
261 |         E = np.append(E, env.get_AC_expected_shortfall(env.total_shares1, env.kappa1))
262 |         V = np.append(V, env.get_AC_variance(env.total_shares1, env.kappa1))
263 |         U = np.append(U, env.compute_AC_utility(env.total_shares1, env.kappa1, env.llambda1))
264 |
265 | # Plot E vs V and use U for the colorbar
266 | cm = plt.cm.get_cmap('gist_rainbow')
267 | sc = plt.scatter(V, E, s = 20, c = U, cmap = cm)
268 | plt.colorbar(sc, label = 'AC Utility', format = mticker.StrMethodFormatter('${x:,.0f}'))
269 | ax = plt.gca()
270 | ax.set_facecolor('k')
271 | ymin = E.min() * 0.7
272 | ymax = E.max() * 1.1
273 | plt.ylim(ymin, ymax)
274 | yNumFmt = mticker.StrMethodFormatter('${x:,.0f}')
275 | xNumFmt = mticker.StrMethodFormatter('{x:,.0f}')
276 | ax.yaxis.set_major_formatter(yNumFmt)
277 | ax.xaxis.set_major_formatter(xNumFmt)
278 | plt.xlabel('Variance of Shortfall')
279 | plt.ylabel('Expected Shortfall')
280 |
281 | # Get the annotation label and the correction factors
282 | an_st, xcrf, ycrf, scrf = get_crfs(tr_risk)
283 |
284 | # Plot the annotation in the above plot
285 | plt.annotate(an_st, xy = (tr_V, tr_E), xytext = (tr_V * xcrf, tr_E * ycrf), color = 'w', size = 'large',
286 | arrowprops = dict(facecolor = 'cyan', shrink = scrf, width = 3, headwidth = 10))
287 | plt.show()
288 |
289 |
290 | def round_trade_list(trl):
291 |
292 | # Round the shares in the trading list
293 | trl_rd = np.around(trl)
294 |
295 |     # Rounding the number of shares in the trading list sometimes results in selling more or
296 |     # fewer shares than we have available. We calculate the difference between the total number
297 |     # of shares sold in the original trading list and the number sold in the rounded list;
298 |     # this difference is used to correct for the rounding error.
299 | res = np.around(trl.sum() - trl_rd.sum())
300 |
301 | # Correct the number of shares sold due to rounding errors if necessary
302 | if res != 0:
303 | idx = trl_rd.nonzero()[0][-1]
304 | trl_rd[idx] += res
305 |
306 | return trl_rd
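    |     # Worked example of the correction above: [10.4, 10.4, 10.2] sums to 31.0
    |     # but rounds to [10, 10, 10] (sum 30), so res = 1 is added to the last
    |     # nonzero entry, giving [10, 10, 11] and preserving the total.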
307 |
308 |
309 | def plot_trade_list(lq_time = 60, nm_trades = 60, tr_risk = 1e-6, show_trl = False):
310 |
311 | # Create simulation environment
312 | env = sca.MarketEnvironment()
313 |
314 | # Reset the environment with the given parameters
315 | env.reset(liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)
316 |
317 |     # Get the trading list from the environment (using agent 1's kappa)
318 |     trade_list = env.get_trade_list(env.kappa1)
319 |
320 | # Add a zero at the beginning of the trade list to indicate that at time 0 we don't sell any stocks
321 | new_trl = np.insert(trade_list, 0, 0)
322 |
323 | # We create a dataframe with the trading list and trading trajectory
324 | df = pd.DataFrame(data = list(range(nm_trades + 1)), columns = ['Trade Number'], dtype = 'float64')
325 | df['Stocks Sold'] = new_trl
326 |     df['Stocks Remaining'] = (np.ones(nm_trades + 1) * env.total_shares1) - np.cumsum(new_trl)
327 |
328 | # Create a figure with 2 plots in 1 row
329 | fig, axes = plt.subplots(nrows = 1, ncols = 2)
330 |
331 | # Make a scatter plot of the trade list
332 | df.iloc[1:].plot.scatter(x = 'Trade Number', y = 'Stocks Sold', c = 'Stocks Sold', colormap = 'gist_rainbow',
333 | alpha = 1, sharex = False, s = 50, colorbar = False, ax = axes[0])
334 |
335 | # Plot a line through the points of the scatter plot of the trade list
336 | axes[0].plot(df['Trade Number'].iloc[1:], df['Stocks Sold'].iloc[1:], linewidth = 2.0, alpha = 0.5)
337 | axes[0].set_facecolor(color = 'k')
338 | yNumFmt = mticker.StrMethodFormatter('{x:,.0f}')
339 | axes[0].yaxis.set_major_formatter(yNumFmt)
340 | axes[0].set_title('Trading List')
341 |
342 | # Make a scatter plot of the number of stocks remaining after each trade
343 | df.plot.scatter(x = 'Trade Number', y = 'Stocks Remaining', c = 'Stocks Remaining', colormap = 'gist_rainbow',
344 | alpha = 1, sharex = False, s = 50, colorbar = False, ax = axes[1])
345 |
346 | # Plot a line through the points of the scatter plot of the number of stocks remaining after each trade
347 | axes[1].plot(df['Trade Number'], df['Stocks Remaining'], linewidth = 2.0, alpha = 0.5)
348 | axes[1].set_facecolor(color = 'k')
349 | yNumFmt = mticker.StrMethodFormatter('{x:,.0f}')
350 | axes[1].yaxis.set_major_formatter(yNumFmt)
351 | axes[1].set_title('Trading Trajectory')
352 |
353 | # Set the spacing between plots
354 | plt.subplots_adjust(wspace = 0.4)
355 | plt.show()
356 |
357 | print('\nNumber of Shares Sold: {:,.0f}\n'.format(new_trl.sum()))
358 |
359 | if show_trl:
360 |
361 |         # Since we are not selling fractional shares we round the shares in the trading list
362 | rd_trl = round_trade_list(new_trl)
363 | # rd_trl = new_trl
364 |
365 | # We create a dataframe with the modified trading list and trading trajectory
366 | df2 = pd.DataFrame(data = list(range(nm_trades + 1)), columns = ['Trade Number'], dtype = 'float64')
367 | df2['Stocks Sold'] = rd_trl
368 |         df2['Stocks Remaining'] = (np.ones(nm_trades + 1) * env.total_shares1) - np.cumsum(rd_trl)
369 |
370 | return df2.style.hide_index().format({'Trade Number': '{:.0f}', 'Stocks Sold': '{:,.0f}', 'Stocks Remaining': '{:,.0f}'})
371 | # return df2.style.hide_index().format({'Trade Number': '{:.0f}', 'Stocks Sold': '{:e}', 'Stocks Remaining': '{:e}'})
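    |     # Usage sketch: plot_trade_list(lq_time = 60, nm_trades = 60, tr_risk = 1e-6,
    |     # show_trl = True) plots the AC schedule and also returns the rounded
    |     # trading list as a formatted table.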
372 |
373 |
374 | def implement_trade_list(seed = 0, lq_time = 60, nm_trades = 60, tr_risk = 1e-6):
375 |
376 | # Create simulation environment
377 | env = sca.MarketEnvironment()
378 |
379 | # Reset the environment with the given parameters
380 | env.reset(seed = seed, liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)
381 |
382 |     # Get the trading list from the environment (using agent 1's kappa)
383 |     trl = env.get_trade_list(env.kappa1)
384 | 
385 |     # Since we are not selling fractional shares we round the shares in the trading list
386 |     trade_list = round_trade_list(trl)
387 |
388 | # set the environment to make transactions
389 | env.start_transactions()
390 |
391 | # Create an array to hold the impacted stock price
392 | price_hist = np.array([])
393 |
394 |     # Implement the trading list in our simulation environment
395 | for trade in trade_list:
396 |
397 |         # Convert the number of shares to sell in each trade into an action
398 |         action = trade / env.shares_remaining1
399 | 
400 |         # Step the two-agent environment; both agents are given the same action here
401 |         _, _, _, done1, _, info = env.step(action, action)
402 | 
403 |         # Get the impacted price from the environment
404 |         price_hist = np.append(price_hist, info.exec_price)
405 | 
406 |         # If all shares have been sold, stop transacting and report the implementation shortfall
407 |         if done1:
408 |             print('Implementation Shortfall: ${:,.2f} \n'.format(info.implementation_shortfall1))
409 |             break
410 |
411 | # Plot the impacted price
412 | price_df = pd.DataFrame(data = price_hist, columns = ['Stock'], dtype = 'float64')
413 | ax = price_df.plot(colormap = 'cool', grid = False)
414 | ax.set_facecolor(color = 'k')
415 | ax.set_title('Impacted Stock Price')
416 | ax = plt.gca()
417 | yNumFmt = mticker.StrMethodFormatter('${x:,.2f}')
418 | ax.yaxis.set_major_formatter(yNumFmt)
419 | plt.plot(price_hist, 'o')
420 | plt.ylabel('Stock Price')
421 | plt.xlabel('Trade Number')
422 | plt.show()
423 |
424 |
425 | def get_av_std(lq_time = 60, nm_trades = 60, tr_risk = 1e-6, trs = 100):
426 |
427 | # Create simulation environment
428 | env = sca.MarketEnvironment()
429 |
430 |     # Reset the environment
431 | env.reset(liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)
432 |
433 |     # Get the trading list (using agent 1's kappa)
434 |     trl = env.get_trade_list(env.kappa1)
435 | 
436 |     # Since we are not selling fractional shares we round the shares in the trading list
437 |     trade_list = round_trade_list(trl)
438 |
439 | # Set the initial shortfall to zero
440 | shortfall_hist = np.array([])
441 |
442 | for episode in range(trs):
443 |
444 | # Print current episode every 100 episodes
445 | if (episode + 1) % 100 == 0:
446 | print('Episode [{}/{}]'.format(episode + 1, trs), end = '\r', flush = True)
447 |
448 |         # Reset the environment
449 | env.reset(seed = episode, liquid_time = lq_time, num_trades = nm_trades, lamb = tr_risk)
450 |
451 | # set the environment to make transactions
452 | env.start_transactions()
453 |
454 |         for trade in trade_list:
455 |             action = trade / env.shares_remaining1
456 |             _, _, _, done1, _, info = env.step(action, action)
457 | 
458 |             if done1:
459 |                 shortfall_hist = np.append(shortfall_hist, info.implementation_shortfall1)
460 |                 break
461 |
462 | print('Average Implementation Shortfall: ${:,.2f}'.format(shortfall_hist.mean()))
463 | print('Standard Deviation of the Implementation Shortfall: ${:,.2f}'.format(shortfall_hist.std()))
464 |
465 |     plt.plot(shortfall_hist, 'cyan', label = 'Shortfall')
466 | plt.xlim(0, trs)
467 | ax = plt.gca()
468 | ax.set_facecolor('k')
469 | ax.set_xlabel('Episode', fontsize = 15)
470 | ax.set_ylabel('Implementation Shortfall (US $)', fontsize = 15)
471 | ax.axhline(shortfall_hist.mean(),0, 1, color = 'm', label='Average')
472 | yNumFmt = mticker.StrMethodFormatter('${x:,.0f}')
473 | ax.yaxis.set_major_formatter(yNumFmt)
474 | plt.legend()
475 |     plt.show()
--------------------------------------------------------------------------------