├── figure
    ├── README.md
    ├── 1.png
    ├── 2.png
    ├── 3.png
    ├── 4.png
    ├── 5.png
    ├── 6.png
    ├── 7.png
    ├── 8.png
    ├── 9.png
    ├── 10.png
    ├── 11.png
    └── 12.png
├── reference
    ├── README.md
    ├── The SABR Model.pdf
    ├── Managing_Smile_Risk.pdf
    ├── HedgingUnderSABRModel.pdf
    └── sabrbeta-JoDerivatives-Revisionv1.pdf
├── README.md
├── calculator.py
├── strategy.py
└── research.py


/figure/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/reference/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/figure/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/1.png


--------------------------------------------------------------------------------
/figure/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/2.png


--------------------------------------------------------------------------------
/figure/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/3.png


--------------------------------------------------------------------------------
/figure/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/4.png


--------------------------------------------------------------------------------
/figure/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/5.png


--------------------------------------------------------------------------------
/figure/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/6.png


--------------------------------------------------------------------------------
/figure/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/7.png


--------------------------------------------------------------------------------
/figure/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/8.png


--------------------------------------------------------------------------------
/figure/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/9.png


--------------------------------------------------------------------------------
/figure/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/10.png


--------------------------------------------------------------------------------
/figure/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/11.png


--------------------------------------------------------------------------------
/figure/12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/figure/12.png


--------------------------------------------------------------------------------
/reference/The SABR Model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/reference/The SABR Model.pdf


--------------------------------------------------------------------------------
/reference/Managing_Smile_Risk.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/reference/Managing_Smile_Risk.pdf


--------------------------------------------------------------------------------
/reference/HedgingUnderSABRModel.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/reference/HedgingUnderSABRModel.pdf


--------------------------------------------------------------------------------
/reference/sabrbeta-JoDerivatives-Revisionv1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuba316/SABR_Volatility_Arbitrage/HEAD/reference/sabrbeta-JoDerivatives-Revisionv1.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SABR_Volatility_Arbitrage
 2 | A 50ETF Option Volatility Arbitrage Strategy Based on SABR Model  
 3 | 
 4 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/1.png)
 5 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/2.png)
 6 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/3.png)
 7 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/4.png)
 8 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/5.png)
 9 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/6.png)
10 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/7.png)
11 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/8.png)
12 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/9.png)
13 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/10.png)
14 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/11.png)
15 | ![](https://github.com/yuba316/SABR_Volatility_Arbitrage/blob/main/figure/12.png)
16 | 


--------------------------------------------------------------------------------
/calculator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.stats import norm
  3 | from calcbsimpvol import calcbsimpvol as cal_vix
  4 | 
  5 | 
  6 | class Greek:  # option greeks calculator
  7 |     def __init__(self):
  8 |         self.author = "Joey Zheng"
  9 | 
 10 |     @staticmethod
 11 |     def bsm(S, K, T, rf, sigma, CorP=1):
 12 |         sign = (-1) ** (not CorP)
 13 |         temp = sigma * np.sqrt(T)
 14 |         d1 = sign * ((np.log(S / K) + (rf + 0.5 * sigma * sigma) * T) / temp)
 15 |         d2 = sign * (d1 - temp)
 16 |         return sign * (S * norm.cdf(d1) - K * np.exp(-rf * T) * norm.cdf(d2))
 17 | 
 18 |     @staticmethod
 19 |     def delta(S, K, T, rf, sigma, CorP=1):
 20 |         d1 = (np.log(S / K) + (rf + 0.5 * sigma * sigma) * T) / (sigma * np.sqrt(T))
 21 |         return norm.cdf(d1) - (1 - CorP)
 22 | 
 23 |     @staticmethod
 24 |     def vega(S, K, T, rf, sigma):
 25 |         temp = np.sqrt(T)
 26 |         d1 = (np.log(S / K) + (rf + 0.5 * sigma * sigma) * T) / (sigma * temp)
 27 |         return S * norm.pdf(d1) * temp
 28 | 
 29 |     @staticmethod
 30 |     def vix_n(S, K, T, rf, mkt, CorP=1, tol=1e-8, max_itr=100, sigma=0.2, dw=1e-4, up=10):
 31 |         price = Greek.bsm(S, K, T, rf, sigma, CorP)
 32 |         pre, count = sigma, 0
 33 |         while not (abs(mkt - price) < tol or count >= max_itr):
 34 |             sigma += (mkt - price) / Greek.vega(S, K, T, rf, sigma)
 35 |             sigma = max(min(sigma, up), dw)
 36 |             if abs(sigma - pre) < tol:
 37 |                 break
 38 |             price = Greek.bsm(S, K, T, rf, sigma, CorP)
 39 |             pre = sigma
 40 |             count += 1
 41 |         return sigma
 42 | 
 43 |     @staticmethod
 44 |     def vix_b(S, K, T, rf, mkt, CorP=1, tol=1e-8, max_itr=100, sigma=0.2, dw=1e-4, up=10):
 45 |         price = Greek.bsm(S, K, T, rf, sigma, CorP)
 46 |         pre, count = sigma, 0
 47 |         while not (abs(mkt - price) < tol or count >= max_itr):
 48 |             if mkt > price:
 49 |                 dw = sigma
 50 |                 sigma = (sigma + up) / 2
 51 |             else:
 52 |                 up = sigma
 53 |                 sigma = (sigma + dw) / 2
 54 |             if abs(sigma - pre) < tol:
 55 |                 break
 56 |             price = Greek.bsm(S, K, T, rf, sigma, CorP)
 57 |             pre = sigma
 58 |             count += 1
 59 |         return sigma
 60 | 
 61 |     @staticmethod
 62 |     def vix(S, K, T, rf, mkt, CorP=1):
 63 |         return cal_vix(dict(cp=np.array(CorP), P=np.array([mkt]), S=np.array([S]), K=np.array([K]),
 64 |                             tau=np.array([T]), r=np.array(rf), q=np.array([0])))[0][0]
 65 | 
 66 |     @staticmethod
 67 |     def sabr(S, K, T, rf, alpha, beta, args):
 68 |         vega, rho = args
 69 |         F = S * np.exp(rf * T)
 70 |         z = (vega / alpha) * (F * K) ** ((1 - beta) / 2) * np.log(F / K)
 71 |         X = np.log((np.sqrt(1 - 2 * rho * z + z ** 2) + z - rho) / (1 - rho))
 72 |         a = ((((1 - beta) * alpha) ** 2 / (24 * (F * K) ** (1 - beta)) +
 73 |               rho * beta * vega * alpha / (4 * (F * K) ** ((1 - beta) / 2)) +
 74 |               (2 - 3 * rho ** 2) * vega ** 2 / 24) * T + 1) * alpha
 75 |         b = ((F * K) ** ((1 - beta) / 2)) * (
 76 |                     1 + ((1 - beta) * np.log(F / K)) ** 2 / 24 + ((1 - beta) * np.log(F / K)) ** 4 / 1920)
 77 |         return a / b * z / X
 78 | 
 79 |     @staticmethod
 80 |     def sabr_atm(S, T, rf, alpha, beta, args):
 81 |         vega, rho = args
 82 |         F = S * np.exp(rf * T)
 83 |         a = ((((1 - beta) * alpha) ** 2 / (24 * F ** (2 - 2 * beta)) +
 84 |               rho * beta * vega * alpha / (4 * F ** (1 - beta)) +
 85 |               (2 - 3 * rho ** 2) * vega ** 2 / 24) * T + 1) * alpha
 86 |         b = F ** (1 - beta)
 87 |         return a / b
 88 | 
 89 |     @staticmethod
 90 |     def dsabr_v(S, K, T, rf, alpha, beta, args):
 91 |         vega, rho = args
 92 |         F = S * np.exp(rf * T)
 93 |         z = (vega / alpha) * (F * K) ** ((1 - beta) / 2) * np.log(F / K)
 94 |         X = np.log((np.sqrt(1 - 2 * rho * z + z ** 2) + z - rho) / (1 - rho))
 95 |         a = ((((1 - beta) * alpha) ** 2 / (24 * (F * K) ** (1 - beta)) +
 96 |               rho * beta * vega * alpha / (4 * (F * K) ** ((1 - beta) / 2)) +
 97 |               (2 - 3 * rho ** 2) * vega ** 2 / 24) * T + 1) * alpha
 98 |         b = ((F * K) ** ((1 - beta) / 2)) * (
 99 |                     1 + ((1 - beta) * np.log(F / K)) ** 2 / 24 + ((1 - beta) * np.log(F / K)) ** 4 / 1920)
100 |         da_v = alpha * T * (rho * beta * alpha / (4 * (F * K) ** ((1 - beta) / 2)) +
101 |                             (2 - 3 * rho ** 2) * vega / 12)
102 |         dz_v = (F * K) ** ((1 - beta) / 2) * np.log(F / K) / alpha
103 |         return da_v / b * z / X + a / b * (dz_v / X - z * dz_v / X**2)
104 | 
105 |     @staticmethod
106 |     def dsabr_r(S, K, T, rf, alpha, beta, args):
107 |         vega, rho = args
108 |         F = S * np.exp(rf * T)
109 |         z = (vega / alpha) * (F * K) ** ((1 - beta) / 2) * np.log(F / K)
110 |         X = np.log((np.sqrt(1 - 2 * rho * z + z ** 2) + z - rho) / (1 - rho))
111 |         a = ((((1 - beta) * alpha) ** 2 / (24 * (F * K) ** (1 - beta)) +
112 |               rho * beta * vega * alpha / (4 * (F * K) ** ((1 - beta) / 2)) +
113 |               (2 - 3 * rho ** 2) * vega ** 2 / 24) * T + 1) * alpha
114 |         b = ((F * K) ** ((1 - beta) / 2)) * (
115 |                     1 + ((1 - beta) * np.log(F / K)) ** 2 / 24 + ((1 - beta) * np.log(F / K)) ** 4 / 1920)
116 |         da_r = alpha * T * (vega * beta * alpha / (4 * (F * K) ** ((1 - beta) / 2)) - rho / 4 * vega ** 2)
117 |         dX_r = 1 / (1 - rho) -\
118 |                (z / np.sqrt(1 - 2 * rho * z + z ** 2) - 1) / (np.sqrt(1 - 2 * rho * z + z ** 2) + z - rho)
119 |         return da_r / b * z / X - a / b * z * dX_r / X ** 2
120 | 
121 |     @staticmethod
122 |     def sabr_delta(S, K, T, rf, alpha, beta, args, CorP=1):
123 |         vega, rho = args
124 |         F = S * np.exp(rf * T)
125 |         z = (vega / alpha) * (F * K) ** ((1 - beta) / 2) * np.log(F / K)
126 |         X = np.log((np.sqrt(1 - 2 * rho * z + z ** 2) + z - rho) / (1 - rho))
127 |         a = ((((1 - beta) * alpha) ** 2 / (24 * (F * K) ** (1 - beta)) +
128 |               rho * beta * vega * alpha / (4 * (F * K) ** ((1 - beta) / 2)) +
129 |               (2 - 3 * rho ** 2) * vega ** 2 / 24) * T + 1) * alpha
130 |         b = ((F * K) ** ((1 - beta) / 2)) * (
131 |                     1 + ((1 - beta) * np.log(F / K)) ** 2 / 24 + ((1 - beta) * np.log(F / K)) ** 4 / 1920)
132 |         sigma = a / b * z / X
133 |         da_f = alpha * T * (((1 - beta) * alpha) ** 2 / 24 +
134 |                             rho * beta * vega * alpha / 8 * (F * K) ** ((1 - beta) / 2)) *\
135 |                (beta - 1) * (F * K) ** (beta - 2)
136 |         db_f = (1 - beta) / 2 * ((F * K) ** (-(1 + beta) / 2)) * (1 + ((1 - beta) * np.log(F / K)) ** 2 / 24 +
137 |                                                                   ((1 - beta) * np.log(F / K)) ** 4 / 1920) +\
138 |                ((F * K) ** ((1 - beta) / 2)) * ((1 - beta) ** 2 / 24 + (1 - beta) ** 4 / 960 * (np.log(F / K)) ** 2) *\
139 |                2 * np.log(F / K) / F
140 |         dz_f = vega / alpha * (((F * K) ** ((1 - beta) / 2)) / F + (1 - beta) / 2 * ((F * K) ** (-(1 + beta) / 2)) *
141 |                                K * np.log(F / K))
142 |         dsigma_f = (da_f / b - a * db_f / b ** 2) * z / X + (dz_f / X - z * dz_f / X ** 2) * a / b
143 |         return Greek.delta(S, K, T, rf, sigma, CorP) + Greek.vega(S, K, T, rf, sigma) * dsigma_f
144 | 
145 |     @staticmethod
146 |     def sabr_vega(S, K, T, rf, alpha, beta, args):
147 |         sigma = Greek.sabr(S, K, T, rf, alpha, beta, args)
148 |         return Greek.vega(S, K, T, rf, sigma) * sigma / Greek.sabr_atm(S, T, rf, alpha, beta, args)
149 | 


--------------------------------------------------------------------------------
/strategy.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | import datetime
  5 | import matplotlib.pyplot as plt
  6 | from calculator import Greek
  7 | 
  8 | path = r"D:\work\CTA\strategy\VolArb_O_SABR_0702\data"
  9 | 
 10 | opt = pd.read_csv(os.path.join(path, "opt.csv"))
 11 | df_1 = pd.read_csv(os.path.join(path, "beta_1.csv"))
 12 | df_2 = pd.read_csv(os.path.join(path, "beta_ols.csv"))
 13 | df_3 = pd.read_csv(os.path.join(path, "beta_hedge.csv"))
 14 | 
 15 | 
 16 | def get_signal_1(temp, td_quote, position=1, threshold=0, curr=[], curr_pos={}, days=30, delta_range=(0.1, 0.9), dis=1):
 17 |     res = []
 18 |     df = temp.copy()
 19 |     index = df["index"].iloc[0]
 20 |     # Filter 1. maturity & delta
 21 |     df["delta"] = df.apply(lambda x: Greek.delta(x["S"], x["K"], x["T"], x["rf"], x["sabr"], x["CorP"]), axis=1)
 22 |     df["T"] = (df["T"] * 365).apply(round)
 23 |     df = df[(df["T"] >= days) & (abs(df["delta"]) > delta_range[0]) & (abs(df["delta"]) < delta_range[1])]
 24 |     if len(df) == 0:  # current position meet the maturity limitation, cover
 25 |         for i in curr:
 26 |             res.append({"index": index, "Symbol": i, "Position": -1 * curr_pos[i], "Signal": -1,
 27 |                         "ExePrice": td_quote[td_quote["Symbol"] == i]["ExePrice"].iloc[0]})
 28 |         curr, curr_pos = [], {}
 29 |         return res, curr, curr_pos
 30 |     date = df["T"].min()
 31 |     df = df[df["T"] == date]
 32 |     df["T"] = df["T"] / 365
 33 |     # Filter 2. rest arbitrage space from current position
 34 |     if len(curr) > 0:
 35 |         curr_df = df[df["Symbol"].apply(lambda x: x in curr)].copy()
 36 |         curr_df["dis"] = curr_df["vix"] - curr_df["sabr"]
 37 |         threshold = curr_df["dis"].sum()
 38 |         if threshold <= 0 or len(curr_df[curr_df["CorP"] == 1]) == 0 or len(curr_df[curr_df["CorP"] == 0]) == 0:
 39 |             for i in curr:  # no more arbitrage space, cover current position
 40 |                 res.append({"index": index, "Symbol": i, "Position": -1 * curr_pos[i], "Signal": -1,
 41 |                             "ExePrice": td_quote[td_quote["Symbol"] == i]["ExePrice"].iloc[0]})
 42 |             threshold, curr, curr_pos = 0, [], {}
 43 |     # Filter 3. arbitrage space
 44 |     df["dis"] = df["vix"] - df["sabr"]
 45 |     if len(df[df["CorP"] == 1]) > 0 and len(df[df["CorP"] == 0]) > 0:# and df["dis"].max() < dis:
 46 |         call = df[df["CorP"] == 1]["Symbol"].iloc[np.argmax(df[df["CorP"] == 1]["dis"])]
 47 |         put = df[df["CorP"] == 0]["Symbol"].iloc[np.argmax(df[df["CorP"] == 0]["dis"])]
 48 |         new = [call, put]
 49 |         new_df = df[df["Symbol"].apply(lambda x: x in new)].copy()
 50 |         new_df.sort_values(by="dis", inplace=True)  # trade the larger arbitrage one and hedge the other one
 51 |         new = new_df["Symbol"].to_list()
 52 |         arb_space = new_df["dis"].sum()
 53 |         if arb_space > threshold:  # new combo has a larger arbitrage space, switch the position
 54 |             for i in curr:  # cover current position
 55 |                 res.append({"index": index, "Symbol": i, "Position": -1 * curr_pos[i], "Signal": -1,
 56 |                             "ExePrice": td_quote[td_quote["Symbol"] == i]["ExePrice"].iloc[0]})
 57 |             threshold, curr, curr_pos = 0, [], {}
 58 |             delta_hedge = abs(new_df["delta"].iloc[0] / new_df["delta"].iloc[1])
 59 |             new_pos = {new[0]: -position, new[1]: -position * delta_hedge}
 60 |             for i in new:  # open new position
 61 |                 res.append({"index": index, "Symbol": i, "Position": new_pos[i], "Signal": 1,
 62 |                             "ExePrice": td_quote[td_quote["Symbol"] == i]["ExePrice"].iloc[0]})
 63 |         else:
 64 |             new, new_pos = curr, curr_pos
 65 |     else:
 66 |         new, new_pos = curr, curr_pos
 67 |     return res, new, new_pos
 68 | 
 69 | 
 70 | def get_position(temp, opt, position=1, threshold=0, days=30, delta_range=(0.1, 10)):
 71 |     df = temp.sort_values(by="index").copy()
 72 |     quote = opt[["index", "Symbol", "ExePrice"]].sort_values(by="index").copy()
 73 |     quote["ExePrice"] = quote.groupby("Symbol")["ExePrice"].shift(-1)  # execute order tomorrow
 74 |     date = df["index"].unique()
 75 |     n = len(date)
 76 |     res, curr, curr_pos = [], [], {}
 77 |     for i in range(n):
 78 |         td = date[i]
 79 |         td_opt = df[df["index"] == td].copy()
 80 |         td_quote = quote[quote["index"] == td].copy()
 81 |         td_dict, curr, curr_pos = get_signal_1(td_opt, td_quote, position, threshold, curr, curr_pos, days, delta_range)
 82 |         res += td_dict
 83 |     close_pos = []
 84 |     m = len(res)
 85 |     for i in range(m-1, -1, -1):
 86 |         if res[i]["Signal"] == 1:
 87 |             close_pos.append({"index": date[-1], "Symbol": res[i]["Symbol"], "Position": -1 * res[i]["Position"],
 88 |                               "Signal": -1, "ExePrice": quote[(quote["Symbol"] == res[i]["Symbol"]) &
 89 |                                                               (quote["index"] == date[-1])]["ExePrice"].iloc[0]})
 90 |         else:  # cover all the open positions at the end of the period
 91 |             break
 92 |     return pd.DataFrame(res+close_pos)
 93 | 
 94 | 
 95 | def backtest(pos, opt):
 96 |     quote = opt[["index", "Symbol", "ExePrice"]].sort_values(by="index").copy()
 97 |     quote["ExePrice"] = quote.groupby("Symbol")["ExePrice"].shift(-1)
 98 |     quote = quote[(quote["index"] >= pos["index"].iloc[0]) & (quote["index"] <= pos["index"].iloc[-1])]
 99 |     date = quote["index"].unique()
100 |     n = len(date)
101 |     curr, curr_pos, last_price = [], {}, 0
102 |     profit, position, price = [], [], []
103 |     for i in range(n):
104 |         td_quote = quote[quote["index"] == date[i]]
105 |         td_profit = 0
106 |         for j in curr:
107 |             td_price = td_quote[quote["Symbol"] == j]["ExePrice"].iloc[0]
108 |             td_profit += curr_pos[j] * td_price
109 |         td_profit = 0 if last_price == 0 else (td_profit - last_price) / abs(last_price)
110 |         profit.append(td_profit)
111 | 
112 |         td_pos = pos[pos["index"] == date[i]]
113 |         m = len(td_pos)
114 |         for j in range(m):
115 |             if td_pos["Signal"].iloc[j] == -1:
116 |                 curr.remove(td_pos["Symbol"].iloc[j])
117 |                 curr_pos.pop(td_pos["Symbol"].iloc[j])
118 |             else:
119 |                 curr.append(td_pos["Symbol"].iloc[j])
120 |                 curr_pos[td_pos["Symbol"].iloc[j]] = td_pos["Position"].iloc[j]
121 | 
122 |         last_price = 0
123 |         for j in curr:
124 |             td_price = td_quote[quote["Symbol"] == j]["ExePrice"].iloc[0]
125 |             last_price += curr_pos[j] * td_price
126 |         position.append([i for i in curr])
127 |         price.append(last_price)
128 |     profit = pd.DataFrame({"index": date, "profit": profit, "position": position, "price": price})
129 |     profit["index"] = pd.to_datetime(profit["index"], format="%Y-%m-%d")
130 |     profit["cum_profit"] = (profit["profit"] + 1).cumprod()
131 |     return profit
132 | 
133 | 
def statistics(profit, figure=False, title="Back Test"):
    """Summarise a backtest result.

    ``profit`` needs the columns index (datetime), profit and cum_profit.
    Returns a dict with tot_ret, ann_ret (365-day compounding), ann_std
    (daily std times sqrt(252)), Sharpe (ann_ret / ann_std) and MDD, the largest
    absolute gap between the running maximum of cum_profit and cum_profit.
    When ``figure`` is true the cum_profit curve is plotted under ``title``.
    """
    nav = profit["cum_profit"]
    dates = profit["index"]

    tot_ret = (nav.iloc[-1] / nav.iloc[0] - 1)
    elapsed_days = (dates.iloc[-1] - dates.iloc[0]).days
    ann_ret = (tot_ret + 1) ** (365 / elapsed_days) - 1
    ann_std = profit["profit"].std() * np.sqrt(252)
    running_peak = np.maximum.accumulate(nav.values)

    stat = {
        "tot_ret": tot_ret,
        "ann_ret": ann_ret,
        "ann_std": ann_std,
        "Sharpe": ann_ret / ann_std,
        "MDD": np.max(running_peak - nav.values),
    }
    if not figure:
        return stat
    plt.figure(figsize=(12, 6))
    plt.plot(dates, nav)
    plt.title(title)
    plt.show()
    return stat
147 | 
148 | 
def grid_search(res, temp, para, para_name, idx, n):
    """Append to ``res`` one dict per combination of the values in ``para``.

    ``para`` maps parameter name -> iterable of candidate values and
    ``para_name`` fixes the nesting order. ``temp`` is the scratch dict holding
    the partial combination; the recursion fills positions ``idx`` .. ``n - 1``
    and removes each key again once its values have been tried.
    """
    if idx != n:
        key = para_name[idx]
        for value in para[key]:
            temp[key] = value
            grid_search(res, temp, para, para_name, idx + 1, n)
            del temp[key]
        return
    # every name is bound: store a snapshot, temp keeps being mutated afterwards
    res.append(dict(temp))
158 | 
159 | 
def optimize(df, opt, para, target="Sharpe", max=True):
    """Backtest every parameter combination in ``para`` and pick the best one.

    ``para`` maps keyword names of ``get_position`` to candidate values.
    ``target`` is the key of the ``statistics`` dict to compare; it is maximised
    when ``max`` is true and minimised otherwise.
    Returns (best parameters, best score, list of (parameters, stat) pairs).
    """
    pair = []
    grid_search(pair, {}, para, list(para.keys()), 0, len(para))

    best_para = {}
    best_res = -np.inf if max else np.inf
    res = []
    for p in pair:
        stat = statistics(backtest(get_position(df, opt, **p), opt))
        res.append((p, stat))
        score = stat[target]
        improved = score > best_res if max else score < best_res
        if improved:
            best_res, best_para = score, p
    return best_para, best_res, res
173 | 
174 | 
start, end = "2017-03-01", "2021-03-01"

# The settings below were picked with parameter sweeps of this form, run per table
# (disabled by default because every combination is a full backtest):
# best_para, best_res, res = optimize(df_1[(df_1["index"] >= start) & (df_1["index"] <= end)], opt,
#                                     {"days": [5, 10, 21, 42, 63],
#                                      "delta_range": [(0.1, 0.9), (0.2, 0.8), (0.3, 0.7)]})
# best_para, best_res, res = optimize(df_1[(df_1["index"] >= start) & (df_1["index"] <= end)], opt,
#                                     {"days": [10], "delta_range": [(0.1, 0.9)], "threshold": np.linspace(0, 0.8, 9)})
# (df_2 used the same grids; df_3 swept the threshold with "days": [63].)

# One backtest per beta variant; pos / profit / stat keep the values of the last run.
for _table, _kwargs, _title in (
        (df_1, {"position": 1, "threshold": 0, "days": 10, "delta_range": (0.1, 0.9)}, "Beta = 1"),
        (df_2, {"position": 1, "threshold": 0.4, "days": 10, "delta_range": (0.1, 0.9)}, "Beta = OLS"),
        (df_3, {"position": 1, "threshold": 0.3, "days": 63, "delta_range": (0.1, 0.9)}, "Beta = Hedging"),
):
    pos = get_position(_table[(_table["index"] >= start) & (_table["index"] <= end)], opt, **_kwargs)
    profit = backtest(pos, opt)
    stat = statistics(profit, True, _title)
205 | 


--------------------------------------------------------------------------------
/research.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pickle5
  3 | import numpy as np
  4 | import pandas as pd
  5 | from scipy import optimize
  6 | from calculator import Greek
  7 | import datetime
  8 | 
# Local directory that holds the pickled quote tables and the bond-yield CSV read below.
path = r"D:\work\CTA\data\data"
 10 | 
 11 | 
 12 | # 0. read in data
 13 | def read_pkl(path, file):
 14 |     pkl = open(os.path.join(path, file), "rb")
 15 |     df = pickle5.load(pkl)
 16 |     df.reset_index(inplace=True, drop=df.index.name in df.columns)
 17 |     return df
 18 | 
 19 | 
 20 | info = read_pkl(path, "etf_option_info_.pkl")
 21 | etf = read_pkl(path, "etf_price_standard_.pkl")
 22 | opt = read_pkl(path, "etf_option_price_standard_.pkl")
 23 | rf = pd.read_csv(os.path.join(path, "cn10ybond_yield_.csv"))
 24 | 
 25 | asset, start_date, end_date = "CN.SSE.510050", "2016-01-01", "2022-03-04"
 26 | 
 27 | 
 28 | # 1. data processing
 29 | def get_quote(df, col, asset, start, end):
 30 |     start = datetime.datetime.strptime(start, "%Y-%m-%d")
 31 |     end = datetime.datetime.strptime(end, "%Y-%m-%d")
 32 |     date = [i for i in list(df.columns)[2:] if start <= i <= end]
 33 |     df = df[(df["level_0"].apply(lambda x: asset in x)) &
 34 |             (df["level_1"].apply(lambda x: x in col))][["level_0", "level_1"] + date].copy()
 35 |     return df
 36 | 
 37 | 
def get_opt(info, etf, opt, rf, asset="CN.SSE.510050", start="2017-03-19", end="2021-03-19"):
    """Clean the four raw inputs and join them into one long option table.

    Returns (info, etf, opt, rf): the filtered contract info, the ETF quotes in
    long form, the option quotes joined with contract info, underlying close
    (UnlyPrice) and risk-free rate (rf), and the date-filtered rate table.

    NOTE: the ``rf`` DataFrame passed in is modified in place (its "index"
    column is converted to datetime) before it is filtered.
    """
    col = ["Symbol", "OptType", "StrikePrice", "BeginDate", "EndDate", "IsAdj"]
    # keep contracts of this asset whose listing period overlaps [start, end]
    info = info[(info["Symbol"].apply(lambda x: asset in x)) &
                (info["BeginDate"] <= end) & (info["EndDate"] >= start)][col].reset_index(drop=True).copy()
    info["BeginDate"] = pd.to_datetime(info["BeginDate"], format="%Y-%m-%d")
    info["EndDate"] = pd.to_datetime(info["EndDate"], format="%Y-%m-%d")
    col = ["Open", "Close", "High", "Low", "Volume"]
    etf = get_quote(etf, col, asset, start, end)
    # transpose: dates become rows; the field rows are labelled with `col` in order
    # (assumes get_quote returns the fields in exactly this order -- TODO confirm)
    etf = pd.DataFrame(etf[list(etf.columns)[2:]].values.T, columns=col, index=list(etf.columns)[2:]).reset_index()
    col = ["OPEN", "CLOSE", "HIGH", "LOW", "SETTLE", "VOLUME"]
    opt = get_quote(opt, col, asset, start, end)
    # reshape wide (one column per date) into long (one row per symbol and date)
    opt.set_index(["level_0", "level_1"], inplace=True)
    opt = opt.unstack()
    opt.columns = opt.columns.swaplevel()
    opt = opt.stack().reset_index()
    # dates in the yield file are written in the Chinese year/month/day format
    rf["index"] = pd.to_datetime(rf["index"], format="%Y年%m月%d日")
    rf = rf[(rf["index"] >= start) & (rf["index"] <= end)]
    opt.rename(columns={"level_0": "Symbol", "level_1": "index"}, inplace=True)
    opt = pd.merge(opt, info, how="left", on="Symbol")
    opt = opt[~opt["IsAdj"]]  # drop contracts flagged IsAdj
    opt = pd.merge(opt, etf[["index", "Close"]].rename(columns={"Close": "UnlyPrice"}), how="left", on="index")
    opt = pd.merge(opt, rf[["index", "Close"]].rename(columns={"Close": "rf"}), how="left", on="index")
    opt["rf"] = 0.01 * opt["rf"]  # scale by 1/100 (presumably percent -> decimal)
    return info, etf, opt, rf
 62 | 
 63 | 
# Replace the raw inputs by their cleaned versions for the configured asset and window.
info, etf, opt, rf = get_opt(info, etf, opt, rf, asset, start_date, end_date)
 65 | 
 66 | 
 67 | # 2. generate beta with rolling regression
 68 | def get_atm(temp):
 69 |     df = temp.copy()
 70 |     idx = np.argmin(abs(df["UnlyPrice"] - df["StrikePrice"]))
 71 |     df = df.iloc[idx, :]
 72 |     T = (df["EndDate"] - df["index"]).days / 365
 73 |     F = df["UnlyPrice"] * np.exp(df["rf"] * T)
 74 |     vix = Greek.vix(df["UnlyPrice"], df["StrikePrice"], T, df["rf"], df["SETTLE"], 1 * (df["OptType"] == "Call"))
 75 |     df["ln_f"] = np.log(F)
 76 |     df["ln_sigma"] = np.nan if vix < 0.01 or vix >= 1 else np.log(vix)
 77 |     return df
 78 | 
 79 | 
# One row per date: the contract whose strike is nearest to the underlying price.
atm_opt = opt.groupby("index").apply(lambda x: get_atm(x)).reset_index(drop=True)
 81 | 
 82 | 
 83 | def ols(Y, X):  # perform OLS, already check with statsmodels.api.OLS
 84 |     temp = np.c_[Y, X]
 85 |     temp = temp[~np.array(np.isnan(temp).any(axis=1)).reshape(len(temp))]
 86 |     Y, X = temp[:, 0], temp[:, 1:]
 87 |     W = np.linalg.inv((X.T).dot(X)).dot(X.T).dot(Y)  # W = ([X^TX]^-1)X^TY
 88 |     Y_ = np.dot(X, W)  # Y_hat = XW
 89 |     E = Y-Y_  # residual = Y - Y_hat
 90 |     return W, Y_, E
 91 | 
 92 | 
 93 | def rolling_ols(Y, X, window=252):
 94 |     W = np.repeat(np.nan, (window-1)*X.shape[1]).reshape((window-1), X.shape[1])
 95 |     n = len(Y)
 96 |     for i in range(window, n+1, 1):
 97 |         temp_W, temp_Y, temp_E = ols(Y[i-window:i, :], X[i-window:i, :])
 98 |         W = np.concatenate((W, temp_W.T), axis=0)
 99 |     return W
100 | 
101 | 
# Rolling regression of ln_sigma on a constant and ln_f; the slope + 1 is used as beta.
atm_opt["c"] = 1
# np.asmatrix replaces the np.mat alias, which no longer exists in NumPy 2.0;
# rolling_ols relies on matrix semantics (2-D slices and a 2-D transposed result).
atm_opt[["alpha", "beta"]] = np.array(rolling_ols(np.asmatrix(atm_opt[["ln_sigma"]]),
                                                  np.asmatrix(atm_opt[["c", "ln_f"]])))
# .ffill() is the supported spelling of the deprecated fillna(method="ffill")
atm_opt[["alpha", "beta"]] = atm_opt[["alpha", "beta"]].ffill()
atm_opt["beta"] = atm_opt["beta"] + 1
atm_opt["ln_sigma"] = atm_opt["ln_sigma"].ffill()
opt = pd.merge(opt, atm_opt[["index", "ln_f", "ln_sigma", "beta"]], how="left", on="index")
opt.rename(columns={"beta": "beta_ols"}, inplace=True)
opt["beta_1"] = 1  # Geometric Brownian Motion
110 | 
111 | 
112 | # 3. estimate 1: calibration error
def mse(args, VIX, S, K, T, rf, alpha, beta):  # optimize function: minimize MSE(vix, sabr)
    """Mean squared difference between ``VIX`` and ``Greek.sabr`` values.

    ``Greek.sabr`` is defined outside this section; presumably it returns the
    SABR model volatility for one contract (TODO confirm).

    :param args: free parameters forwarded as the last argument of
        ``Greek.sabr`` (the quantity being optimised).
    :param VIX, S, K, T, rf: equally long, position-indexable sequences with
        one entry per contract.
    :param alpha, beta: scalars forwarded unchanged to ``Greek.sabr``.
    :return: sum of squared differences divided by ``len(S)``.
    """
    n = len(S)
    squared_errors = (
        (VIX[i] - Greek.sabr(S[i], K[i], T[i], rf[i], alpha, beta, args)) ** 2
        for i in range(n)
    )
    return sum(squared_errors) / n
118 | 
119 | 
def dmse(args, VIX, S, K, T, rf, alpha, beta):  # derivative of optimize function
    """Gradient of ``mse`` with respect to the two entries of ``args``.

    Assumes ``Greek.dsabr_v`` and ``Greek.dsabr_r`` are the partial
    derivatives of ``Greek.sabr`` with respect to ``args[0]`` and ``args[1]``
    (both are defined outside this section - TODO confirm).

    :return: ``np.array`` of length 2, ``2 / n`` times the accumulated
        ``-(VIX - sabr) * d sabr`` terms.
    """
    n = len(S)
    grad_v = grad_r = 0
    for i in range(n):
        model_inputs = (S[i], K[i], T[i], rf[i], alpha, beta, args)
        residual = VIX[i] - Greek.sabr(*model_inputs)
        grad_v -= residual * Greek.dsabr_v(*model_inputs)
        grad_r -= residual * Greek.dsabr_r(*model_inputs)
    return 2 * np.array([grad_v, grad_r]) / n
128 | 
129 | 
def opt_calib(VIX, S, K, T, rf, alpha, beta):
    """Minimise ``mse`` over its two free parameters with Powell's method.

    The search starts at ``[0.5, -0.5]`` with bounds ``[0, inf)`` for the
    first parameter and ``[-1, 1]`` for the second (presumably the SABR
    vol-of-vol and correlation - TODO confirm against ``Greek.sabr``).

    Powell's method is derivative-free: SciPy ignores a ``jac`` argument for
    it and emits a RuntimeWarning on every call, so no gradient is passed.
    ``dmse`` remains available for a gradient-based method.

    :param VIX, S, K, T, rf: per-contract sequences forwarded to ``mse``.
    :param alpha, beta: fixed scalars forwarded to ``mse``.
    :return: the ``scipy.optimize.OptimizeResult``; ``.x`` holds the fitted
        parameter pair.
    """
    args_ = (VIX, S, K, T, rf, alpha, beta, )
    x0_, bound_ = np.array([0.5, -0.5]), ((0, None), (-1, 1))
    res = optimize.minimize(mse, x0_, args=args_, method="Powell", bounds=bound_,
                            tol=1e-16, options={"maxiter": 100})
    return res
136 | 
137 | 
def sim_calib(temp, beta):
    """Calibrate the model to one group of option rows with beta held fixed.

    Steps: compute ``Greek.vix`` for every row, keep rows whose value lies in
    [0.01, 1), derive ``alpha`` from the first remaining row as
    ``exp(ln_sigma + (1 - beta) * ln_f)``, fit the remaining parameters with
    ``opt_calib`` and store the fitted ``Greek.sabr`` value per row.

    :param temp: DataFrame with columns Symbol, index, SETTLE, StrikePrice,
        UnlyPrice, rf, ln_f, ln_sigma, HIGH, LOW, CLOSE, OPEN, EndDate,
        OptType and the column named by ``beta``.
    :param beta: name of the column of ``temp`` that holds the beta to use.
    :return: DataFrame of the kept rows with added columns ExePrice, T, CorP,
        vix, alpha and sabr; an empty DataFrame if no row passes the filter.
    """
    picked = ["Symbol", "index", "SETTLE", "StrikePrice", "UnlyPrice", "rf", "ln_f", "ln_sigma", beta]
    df = temp[picked].copy()
    df.columns = ["Symbol", "index", "mkt", "K", "S", "rf", "ln_f", "ln_sigma", "beta"]
    df["ExePrice"] = (temp["HIGH"] + temp["LOW"] + temp["CLOSE"] + temp["OPEN"]) / 4
    df["T"] = (temp["EndDate"] - temp["index"]).apply(lambda x: x.days / 365)
    df["CorP"] = 1 * (temp["OptType"] == "Call")  # 1 for "Call", 0 otherwise

    def _vix_of(row):
        return Greek.vix(row["S"], row["K"], row["T"], row["rf"], row["mkt"], row["CorP"])

    df["vix"] = df.apply(_vix_of, axis=1)
    in_range = df["vix"].ge(0.01) & df["vix"].lt(1)
    df = df[in_range]  # 0. df may be empty after this filter
    if df.empty:
        return pd.DataFrame()
    VIX = df["vix"].values
    S = df["S"].values
    K = df["K"].values
    T = df["T"].values
    rf = df["rf"].values
    # beta and the ATM quantities are read from the first remaining row.
    beta = df["beta"].iloc[0]
    alpha = np.exp(df["ln_sigma"].iloc[0] + (1 - beta) * df["ln_f"].iloc[0])
    df["alpha"] = alpha
    # Known issues: 1. sabr may return NaN; 2. sim.x may come back with absurd values
    sim = opt_calib(VIX, S, K, T, rf, alpha, beta)

    def _sabr_of(row):
        return Greek.sabr(row["S"], row["K"], row["T"], row["rf"], alpha, beta, sim.x)

    df["sabr"] = df.apply(_sabr_of, axis=1)
    return df
155 | 
156 | 
157 | # 4. estimate 2: hedging error
def hedge_error(args, MKT, MKT_1, CorP, weight, S_1, K, T_1, rf_1, ln_f_1, ln_sigma_1, dF, dVIX, idx=None):
    """Weighted squared relative error of a delta/vega price prediction.

    For each selected contract ``i`` the price is predicted as
    ``MKT_1[i] + delta * dF[i] + vega * dVIX[i]``, with ``delta`` and ``vega``
    taken from ``Greek.sabr_delta`` / ``Greek.sabr_vega`` evaluated on the
    ``_1`` inputs. The term ``(prediction / MKT[i] - 1) ** 2 * weight[i]`` is
    accumulated. The result is a weighted sum; weights are not normalised.

    :param args: ``[beta, *rest]``; ``rest`` is forwarded as the last
        parameter argument of the ``Greek`` functions.
    :param MKT, MKT_1: prices to match and prices the prediction starts from.
    :param CorP: per-contract flag forwarded to ``Greek.sabr_delta``.
    :param weight: per-contract weights.
    :param S_1, K, T_1, rf_1: per-contract inputs of the ``Greek`` functions.
    :param ln_f_1, ln_sigma_1: only element 0 of each is used, to derive
        ``alpha = exp(ln_sigma_1[0] + (1 - beta) * ln_f_1[0])``.
    :param dF, dVIX: per-contract changes multiplied by delta and vega.
    :param idx: positions of the contracts to include; all when ``None``.
    :return: the accumulated weighted error.
    """
    beta, args = args[0], args[1:]
    alpha = np.exp(ln_sigma_1[0] + (1 - beta) * ln_f_1[0])
    rows = range(len(MKT)) if idx is None else idx
    res = 0
    for i in rows:
        delta = Greek.sabr_delta(S_1[i], K[i], T_1[i], rf_1[i], alpha, beta, args, CorP[i])
        vega = Greek.sabr_vega(S_1[i], K[i], T_1[i], rf_1[i], alpha, beta, args)
        predicted = MKT_1[i] + delta * dF[i] + vega * dVIX[i]
        res += (predicted / MKT[i] - 1) ** 2 * weight[i]
    return res
169 | 
170 | 
def opt_hedge(MKT, MKT_1, CorP, weight, S_1, K, T_1, rf_1, ln_f_1, ln_sigma_1, dF, dVIX, idx=None):
    """Minimise ``hedge_error`` over three parameters with Powell's method.

    The first parameter is beta (bounded to [0, 1]); the other two are passed
    on to the ``Greek`` functions by ``hedge_error`` and are bounded to
    [0, inf) and [-1, 1]. The search starts at ``[0.5, 0.5, -0.5]``.

    All arguments are forwarded unchanged to ``hedge_error``.

    :return: the ``scipy.optimize.OptimizeResult``; ``.x[0]`` is beta.
    """
    start = np.array([0.5, 0.5, -0.5])
    box = ((0, 1), (0, None), (-1, 1))
    forwarded = (MKT, MKT_1, CorP, weight, S_1, K, T_1, rf_1, ln_f_1, ln_sigma_1, dF, dVIX, idx)
    return optimize.minimize(
        hedge_error,
        start,
        args=forwarded,
        method="Powell",
        bounds=box,
        tol=1e-16,
        options={"maxiter": 100},
    )
177 | 
178 | 
def sim_hedge(temp):
    """Fit beta and the remaining model parameters by minimising hedging error.

    For one group of option rows, the current and the lagged (``_1``) values
    returned by ``Greek.vix`` are computed, rows outside [0.01, 1) on either
    are dropped, the rest is sorted by ``|S - K|`` and ``opt_hedge`` is run
    using only the (up to) two rows closest to the money. The fitted
    parameters are then used to store a ``Greek.sabr`` value for every row.

    :param temp: DataFrame with columns Symbol, index, SETTLE, StrikePrice,
        UnlyPrice, rf, SETTLE_1, UnlyPrice_1, rf_1, ln_f_1, ln_sigma_1, HIGH,
        LOW, CLOSE, OPEN, EndDate and OptType.
    :return: DataFrame of the kept rows (sorted by ``dis``) with the added
        columns, or an empty DataFrame when no row passes the filter.
    """
    df = temp[["Symbol", "index", "SETTLE", "StrikePrice", "UnlyPrice", "rf",
               "SETTLE_1", "UnlyPrice_1", "rf_1", "ln_f_1", "ln_sigma_1"]].copy()
    df.columns = ["Symbol", "index", "mkt", "K", "S", "rf", "mkt_1", "S_1", "rf_1", "ln_f_1", "ln_sigma_1"]
    df["ExePrice"] = (temp["HIGH"] + temp["LOW"] + temp["CLOSE"] + temp["OPEN"]) / 4
    df["T"] = (temp["EndDate"] - temp["index"]).apply(lambda x: x.days / 365)
    # NOTE(review): the lagged maturity is taken as exactly one calendar day
    # longer; the "_1" columns are the previous row per Symbol, which may be
    # more than one day earlier (weekends/holidays) - confirm.
    df["T_1"] = df["T"] + 1 / 365
    df["CorP"] = 1 * (temp["OptType"] == "Call")  # 1 for "Call", 0 otherwise
    # Weight decays with the squared distance between S * exp(rf * T) and the strike.
    df["weight"] = 1 / (1 + (df["S"] * np.exp(df["rf"] * df["T"]) - df["K"]) ** 2)
    df["vix"] = df.apply(lambda x: Greek.vix(x["S"], x["K"], x["T"], x["rf"], x["mkt"], x["CorP"]), axis=1)
    df["vix_1"] = df.apply(lambda x: Greek.vix(x["S_1"], x["K"], x["T_1"], x["rf_1"], x["mkt_1"], x["CorP"]), axis=1)
    # Keep rows whose current and lagged values both lie in [0.01, 1); NaN rows drop out too.
    df = df[(df["vix"] >= 0.01) & (df["vix"] < 1) & (df["vix_1"] >= 0.01) & (df["vix_1"] < 1)]
    # Sort by distance to the money so that position 0 is the closest contract;
    # every positional array below relies on this order.
    df["dis"] = abs(df["S"] - df["K"])
    df.sort_values(by="dis", inplace=True)
    n = len(df)
    if n == 0:
        return pd.DataFrame()
    # Only the (up to) two nearest-the-money rows enter the objective.
    idx = list(range(min(2, n)))
    MKT, MKT_1, CorP, weight = df["mkt"].values, df["mkt_1"].values, df["CorP"].values, df["weight"].values
    S_1, K, T_1, rf_1 = df["S_1"].values, df["K"].values, df["T_1"].values, df["rf_1"].values
    ln_f_1, ln_sigma_1 = df["ln_f_1"].values, df["ln_sigma_1"].values
    # Changes from the lagged to the current observation: S * exp(rf * T) and vix.
    dF = (df["S"] * np.exp(df["rf"] * df["T"]) - df["S_1"] * np.exp(df["rf_1"] * df["T_1"])).values
    dVIX = (df["vix"] - df["vix_1"]).values
    sim = opt_hedge(MKT, MKT_1, CorP, weight, S_1, K, T_1, rf_1, ln_f_1, ln_sigma_1, dF, dVIX, idx)
    # sim.x = [beta, *remaining parameters]; alpha is rebuilt from the lagged
    # ln_sigma / ln_f of the first (nearest-the-money) row.
    beta = sim.x[0]
    alpha = np.exp(ln_sigma_1[0] + (1 - beta) * ln_f_1[0])
    df["sabr"] = df.apply(lambda x: Greek.sabr(x["S"], x["K"], x["T"], x["rf"], alpha, beta, sim.x[1:]), axis=1)
    return df
207 | 
208 | 
# Previous-row values within each Symbol, stored under a "_1" suffix.
opt[["SETTLE_1", "UnlyPrice_1", "rf_1", "ln_f_1", "ln_sigma_1"]] = (
    opt.groupby("Symbol")[["SETTLE", "UnlyPrice", "rf", "ln_f", "ln_sigma"]].shift(periods=1)
)
# ExePrice: row-wise mean of the HIGH, LOW, CLOSE and OPEN columns.
opt["ExePrice"] = opt[["HIGH", "LOW", "CLOSE", "OPEN"]].mean(axis="columns")
212 | # path = r"D:\work\CTA\strategy\VolArb_O_SABR_0702\data"
213 | # info.to_csv(os.path.join(path, "info.csv"), index=False)
214 | # etf.to_csv(os.path.join(path, "etf.csv"), index=False)
215 | # opt.to_csv(os.path.join(path, "opt.csv"), index=False)
216 | # rf.to_csv(os.path.join(path, "rf.csv"), index=False)
217 | # df_1 = opt.groupby("index").apply(lambda x: sim_calib(x, "beta_1"))
218 | # df_1.to_csv(os.path.join(path, "beta_1.csv"), index=False)
219 | # df_2 = opt.groupby("index").apply(lambda x: sim_calib(x, "beta_ols"))
220 | # df_2.to_csv(os.path.join(path, "beta_ols.csv"), index=False)
221 | # df_3 = opt.groupby("index").apply(lambda x: sim_hedge(x))
222 | # df_3.to_csv(os.path.join(path, "beta_hedge.csv"), index=False)
223 | # temp = opt[(opt["index"] == "2019-02-11") & (opt["EndDate"] == "2019-03-27")].sort_values(by="StrikePrice").copy()
224 | 


--------------------------------------------------------------------------------