├── .idea ├── .gitignore ├── encodings.xml ├── vcs.xml ├── dictionaries │ └── ebellord.xml ├── modules.xml ├── misc.xml └── opt_trade.iml ├── LICENSE ├── README.md ├── XmlConverter.py ├── implied_to_realized.py ├── option_stream ├── stream_utilities.py └── main.py ├── account_management.py ├── overlay_presentation.py ├── option_data_new.py ├── vix_utilities.py ├── option_utilities.py ├── option_daily_prod.py ├── option_simulation.py └── spx_data_update.py /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/dictionaries/ebellord.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | cnopts 5 | datashop 6 | livevol 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 10 | -------------------------------------------------------------------------------- /.idea/opt_trade.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Edmund Bellord 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Historical simulation engine is deprecated as I rebuild the daily production application 3 | 4 | The option_stream directory contains all the code needed to run a Bokeh application that displays a live list of SPXW options to trade according to the algorithm described below 5 | 6 | 7 | # Volatility Trading and Historical Simulation Engine 8 | 9 | Project contains classes and functions used to build historical simulations and daily trading estimates for two equity index volatility-based investment strategies: 10 | 11 | - Equity index options: Algorithm is drawn from JUREK, J. W. and STAFFORD, E. (2015), [The Cost of Capital for Alternative Investments](https://www.hbs.edu/faculty/Publication%20Files/Cost%20of%20Capital%20for%20Alternative%20Investments_57a4f444-65fa-4f0c-b51a-116408f1dab9.pdf), The Journal of Finance 12 | 13 | - VIX futures: Algorithm is drawn from Cheng, I-H. (2018), [The VIX Premium](https://ssrn.com/abstract=2495414), Review of Financial Studies, Forthcoming 14 | 15 | 16 | ### Prerequisites 17 | 18 | Requires an active session of Interactive Brokers TWS or IB Gateway 19 | 20 | Requires the directory structure defined in the UpdateSP500Data class 21 | 22 | ### Example 23 | 24 | Equity index option simulation example: 25 | ```python 26 | from option_simulation import OptionSimulation 27 | z_score_strike = -1 28 | option_life_in_months = 2 29 | opt_sim = OptionSimulation(update_simulation_data=False) 30 | sim_output = [opt_sim.trade_sim(z_score_strike, option_life_in_months, trade_day_type=day_type) 31 | for day_type in ['EOM', (0, 22)]] 32 | ``` 33 | Equity index option [notebooks](https://github.com/ejpjapan/jupyter_nb/tree/master/spx/) 34 | 35 | VIX futures [notebooks](https://github.com/ejpjapan/jupyter_nb/tree/master/vixp/) 36 | 37 | ## Authors 38 | 39 | * **Edmund Bellord** - [ejpjapan](https://github.com/ejpjapan/) 40 | 41 | ## License 42 | 43 | This project is licensed under the MIT License - see the [LICENSE](https://github.com/ejpjapan/opt_trade/blob/master/LICENSE) file for details 44 | -------------------------------------------------------------------------------- /XmlConverter.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | from datetime import datetime 3 | import pandas as pd 4 | 5 | 6 | class XmlConverter(object): 7 | """ 8 | Converts XML to a pandas DataFrame. 9 | 10 | Example Usage: 11 | 12 | converter = XmlConverter() 13 | converter.parse() 14 | df = converter.build_dataframe() 15 | """ 16 | 17 | def __init__(self, input_path='feds200628.xls', first_header='SVENY01', last_header='TAU2'): 18 | """ 19 | Constructs a new XmlConverter instance. 20 | 21 | Inputs: 22 | - input_path: Path to the input xml file. 23 | - first_header: String containing the first header. 24 | - last_header: String containing the last header. 25 | """ 26 | 27 | self._first_header = first_header 28 | self._last_header = last_header 29 | self._header_list = [] 30 | self._table_reached = False 31 | self._input_path = input_path 32 | self._parser = etree.XMLParser(target=self) 33 | self._in_headers = False 34 | self._in_data_tag = False 35 | self._in_actual_data = False 36 | self._data = {} 37 | self._is_possibly_empty = False 38 | 39 | def parse(self): 40 | """ 41 | Parses the xml file to generate a dictionary containing the data.
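Implementation note: this instance is registered as the lxml parser target (etree.XMLParser(target=self) in __init__), so etree.parse streams the document and drives the start/end/data callbacks below rather than building a full element tree in memory.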
42 | """ 43 | etree.parse(self._input_path, self._parser) 44 | 45 | def start(self, tag, attrib): 46 | """ 47 | Callback function for XMLParser event of start of a new tag. 48 | 49 | Inputs: 50 | - tag: String containing the tag name. 51 | - attrib: String containing the attributes for this tag. 52 | """ 53 | if tag.split('}', 1)[1] == 'Data': 54 | self._in_data_tag = True 55 | 56 | if self._in_actual_data: 57 | self._is_possibly_empty = True 58 | 59 | def end(self, tag): 60 | """ 61 | Callback function for XMLParser event of end of a tag 62 | 63 | Inputs: 64 | - tag: String containing the tag name. 65 | """ 66 | if self._is_possibly_empty: 67 | self._is_possibly_empty = False 68 | self._data[self._curr_date_idx].append('0') 69 | 70 | self._in_data_tag = False 71 | 72 | def data(self, data): 73 | """ 74 | Callback function for XMLParser event of data of a tag 75 | 76 | Inputs: 77 | - data: String containing the text data for this tag. 78 | """ 79 | if self._in_data_tag == True: 80 | if self._in_headers: 81 | if data == self._last_header: 82 | self._in_headers = False 83 | self._header_list.append(data) 84 | self._in_actual_data = True 85 | 86 | return 87 | 88 | self._header_list.append(data) 89 | 90 | return 91 | 92 | if data == self._first_header: 93 | self._in_headers = True 94 | self._header_list.append(data) 95 | 96 | return 97 | 98 | if self._in_actual_data: 99 | self._is_possibly_empty = False 100 | 101 | try: 102 | datetime.strptime(data, '%Y-%m-%d') 103 | self._curr_date_idx = data 104 | self._data[self._curr_date_idx] = [] 105 | 106 | except ValueError: 107 | self._data[self._curr_date_idx].append(data) 108 | 109 | def close(self): 110 | """ 111 | Callback function for XMLParser event of close. 112 | """ 113 | pass 114 | 115 | def build_dataframe(self): 116 | """ 117 | Builds a pandas dataframe. 
118 | """ 119 | df = pd.DataFrame.from_dict(self._data, orient='index') 120 | df.columns = self._header_list 121 | df = df.set_index(pd.to_datetime(df.index)) 122 | df = df.astype(float) 123 | 124 | return df -------------------------------------------------------------------------------- /implied_to_realized.py: -------------------------------------------------------------------------------- 1 | from option_utilities import read_feather, write_feather 2 | from spx_data_update import UpdateSP500Data, IbWrapper 3 | from ib_insync import IB, Index, util 4 | import numpy as np 5 | import pandas as pd 6 | from arch import arch_model 7 | import matplotlib.pyplot as plt 8 | import matplotlib.cm as cm 9 | 10 | 11 | class SPX5MinuteBars: 12 | 13 | def __init__(self, update_bars=True, window=500, horizon=50, realized_window=22): 14 | self.bars = self.spx_bar_history(update_bars) 15 | self.vol_risk_premium = self.vrp() 16 | self.har_vol = pd.DataFrame() 17 | self.window = window 18 | self.horizon = horizon 19 | self.realized_window = realized_window 20 | 21 | @staticmethod 22 | def spx_bar_history(update_bars=True): 23 | file_name = 'sp500_5min_bars' 24 | df_hist = read_feather(UpdateSP500Data.DATA_BASE_PATH / file_name) 25 | # Download latest 26 | if update_bars: 27 | ibw = IbWrapper() 28 | ib = ibw.ib 29 | contract = Index('SPX', 'CBOE', 'USD') 30 | bars = ib.reqHistoricalData( 31 | contract, 32 | endDateTime='', 33 | durationStr='1 M', 34 | barSizeSetting='5 mins', 35 | whatToShow='TRADES', 36 | useRTH=True, 37 | formatDate=1) 38 | 39 | ib.disconnect() 40 | df = util.df(bars) 41 | df = df.set_index('date') 42 | full_hist = df.combine_first(df_hist) 43 | write_feather(full_hist, UpdateSP500Data.DATA_BASE_PATH / file_name) 44 | else: 45 | full_hist = df_hist.copy() 46 | return full_hist 47 | 48 | @staticmethod 49 | def vrp(): 50 | vrp = pd.read_csv(UpdateSP500Data.DATA_BASE_PATH / 'xl' / 'vol_risk_premium.csv', 51 | usecols=['VRP', 'EVRP', 'IV', 'RV', 'ERV']) 52 | vrp = vrp.set_index(pd.date_range('31-jan-1990', '31-dec-2017', freq='BM')) 53 | return vrp 54 | 55 | def plot_vol_forecast(self, num_days=10): 56 | expected_volatility = self.expected_vol 57 | fig, ax = plt.subplots(figsize=(12, 5), dpi=80, facecolor='w', edgecolor='k') 58 | 59 | for i in range(-1, -(num_days + 1), -1): 60 | if i == -1: 61 | expected_volatility.iloc[:, -1].plot(color='r') 62 | else: 63 | c = cm.viridis(-i / num_days, 1) 64 | expected_volatility.iloc[:, i].plot(color=c) 65 | 66 | plt.autoscale(enable=True, axis='x', tight=True) 67 | legend_labels = expected_volatility.iloc[:, -num_days:].columns.strftime('%d-%b') 68 | _ = plt.legend(legend_labels[::-1]) 69 | _ = plt.title('HAR Volatity Forecast') 70 | _ = ax.set_ylabel('Annualized Vol %') 71 | return ax 72 | 73 | @property 74 | def realized_vol(self): 75 | """Annualized daily volatility calculated as sum of squared 5 minute returns""" 76 | squared_diff = (np.log(self.bars['close'] / self.bars['close'].shift(1))) ** 2 77 | realized_quadratic_variation = squared_diff.groupby(squared_diff.index.date).sum() 78 | realized_quadratic_variation = realized_quadratic_variation.reindex( 79 | pd.to_datetime(realized_quadratic_variation.index)) 80 | daily_vol = np.sqrt(realized_quadratic_variation * 252) 81 | daily_vol = daily_vol.rename('rv_daily') 82 | return daily_vol 83 | 84 | @property 85 | def expected_vol(self): 86 | """Expected volatility out to 50 days using HAR model""" 87 | if self.har_vol.empty: 88 | daily_vol = self.realized_vol 89 | series_list = [] 90 | for i in 
range(self.window, len(daily_vol) + 1): 91 | am = arch_model(daily_vol[i - self.window:i], mean='HAR', lags=[1, 5, 22], vol='Constant') 92 | res = am.fit() 93 | forecasts = res.forecast(horizon=self.horizon) 94 | np_vol = forecasts.mean.iloc[-1] 95 | series_list.append(np_vol) 96 | e_vol = pd.concat(series_list, axis=1) 97 | self.har_vol = e_vol 98 | else: 99 | e_vol = self.har_vol 100 | return e_vol 101 | 102 | @property 103 | def realized_variance(self): 104 | """Realized variance see VRP literature""" 105 | realized_quadratic_variation = (self.realized_vol**2) / 252 106 | rv = realized_quadratic_variation.rolling(self.realized_window).sum() 107 | rv = rv.rename('RV_CALC') 108 | return rv 109 | 110 | @property 111 | def daily_return(self): 112 | daily_ret = self.bars['close'].groupby(self.bars.index.date).last().pct_change() 113 | return daily_ret 114 | 115 | -------------------------------------------------------------------------------- /option_stream/stream_utilities.py: -------------------------------------------------------------------------------- 1 | # from future.backports.datetime import datetime 2 | from ib_insync import IB 3 | import nest_asyncio 4 | from dateutil.relativedelta import relativedelta 5 | import pandas as pd 6 | import pandas_datareader.data as web 7 | from datetime import datetime, timedelta 8 | import plistlib 9 | from pathlib import Path 10 | 11 | class IbWrapper: 12 | def __init__(self, client_id=30): 13 | """Wrapper function for Interactive Broker API connection.""" 14 | self.ib = IB() 15 | self.ib.errorEvent += self.on_error # Attach the error handler 16 | nest_asyncio.apply() 17 | self.client_id = client_id # Store client_id for reuse 18 | 19 | def connect_to_ib(self): 20 | """Attempt to connect to IB Gateway or TWS, suppressing connection errors.""" 21 | if self.ib.isConnected(): 22 | print("An existing IB connection is found. Disconnecting before reconnecting.") 23 | self.ib.disconnect() 24 | 25 | try: 26 | # Attempt to connect to IB Gateway 27 | self.ib.connect('127.0.0.1', port=4001, clientId=self.client_id, timeout=10) 28 | print("Connected to IB Gateway on port 4001") 29 | except ConnectionRefusedError: 30 | print("IB Gateway connection failed. Attempting to connect to TWS...") 31 | try: 32 | # Attempt to connect to TWS as a fallback 33 | self.ib.connect('127.0.0.1', port=7496, clientId=self.client_id, timeout=10) 34 | print("Connected to TWS on port 7496") 35 | except ConnectionRefusedError: 36 | raise ConnectionError("TWS connection also failed. 
Please ensure the API port is open and try again.") 37 | 38 | def disconnect_from_ib(self): 39 | """Disconnect from IB Gateway or TWS.""" 40 | if self.ib.isConnected(): 41 | self.ib.disconnect() 42 | print("Disconnected from IB.") 43 | 44 | @staticmethod 45 | def on_error(req_id, error_code, error_string, contract): 46 | """Custom error handling method for the IB API.""" 47 | if error_code == 200: 48 | # Suppress or log the specific Error 200 - No security definition found 49 | pass # Suppressing the output completely 50 | elif error_code in [2104, 2106, 2158]: 51 | # These are not errors, just information about data farm connections 52 | pass # Suppressing the output completely 53 | else: 54 | print(f"Error {error_code}, reqId {req_id}: {error_string}, contract: {contract}") 55 | 56 | def __enter__(self): 57 | """Enter the runtime context related to this object and connect to IB.""" 58 | self.connect_to_ib() # Connect to IB 59 | return self # Return the instance so that `self.ib` can be accessed 60 | 61 | def __exit__(self, exc_type, exc_val, exc_tb): 62 | """Exit the runtime context and disconnect from IB.""" 63 | self.disconnect_from_ib() # Disconnect from IB 64 | 65 | 66 | class USSimpleYieldCurve: 67 | """Simple US Zero coupon yield curve for today up to one year""" 68 | # Simple Zero yield curve built from TBill discount yields and effective fund rate 69 | # This is a simplified approximation for a full term structure model 70 | # Consider improving by building fully specified yield curve model using 71 | # Quantlib 72 | def __init__(self): 73 | end = datetime.now() 74 | start = end - timedelta(days=10) 75 | zero_rates = web.DataReader(['DFF', 'DTB4WK', 'DTB3', 'DTB6', 'DTB1YR', 'DGS2'], 'fred', start, end) 76 | zero_rates = zero_rates.dropna(axis=0) 77 | zero_yld_date = zero_rates.index[-1] 78 | new_index = [zero_yld_date + relativedelta(days=1), 79 | zero_yld_date + relativedelta(weeks=4), 80 | zero_yld_date + relativedelta(months=3), 81 | zero_yld_date + relativedelta(months=6), 82 | zero_yld_date + relativedelta(years=1), 83 | zero_yld_date + relativedelta(years=2)] 84 | dt_time_index = pd.DatetimeIndex(new_index, tz='America/New_York') 85 | zero_curve = pd.DataFrame(data=zero_rates.iloc[-1].values, index=pd.DatetimeIndex(dt_time_index.date), 86 | columns=[end]) 87 | self.zero_curve = zero_curve.resample('D').interpolate(method='polynomial', order=2) 88 | 89 | def get_zero4_date(self, input_date): 90 | """Retrieve zero yield maturity for input_date""" 91 | return self.zero_curve.loc[input_date] 92 | 93 | 94 | def illiquid_equity(discount=0.5): 95 | return sum(config_key('illiquid_equity').values()) * discount 96 | 97 | 98 | def config_key(dict_key: str): 99 | file_name = Path.home() / 'Library' / 'Mobile Documents' / 'com~apple~CloudDocs' / 'localDB' / 'config.plist' 100 | assert (file_name.is_file()) 101 | f = open(str(file_name), 'rb') 102 | pl = plistlib.load(f) 103 | return pl[dict_key] -------------------------------------------------------------------------------- /account_management.py: -------------------------------------------------------------------------------- 1 | import lxml.objectify as lxml_objectify 2 | import numpy as np 3 | import pandas as pd 4 | 5 | 6 | class FlexStatement(object): 7 | 8 | def __init__(self, path): 9 | self.path = path 10 | 11 | # This is a large object, so I let it be garbage-collected 12 | stmt = self.get_lxml_root() 13 | 14 | self.perf = clean_perf(stmt) 15 | self.option_perf = clean_option_perf(self.perf) 16 | 17 | 
self.option_perf_underlying = rollup_option_underlying( 18 | self.option_perf) 19 | 20 | self.stock_perf = clean_stock_perf(self.perf) 21 | 22 | self.cash_transactions = clean_cash(stmt) 23 | self.dividends = clean_dividends(self.cash_transactions) 24 | 25 | dividends_by_symbol = (self.dividends.groupby('symbol') 26 | .amount.sum()) 27 | 28 | self.mtm_ytd = pd.DataFrame({ 29 | 'Stocks': self.stock_perf.mtmYTD, 30 | 'Options': self.option_perf_underlying.mtmYTD, 31 | 'Dividends': dividends_by_symbol, 32 | }).fillna(0) 33 | 34 | self.realized = pd.DataFrame({ 35 | 'Stocks': self.stock_perf.realSTYTD, 36 | 'Options': self.option_perf_underlying.realSTYTD, 37 | 'Dividends': dividends_by_symbol, 38 | }).fillna(0) 39 | 40 | self.mtm_ytd['Total'] = self.mtm_ytd.sum(1) 41 | self.realized['Total'] = self.realized.sum(1) 42 | 43 | self.cash_by_type = self.cash_transactions.groupby('type').amount.sum() 44 | self.fees = self.cash_by_type[['Broker Interest Paid', 45 | 'Broker Interest Received', 46 | 'Other Fees']] 47 | 48 | self.in_out = clean_in_out(self.cash_transactions) 49 | 50 | def get_lxml_root(self): 51 | tree = lxml_objectify.parse(open(self.path, 'rb')) 52 | root = tree.getroot() 53 | return root.FlexStatements.FlexStatement 54 | 55 | 56 | def clean_perf(statement): 57 | summary = statement.MTDYTDPerformanceSummary 58 | perf = get_table(summary) 59 | 60 | numeric_cols = ['mtmYTD', 'mtmMTD', 'realSTMTD', 'realSTYTD', 61 | 'realLTMTD', 'realLTYTD'] 62 | 63 | for c in numeric_cols: 64 | perf[c] = perf[c].astype(np.float64) 65 | return perf 66 | 67 | 68 | def get_table(node): 69 | return pd.DataFrame([dict(zip(c.keys(), c.values())) for 70 | c in node.getchildren()]) 71 | 72 | 73 | def clean_option_perf(perf): 74 | perf = perf[perf.assetCategory == 'OPT'].copy() 75 | perf['expiry'] = pd.to_datetime(perf['expiry']) 76 | return perf 77 | 78 | 79 | def clean_stock_perf(perf): 80 | perf = perf[perf.assetCategory == 'STK'] 81 | 82 | perf = perf.drop(['acctAlias', 83 | 'assetCategory', 84 | 'expiry', 85 | 'multiplier', 86 | 'putCall', 87 | 'strike', 88 | 'securityID', 89 | 'securityIDType', 90 | 'underlyingSymbol', 91 | 'underlyingConid'], axis='columns') 92 | return perf.set_index('symbol') 93 | 94 | 95 | def rollup_option_underlying(options): 96 | grouped = options.groupby('underlyingSymbol') 97 | 98 | return pd.DataFrame({ 99 | 'mtmYTD': grouped.mtmYTD.sum(), 100 | 'realSTYTD': grouped.realSTYTD.sum(), 101 | 'realLTYTD': grouped.realLTYTD.sum(), 102 | }) 103 | 104 | 105 | def rollup_statements(statements, key='mtm_ytd'): 106 | def clean(x): 107 | return x.drop('Total', axis=1) 108 | 109 | result = clean(getattr(statements[0], key)) 110 | for stmt in statements[1:]: 111 | result = result.add(clean(getattr(stmt, key)), 112 | fill_value=0) 113 | 114 | result = result.fillna(0) 115 | result['Total'] = result.sum(1) 116 | 117 | return result 118 | 119 | 120 | def clean_cash(statement): 121 | cash_trans = get_table(statement.CashTransactions) 122 | cash_trans.amount = cash_trans.amount.astype(np.float64) 123 | cash_trans.dateTime = pd.to_datetime(cash_trans.dateTime) 124 | return cash_trans 125 | 126 | 127 | def clean_dividends(cash_trans): 128 | dividends = cash_trans[cash_trans.type.str.contains('Dividends')] 129 | 130 | return dividends[['accountId', 'assetCategory', 'underlyingSymbol', 131 | 'symbol', 'dateTime', 'description', 132 | 'amount']] 133 | 134 | 135 | def total_fees(cash_trans): 136 | total_by_type = cash_trans.groupby('type').amount.sum() 137 | 138 | return total_by_type[['Broker Interest 
Paid', 139 | 'Broker Interest Received', 140 | 'Other Fees']] 141 | 142 | 143 | def clean_in_out(cash_trans): 144 | inout = cash_trans[cash_trans.type == 'Deposits & Withdrawals'] 145 | 146 | return pd.Series([inout.amount[inout.amount > 0].sum(), 147 | inout.amount[inout.amount < 0].sum()], 148 | index=['Deposit', 'Withdrawal']) 149 | -------------------------------------------------------------------------------- /overlay_presentation.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from pathlib import Path 4 | import pandas as pd 5 | from pptx import Presentation 6 | from spx_data_update import UpdateSP500Data 7 | from option_utilities import read_feather, write_feather 8 | from urllib.request import urlretrieve 9 | from pptx.util import Inches 10 | from pptx.enum.text import PP_PARAGRAPH_ALIGNMENT as PP_ALIGN 11 | import pandas_datareader.data as web 12 | import os 13 | 14 | 15 | def title_only_slide(asset_path, slide_dict, presentation, text_msg=None): 16 | slide = presentation.slides.add_slide(presentation.slide_layouts[slide_dict['TITLE_ONLY']]) 17 | for shape in slide.placeholders: 18 | print('%d %s' % (shape.placeholder_format.idx, shape.name)) 19 | placeholder = slide.placeholders[13] # idx key, not position 20 | placeholder_picture = placeholder.insert_picture(str(asset_path)) 21 | slide.shapes.title.text = text_msg 22 | return slide 23 | 24 | 25 | def main(): 26 | ppt_path = Path.home() / 'Dropbox' / 'option_overlay' 27 | fig_path = Path.home() / 'Dropbox' / 'outputDev' / 'fig' 28 | template_name = 'option_overlay.pptx' 29 | output_name = 'test.pptx' 30 | 31 | # Assets 32 | heat_map_path = fig_path / 'heat_map.png' 33 | cum_perf_path = fig_path / 'cum_perf.png' 34 | cum_total_perf_path = fig_path / 'cum_total_perf.png' 35 | cum_total_perf2_path = fig_path / 'cum_total_perf2.png' 36 | hfri_overlay_path = fig_path / 'hfri_overlay.png' 37 | delta_path = fig_path / 'Delta.png' 38 | gamma_path = fig_path / 'Gamma.png' 39 | vega_path = fig_path / 'Vega.png' 40 | rho_path = fig_path / 'Rho.png' 41 | theta_path = fig_path / 'Theta.png' 42 | 43 | # Layout index 44 | layout_dict = {'TITLE': 0, 'SUB_TITLE': 1, 'QUOTE': 2, 'TITLE_COLUMN1': 3, 'TITLE_COLUMN2': 4, 'TITLE_COLUMN3': 5, 45 | 'TITLE_ONLY': 6, 'CAPTION': 7, 'BLANK': 8} 46 | 47 | prs = Presentation(ppt_path / template_name) 48 | 49 | # Title slide 50 | for shape in prs.slides[0].placeholders: 51 | print('%d %s' % (shape.placeholder_format.idx, shape.name)) 52 | prs.slides[0].shapes[0].text = 'Income Alternatives' 53 | 54 | # 1 55 | slide = prs.slides.add_slide(prs.slide_layouts[layout_dict['TITLE_COLUMN1']]) 56 | for shape in slide.placeholders: 57 | print('%d %s' % (shape.placeholder_format.idx, shape.name)) 58 | # placeholder = slide.placeholders[1] # idx key, not position 59 | slide.shapes.title.text = 'Background' 60 | 61 | paragraph_strs = [ 62 | 'Egg, bacon, sausage and spam.', 63 | 'Spam, bacon, sausage and spam.', 64 | 'Spam, egg, spam, spam, bacon and spam.' 
65 | ] 66 | text_frame = slide.placeholders[1].text_frame 67 | text_frame.clear() # remove any existing paragraphs, leaving one empty one 68 | 69 | p = text_frame.paragraphs[0] 70 | p.text = paragraph_strs[0] 71 | p.alignment = PP_ALIGN.LEFT 72 | 73 | for para_str in paragraph_strs[1:]: 74 | p = text_frame.add_paragraph() 75 | p.text = para_str 76 | p.alignment = PP_ALIGN.LEFT 77 | p.level = 1 78 | 79 | # 2 80 | title_only_slide(heat_map_path, layout_dict, prs, text_msg='Monthly Excess Returns (%)') 81 | # 3 82 | title_only_slide(cum_perf_path, layout_dict, prs, text_msg='Cumulative Excess Return') 83 | # 4 84 | title_only_slide(cum_total_perf_path, layout_dict, prs, text_msg='Cumulative Total Return') 85 | # 5 86 | title_only_slide(cum_total_perf2_path, layout_dict, prs, text_msg='Cumulative Total Return') 87 | # 6 88 | title_only_slide(hfri_overlay_path, layout_dict, prs, text_msg='Overlay vs. HFRI') 89 | 90 | # 6~10 91 | greek_dict = {delta_path: 'Delta', 92 | gamma_path: 'Gamma', 93 | vega_path: 'Vega', 94 | rho_path: 'Rho', 95 | theta_path: 'Theta'} 96 | for key, value in greek_dict.items(): 97 | title_only_slide(key, layout_dict, prs, text_msg='Portfolio ' + value) 98 | 99 | # Save and open presentation 100 | prs.save(ppt_path / output_name) 101 | os.system("open " + str(ppt_path / output_name)) 102 | 103 | # 104 | # greek_dict = {delta_path: 'Delta', 105 | # gamma_path: 'Gamma', 106 | # vega_path: 'Vega', 107 | # rho_path: 'Rho', 108 | # theta_path: 'Theta.png'} 109 | # for key, value in greek_dict.items(): 110 | # slide = prs.slides.add_slide(prs.slide_layouts[layout_dict['TITLE_ONLY']]) 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | 116 | 117 | 118 | # for i in range(0, 8, 1): 119 | # blank_slide_layout = prs.slide_layouts[i] 120 | # slide = prs.slides.add_slide(blank_slide_layout) 121 | # 122 | # top = Inches(1.54) 123 | # left = Inches(0.28) 124 | # height = Inches(3.82) 125 | # pic = slide.shapes.add_picture(str(heat_map_path), left, top, height=height) 126 | 127 | 128 | # for shape in slide.placeholders: 129 | # print('%d %s' % (shape.placeholder_format.idx, shape.name)) 130 | 131 | def aqr_alt_funds(update_funds=True): 132 | 133 | db_directory = UpdateSP500Data.DATA_BASE_PATH / 'xl' 134 | url_string = 'https://funds.aqr.com/-/media/files/fund-documents/pricefiles/' 135 | 136 | fund_dict = {'alternative_risk_premia': 'leapmf.xls', 137 | 'diversified_arbitrage': 'daf.xls', 138 | 'equity_market_neutral': 'emnmf.xls', 139 | 'equity_long_short': 'elsmf.xls', 140 | 'global_macro': 'gmmf.xls', 141 | 'managed_futures': 'mfmf.xls', 142 | 'multi_alternative': 'msaf.xls', 143 | 'style_premia_alternative': 'spaf.xls'} 144 | 145 | url_dict = {value: url_string + value for (key, value) in fund_dict.items()} 146 | 147 | if update_funds: 148 | _ = [urlretrieve(value, db_directory / key) for (key, value) in url_dict.items()] 149 | 150 | rows_to_skip = list(range(0, 15)) 151 | rows_to_skip.append(16) 152 | 153 | aqr_funds_index = [] 154 | for key, value in fund_dict.items(): 155 | df = pd.read_excel(db_directory / value, usecols=[1, 4], 156 | skiprows=rows_to_skip, index_col=0, squeeze=True, 157 | keep_default_na=False) 158 | df = df.rename(key) 159 | aqr_funds_index.append(df) 160 | return pd.concat(aqr_funds_index, axis=1) 161 | 162 | 163 | def get_fund_assets(update_funds=True): 164 | 165 | db_directory = UpdateSP500Data.DATA_BASE_PATH / 'feather' 166 | feather_name = 'all_funds.feather' 167 | if update_funds: 168 | fund_dict = {'^SP500TR': 'S&P 500', 169 | 'VDIGX': 'VG 
Dividend Growth', 170 | 'VEIRX': 'VG Equity-Income', 171 | 'VWEAX': 'VG High-Yield Corporate', 172 | 'VWALX': 'VG High-Yield Muni', 173 | 'VBTLX': 'VG Total Bond Market', 174 | 'BXMIX': 'Blackstone Alternatives', 175 | 'QLEIX': 'AQR Equity Long/Short', 176 | 'QGMIX': 'AQR Global Macro', 177 | 'QMHIX': 'AQR Managed Futures', 178 | 'ADAIX': 'AQR Diversified Arbitrage', 179 | 'QSPIX': 'AQR Style Premia', 180 | 'AVGRX': 'Dreyfus Dynamic Total Return', #$1.141bn 181 | 'FAAAX': 'K2 Franklin Alternative',# fund $1.17bn 182 | 'GJRTX': 'GSAM Absolute return', # tracker $2.36bn 183 | 'MASNX': 'Litman Gregory Masters Alt',# Strats Fund $2.05bn 184 | 'PSMIX': 'Principal Global Multi-Strategy',# Fund $2.76bn 185 | 'QOPIX': 'Oppenheimer Fundamental Alternatives',# Fd $1.20 186 | 'GAFYX': 'Natixis ASG Global Alternatives'} # Fd $1.39bn 187 | 188 | all_funds = [web.get_data_yahoo(key, 'JAN-16-80') for key, _ in fund_dict.items()] 189 | all_funds = [fund['Adj Close'] for fund in all_funds] 190 | all_funds = [fund.rename(fund_name) for fund, fund_name in zip(all_funds, fund_dict.values())] 191 | all_funds = pd.concat(all_funds, axis=1) 192 | # Replace dodgy observation 193 | all_funds['Principal Global Multi-Strategy']['2017-08-24'] = all_funds['Principal Global Multi-Strategy'][ 194 | '2017-08-23'] 195 | write_feather(all_funds, db_directory / feather_name) 196 | 197 | all_funds = read_feather(db_directory / feather_name) 198 | return all_funds 199 | 200 | 201 | def get_hfr(feather_name, csv_file_path, update_funds=True): 202 | db_directory = UpdateSP500Data.DATA_BASE_PATH / 'xl' 203 | # feather_name = 'hfrx.feather' 204 | if update_funds: 205 | rows_to_skip = list(range(0, 2)) 206 | headers = ['Date', 'Index Name', 'Index Code', 'Return', 'Index Value'] 207 | 208 | df = pd.read_csv(db_directory / csv_file_path, skiprows=rows_to_skip, 209 | squeeze=True, names=headers, engine='python') 210 | index_codes = df['Index Code'].unique() 211 | all_hfr_list = [] 212 | for index_code in index_codes[:-1]: # remove HFR company info 213 | idx = df['Index Code'] == index_code 214 | hfr = df[idx].copy() 215 | hfr['Date'] = hfr['Date'].apply(pd.to_datetime) 216 | hfr = hfr.set_index(['Date']) 217 | hfr = hfr.reindex(hfr.index.sort_values()) 218 | hfr_index = hfr['Index Value'].rename(hfr['Index Name'].unique()[0]) 219 | all_hfr_list.append(hfr_index) 220 | hfr_df = pd.concat(all_hfr_list, axis=1) 221 | write_feather(hfr_df, UpdateSP500Data.DATA_BASE_PATH / 'feather' / feather_name) 222 | 223 | hfr_df = read_feather(UpdateSP500Data.DATA_BASE_PATH / 'feather' / feather_name) 224 | return hfr_df 225 | 226 | 227 | def daily_hfrx(**kwargs): 228 | feather_name = 'hfrx.feather' 229 | csv_file_path = 'hfrx_daily_index_data.csv' 230 | return get_hfr(feather_name, csv_file_path, **kwargs) 231 | 232 | 233 | def monthly_hfri(**kwargs): 234 | feather_name = 'hfri.feather' 235 | csv_file_path = 'hfri_index_data.csv' 236 | return get_hfr(feather_name, csv_file_path, **kwargs) 237 | 238 | 239 | def monthly_hfrr(**kwargs): 240 | feather_name = 'hfrr.feather' 241 | csv_file_path = 'hfrr_index_data.csv' 242 | return get_hfr(feather_name, csv_file_path, **kwargs) 243 | 244 | # def daily_hfrx(update_funds=True): 245 | # db_directory = UpdateSP500Data.DATA_BASE_PATH / 'xl' 246 | # feather_name = 'hfrx.feather' 247 | # if update_funds: 248 | # rows_to_skip = list(range(0, 2)) 249 | # headers = ['Date', 'Index Name', 'Index Code', 'Return', 'Index Value'] 250 | # 251 | # df = pd.read_csv(db_directory / 'hfrx_daily_index_data.csv', 
skiprows=rows_to_skip, 252 | # squeeze=True, names=headers, engine='python') 253 | # index_codes = df['Index Code'].unique() 254 | # all_hfrx_list = [] 255 | # for index_code in index_codes[:-1]: # remove HFR company info 256 | # idx = df['Index Code'] == index_code 257 | # hfr = df[idx] 258 | # hfr.loc[:, 'Date'] = pd.to_datetime(hfr.loc[:, 'Date']) 259 | # hfr = hfr.set_index(['Date']) 260 | # hfr = hfr.reindex(hfr.index.sort_values()) 261 | # hfr_index = hfr['Index Value'].rename(hfr['Index Name'].unique()[0]) 262 | # all_hfrx_list.append(hfr_index) 263 | # hfrx = pd.concat(all_hfrx_list, axis=1) 264 | # write_feather(hfrx, UpdateSP500Data.DATA_BASE_PATH / 'feather' / feather_name) 265 | # 266 | # hfrx = read_feather(UpdateSP500Data.DATA_BASE_PATH / 'feather' / feather_name) 267 | # return hfrx 268 | 269 | 270 | -------------------------------------------------------------------------------- /option_data_new.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pathlib import Path 3 | import plistlib 4 | import pysftp 5 | import zipfile 6 | import os 7 | import warnings 8 | from time import time 9 | import re 10 | import requests 11 | from io import StringIO 12 | from datetime import datetime 13 | 14 | 15 | 16 | 17 | def get_dates(feather_directory, file_type='.feather'): 18 | """ Fetch dates from feather file names 19 | :rtype: pd.DatetimeIndex 20 | """ 21 | regex_pattern = r'\d{4}-\d{2}-\d{2}' # this will fail if month>12 or days>31 22 | opt_dates_list = [] 23 | for item in os.listdir(feather_directory): # loop through items in dir 24 | if item.endswith(file_type): 25 | date_string = re.search(regex_pattern, item) 26 | if date_string: 27 | opt_dates_list.append(date_string.group()) 28 | opt_dates_list = list(set(opt_dates_list)) 29 | opt_dates_all = pd.DatetimeIndex([pd.to_datetime(date_item, yearfirst=True, 30 | format='%Y-%m-%d') 31 | for date_item in opt_dates_list]) 32 | opt_dates_all = opt_dates_all.sort_values() 33 | return opt_dates_all 34 | 35 | 36 | def data_shop_login(): 37 | return config_key('cbeoDataShop_dict') 38 | 39 | 40 | def illiquid_equity(discount=0.5): 41 | return sum(config_key('illiquid_equity').values()) * discount 42 | 43 | 44 | def config_key(dict_key: str): 45 | file_name = GetRawCBOEOptionData.DATA_BASE_PATH / 'config.plist' 46 | assert (file_name.is_file()) 47 | f = open(str(file_name), 'rb') 48 | pl = plistlib.load(f) 49 | return pl[dict_key] 50 | 51 | 52 | 53 | class GetRawCBOEOptionData: 54 | """Class for handling raw option data downloads and processing from the CBOE DataShop.""" 55 | DATA_BASE_PATH = Path.home() / 'Library' / 'Mobile Documents' / 'com~apple~CloudDocs' / 'localDB' 56 | TOP_LEVEL_PATH = DATA_BASE_PATH / 'cboeRawVolData' 57 | OPTION_TYPES = ['P', 'C'] # Option types: P for Put, C for Call 58 | SUBSCRIPTION_STR = 'subscriptions/order_000012838/item_000016265/' # Subscription string for data access 59 | SYMBOL_DEFINITION_FILE = 'OptionSymbolConversionHistory.xlsx' # Excel file containing symbol conversion history 60 | 61 | def __init__(self, top_level_directory: Path): 62 | """ 63 | Initialize with the directory path for storing the data. 64 | 65 | :param top_level_directory: Path object pointing to the top-level directory. 
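Example (mirrors main() at the bottom of this module): updater = GetRawCBOEOptionData(GetRawCBOEOptionData.TOP_LEVEL_PATH); updater.update_data_files(GetRawCBOEOptionData.TOP_LEVEL_PATH / 'test')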
66 | """ 67 | self.top_level_directory = top_level_directory 68 | # Load root symbol strings from the Excel file 69 | self.root_symbols_str = self._load_symbol_definitions 70 | 71 | @property 72 | def _load_symbol_definitions(self) -> list: 73 | """ 74 | Load option symbol string definitions from an Excel file. 75 | 76 | :return: List of root symbol strings. 77 | """ 78 | # Path to the symbol definition file 79 | root_symbols_file: Path = self.top_level_directory / self.SYMBOL_DEFINITION_FILE 80 | 81 | # Check if the file exists 82 | assert root_symbols_file.is_file(), f"{root_symbols_file} does not exist." 83 | 84 | # Load root symbols from the 'spxSymbols' sheet in the Excel file 85 | root_symbols_df: pd.DataFrame = pd.read_excel(root_symbols_file, sheet_name='spxSymbols', skiprows=[0], 86 | usecols=[0], names=['root_symbols']) 87 | 88 | # Strip whitespace and return as a list of strings 89 | return root_symbols_df['root_symbols'].dropna().str.strip().values.tolist() 90 | 91 | @staticmethod 92 | def open_sftp(): 93 | """ 94 | Open an SFTP connection to CBOE DataShop using stored credentials. 95 | 96 | :return: pysftp.Connection object for SFTP communication. 97 | :raises ConnectionError: If unable to establish a connection. 98 | """ 99 | user_dict = data_shop_login() # Retrieve login credentials 100 | cnopts = pysftp.CnOpts() 101 | cnopts.hostkeys = None # Disable host key verification for this connection 102 | 103 | # Suppress warning related to host key verification 104 | with warnings.catch_warnings(): 105 | warnings.simplefilter("ignore", category=UserWarning) 106 | 107 | try: 108 | # Establish SFTP connection 109 | sftp = pysftp.Connection('sftp.datashop.livevol.com', 110 | username=user_dict['user'], 111 | password=user_dict['password'], 112 | cnopts=cnopts) 113 | except Exception as e: 114 | raise ConnectionError(f"Failed to connect to SFTP: {e}") 115 | 116 | return sftp 117 | 118 | @staticmethod 119 | def unzip_files(in_directory: Path, out_directory: Path): 120 | """ 121 | Unzip all .zip files from the input directory into the output directory. 122 | 123 | :param in_directory: Path object for the input directory containing zip files. 124 | :param out_directory: Path object for the output directory to extract files to. 125 | """ 126 | # Loop through all files in the input directory 127 | for item in os.listdir(in_directory): 128 | if item.endswith('.zip'): 129 | file_path = in_directory / item 130 | try: 131 | # Extract the contents of the zip file 132 | with zipfile.ZipFile(file_path) as zip_ref: 133 | zip_ref.extractall(out_directory) 134 | except zipfile.BadZipFile as err: 135 | print(f"Error extracting {item}: {err}") 136 | 137 | def __get_zip_files(self, output_directory: Path, order_string: str): 138 | """ 139 | Download zip files from the SFTP server into the specified directory. 140 | 141 | :param output_directory: Path object where downloaded zip files will be stored. 142 | :param order_string: String for the SFTP folder containing the files. 143 | """ 144 | # Establish SFTP connection 145 | sftp = self.open_sftp() 146 | 147 | # Download the directory from the server to the local directory 148 | sftp.get_d(order_string, output_directory, preserve_mtime=True) 149 | 150 | # List the files on the SFTP server 151 | sftp_file_list = sftp.listdir(order_string) 152 | 153 | # Print the names of the downloaded files 154 | for file in sftp_file_list: 155 | if file.endswith('.zip'): 156 | print(f"Downloading... 
{file}") 157 | 158 | sftp.close() # Close the SFTP connection 159 | 160 | def get_subscription_files(self, output_directory: Path): 161 | """ 162 | Download the subscription files from the CBOE DataShop SFTP server. 163 | 164 | :param output_directory: Path object where the downloaded files will be saved. 165 | """ 166 | # Ensure the output directory exists, if not, create it 167 | if not output_directory.is_dir(): 168 | output_directory.mkdir(parents=True) 169 | 170 | # Download the zip files 171 | self.__get_zip_files(output_directory, self.SUBSCRIPTION_STR) 172 | 173 | def update_data_files(self, temporary_file_directory: Path) -> bool: 174 | """ 175 | Download, unzip, process, and update option data if not already up-to-date. 176 | 177 | :param temporary_file_directory: Path object for temporary storage of raw files. 178 | :return: True if data files were updated, False otherwise. 179 | """ 180 | feather_directory = self.top_level_directory / 'feather' # Directory for processed data 181 | assert feather_directory.is_dir(), f"{feather_directory} does not exist." 182 | assert temporary_file_directory.is_dir(), f"{temporary_file_directory} does not exist." 183 | 184 | # Get the most recent business day 185 | latest_business_date = pd.to_datetime('today') - pd.tseries.offsets.BDay(1) 186 | 187 | # Retrieve the list of available option dates from the existing data 188 | opt_dates_all = get_dates(feather_directory) 189 | 190 | # Check if the data is up-to-date 191 | if opt_dates_all[-1].date() != latest_business_date.date(): 192 | print('Downloading Option data from CBOE...') 193 | start_time = time() 194 | 195 | # Download and process the data files 196 | self.get_subscription_files(temporary_file_directory) 197 | self.unzip_files(temporary_file_directory, temporary_file_directory) 198 | self.csv_to_feather(temporary_file_directory, feather_directory) 199 | 200 | end_time = time() 201 | print(f"Option files updated in {round(end_time - start_time)} seconds.") 202 | return True 203 | else: 204 | print('Option files are up-to-date.') 205 | return False 206 | 207 | def csv_to_feather(self, in_directory: Path, out_directory: Path, archive_files=True): 208 | """ 209 | Convert CSV files to Feather format and optionally archive the original files. 210 | 211 | :param in_directory: Path object containing CSV files. 212 | :param out_directory: Path object where Feather files will be stored. 213 | :param archive_files: Boolean flag indicating whether to archive the original files. 
214 | """ 215 | zip_archive_directory = self.top_level_directory / 'zip' # Directory for zip file archives 216 | csv_archive_directory = self.top_level_directory / 'csv' # Directory for csv file archives 217 | 218 | # Ensure the output directory exists 219 | if not out_directory.is_dir(): 220 | out_directory.mkdir(parents=True) 221 | 222 | # Compile a regex pattern for filtering option symbols 223 | regex_pattern = '|'.join(self.root_symbols_str) 224 | 225 | # Process each CSV file in the input directory 226 | for item in os.listdir(in_directory): 227 | if item.endswith('.csv'): 228 | file_path = in_directory / item 229 | option_df = pd.read_csv(file_path) 230 | 231 | # Convert quote_date and expiration to datetime format 232 | option_df[['quote_date', 'expiration']] = option_df[['quote_date', 'expiration']].apply(pd.to_datetime) 233 | 234 | # Ensure option_type is uppercase 235 | option_df['option_type'] = option_df['option_type'].str.upper() 236 | 237 | # Remove rows with SPXW root symbol and filter by root_symbols 238 | option_df = option_df[~option_df['root'].str.contains('SPXW')] 239 | option_df = option_df[option_df['root'].str.contains(regex_pattern)] 240 | 241 | # Save data by option type (P for Put, C for Call) in Feather format 242 | for option_type in self.OPTION_TYPES: 243 | df_filtered = option_df[option_df['option_type'] == option_type] 244 | file_name = f"{os.path.splitext(item)[0]}_{option_type}.feather" 245 | df_filtered.reset_index().to_feather(out_directory / file_name) 246 | 247 | # Archive the original zip and csv files if required 248 | if archive_files: 249 | self._archive_files(in_directory, csv_archive_directory, zip_archive_directory) 250 | 251 | @staticmethod 252 | def _archive_files(in_directory: Path, csv_archive_directory: Path, zip_archive_directory: Path): 253 | """ 254 | Archive CSV and ZIP files by moving them to the archive directories. 255 | 256 | :param in_directory: Path object for the directory containing files to be archived. 257 | :param csv_archive_directory: Path object where CSV files will be moved. 258 | :param zip_archive_directory: Path object where ZIP files will be moved. 
259 | """ 260 | # Move files to their respective archive directories 261 | for item in os.listdir(in_directory): 262 | file_path = in_directory / item 263 | if item.endswith('.csv'): 264 | file_path.rename(csv_archive_directory / item) 265 | elif item.endswith('.zip'): 266 | file_path.rename(zip_archive_directory / item) 267 | else: 268 | file_path.unlink() # Remove other non-relevant files 269 | 270 | 271 | 272 | class DividendYieldScraper: 273 | def __init__(self, url='https://www.multpl.com/s-p-500-dividend-yield/table/by-month'): 274 | self.url = url 275 | self.dy_df = self._fetch_and_clean_data() 276 | 277 | def _fetch_and_clean_data(self): 278 | """Fetches and cleans the dividend yield data from the given URL.""" 279 | # Package the request, send the request and catch the response 280 | response = requests.get(self.url) 281 | 282 | # Set the correct encoding 283 | response.encoding = 'utf-8' 284 | 285 | # Wrap the HTML content in a StringIO object 286 | raw_html_tbl = pd.read_html(StringIO(response.text)) 287 | 288 | # Access the first table 289 | dy_df = raw_html_tbl[0] 290 | 291 | # Convert the 'Date' column to datetime format 292 | dy_df['Date'] = pd.to_datetime(dy_df['Date'], format='%b %d, %Y') 293 | 294 | # Set 'Date' as the index 295 | dy_df.set_index('Date', drop=True, inplace=True) 296 | 297 | # Clean the 'Value' column by removing '†' and '%' symbols 298 | dy_df['Value'] = dy_df['Value'].str.replace('†', '').str.replace('%', '').str.strip() 299 | 300 | # Convert the cleaned 'Value' column to numeric 301 | dy_df['Value'] = pd.to_numeric(dy_df['Value']) 302 | 303 | # Filter out future end-of-month dates 304 | today = datetime.today() 305 | dy_df = dy_df[~((dy_df.index.is_month_end) & (dy_df.index > today))] 306 | 307 | return dy_df 308 | 309 | @property 310 | def latest_yield(self): 311 | """Returns the latest available dividend yield.""" 312 | return self.dy_df['Value'].iloc[0] 313 | 314 | @property 315 | def full_history(self): 316 | """Returns the full history of dividend yields.""" 317 | return self.dy_df 318 | 319 | 320 | def main(): 321 | # try: 322 | raw_file_updater = GetRawCBOEOptionData(GetRawCBOEOptionData.TOP_LEVEL_PATH) 323 | raw_file_updater.update_data_files(GetRawCBOEOptionData.TOP_LEVEL_PATH / 'test') 324 | 325 | 326 | if __name__ == '__main__': 327 | main() 328 | -------------------------------------------------------------------------------- /vix_utilities.py: -------------------------------------------------------------------------------- 1 | # from IPython.display import display_html, HTML 2 | import pyfolio as pf 3 | import numpy as np 4 | import pandas as pd 5 | from statsmodels.tsa.arima_model import ARMA 6 | # import statsmodels.formula.api as smf 7 | import statsmodels.tsa.api as smt 8 | import statsmodels.api as sm 9 | import scipy.stats as scs 10 | # from arch import arch_model 11 | import matplotlib.gridspec as gridspec 12 | from ib_insync import Future, util 13 | # import matplotlib.pyplot as plt 14 | import matplotlib as mpl 15 | import matplotlib.pyplot as plt 16 | import os 17 | import re 18 | import zipfile 19 | from option_utilities import read_feather 20 | from pyfolio.timeseries import cum_returns 21 | 22 | 23 | # import nest_asyncio 24 | # from time import time 25 | # import plistlib 26 | # import sys 27 | 28 | from spx_data_update import UpdateSP500Data, ImpliedVolatilityHistory, SP500Index, IbWrapper, GetRawCBOEOptionData 29 | 30 | from option_utilities import PlotConstants, chart_format 31 | 32 | 33 | class VixForecast: 34 | 35 | def 
__init__(self): 36 | self.vix_full_hist = self.vix_history 37 | self.yhat = None 38 | self.model_fit = None 39 | self.vix = None 40 | 41 | def forecast_vix(self, history=None, steps=300): 42 | if history is None: 43 | history = self.vix_full_hist 44 | model = ARMA(history.values, order=(2, 2)) 45 | # start_params for ARMA model are from VIX Premium paper 46 | self.model_fit = model.fit(disp=0, start_params=[20.083, 1.651, -0.654, -0.714, -0.064]) 47 | 48 | output = self.model_fit.forecast(steps=steps) 49 | self.yhat = output[0] 50 | self.vix = history 51 | return 52 | 53 | @property 54 | def vix_history(self): 55 | iv_hist = ImpliedVolatilityHistory() 56 | vix = iv_hist.implied_vol_index 57 | if vix.index[-1].date() == pd.to_datetime('today').date(): 58 | # remove last observation if today 59 | vix = vix[:-1] 60 | return vix 61 | 62 | @property 63 | def time_series_plots(self): 64 | self._tsplot(self.vix, lags=50) 65 | 66 | def vix_beta(self, rolling_window=21 * 60): 67 | sp5 = SP500Index() 68 | if self.vix is None: 69 | vix = self.vix_full_hist 70 | else: 71 | vix = self.vix 72 | shared_dates = vix.index.intersection(sp5.excess_return_index.index) 73 | beta = pf.timeseries.rolling_beta(vix.reindex(shared_dates).pct_change().dropna(how='any'), 74 | sp5.excess_return_index.reindex(shared_dates).pct_change().dropna(how='any'), 75 | rolling_window=rolling_window) 76 | return beta, rolling_window 77 | 78 | def plot_rolling_beta(self, **kwargs): 79 | beta, rolling_window = self.vix_beta(**kwargs) 80 | pc = PlotConstants() 81 | with plt.style.context('bmh'): 82 | _ = plt.figure(figsize=pc.fig_size, 83 | dpi=600, 84 | facecolor='None', 85 | edgecolor='None') 86 | gs = gridspec.GridSpec(1, 1, wspace=0.5, hspace=0.25) 87 | ax_beta = plt.subplot(gs[:]) 88 | ax_beta = beta.plot(lw=1.5, 89 | ax=ax_beta, 90 | grid=True, 91 | alpha=0.4, 92 | color=pc.color_yellow, 93 | title='VIX beta to S&P500 - {} days rolling window'.format(rolling_window)) 94 | ax_beta.set_ylabel('Beta') 95 | ax_beta.axhline(beta.mean(), 96 | color='k', 97 | ls='--', 98 | lw=0.75, 99 | alpha=1.0) 100 | chart_format([ax_beta], pc.color_light) 101 | plt.autoscale(enable=True, 102 | axis='x', 103 | tight=True) 104 | return ax_beta 105 | 106 | @staticmethod 107 | def _tsplot(y, lags=None, figsize=(16, 9), style='bmh'): 108 | if not isinstance(y, pd.Series): 109 | y = pd.Series(y) 110 | with plt.style.context(style): 111 | _ = plt.figure(figsize=figsize) 112 | mpl.rcParams['font.sans-serif'] = 'Roboto Condensed' 113 | mpl.rcParams['font.family'] = 'sans-serif' 114 | layout = (3, 2) 115 | ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2) 116 | acf_ax = plt.subplot2grid(layout, (1, 0)) 117 | pacf_ax = plt.subplot2grid(layout, (1, 1)) 118 | qq_ax = plt.subplot2grid(layout, (2, 0)) 119 | pp_ax = plt.subplot2grid(layout, (2, 1)) 120 | 121 | y.plot(ax=ts_ax) 122 | ts_ax.set_title('Time Series Analysis Plots') 123 | smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5) 124 | smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5) 125 | sm.qqplot(y, line='s', ax=qq_ax) 126 | qq_ax.set_title('QQ Plot') 127 | scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax) 128 | 129 | plt.tight_layout() 130 | return 131 | 132 | 133 | def get_futures(contract_str, remove_weekly=False): 134 | ibw = IbWrapper() 135 | ib = ibw.ib 136 | vix = Future(contract_str, includeExpired=False) 137 | cds = ib.reqContractDetails(vix) 138 | 139 | contracts = [cd.contract for cd in cds] 140 | if remove_weekly: 141 | contracts = [contract for contract in contracts 
if len(contract.localSymbol) <= 4] 142 | 143 | bars_list = [] 144 | for contract in contracts: 145 | bars = ib.reqHistoricalData( 146 | contract, 147 | endDateTime='', 148 | durationStr='5 D', 149 | barSizeSetting='1 day', 150 | whatToShow='TRADES', 151 | useRTH=True, 152 | formatDate=1) 153 | if bars: 154 | bars_list.append(util.df(bars)) 155 | 156 | ib.disconnect() 157 | contract_df = util.df(contracts) 158 | close_list = [item.loc[:, ['date', 'close']] for item in bars_list] 159 | close_list = [item.set_index('date') for item in close_list] 160 | 161 | close_list = [item.rename(index=str, columns={'close': name}) 162 | for item, name in zip(close_list, 163 | pd.to_datetime(contract_df['lastTradeDateOrContractMonth']))] 164 | future_series = pd.concat(close_list, axis=1, sort=False) 165 | future_series = future_series.transpose().sort_index() 166 | future_series.columns = pd.to_datetime(future_series.columns) 167 | return future_series, contract_df 168 | 169 | 170 | class UpdateVIXData: 171 | DIRECTORY = UpdateSP500Data.DATA_BASE_PATH / 'CBOERawVixData' 172 | 173 | def __init__(self): 174 | self.order_string = '/order_000008108/item_000010804/' 175 | self.zip_directory, self.csv_directory, self.feather_directory = [self.DIRECTORY / item for item in 176 | ['zip', 'csv', 'feather']] 177 | self.price_columns = ['open_bid', 'open_ask', 'avg_bid', 'avg_ask', 'close_bid', 178 | 'close_ask', 'open_px', 'close_px', 'low_px', 'high_px'] 179 | 180 | def download_cboe_vix(self): 181 | """Retrieve raw zip files from the CBOE DataShop SFTP server. 182 | Will raise an error once the data is removed from the server""" 183 | sftp = GetRawCBOEOptionData.open_sftp() 184 | # Download zip files 185 | sftp.cwd(self.order_string) 186 | sftp_file_list = sftp.listdir() 187 | for file in sftp_file_list: 188 | if file.endswith('.zip'): 189 | print("Downloading..."
+ file) 190 | sftp.get(file, localpath=str(self.zip_directory / file), preserve_mtime=True) 191 | sftp.close() 192 | 193 | # Unzip to csv 194 | for item in os.listdir(self.zip_directory): # loop through items in dir 195 | if item.endswith('.zip'): 196 | file_name = self.zip_directory / item # get full path of files 197 | zip_ref = zipfile.ZipFile(file_name) # create zipfile object 198 | try: 199 | zip_ref.extractall(self.csv_directory) # extract file to dir 200 | except zipfile.BadZipFile as err: 201 | print("Zipfile error: {0} for {1}".format(err, item)) 202 | zip_ref.close() # close file 203 | num_csv, num_zip = len(os.listdir(self.csv_directory)), len(os.listdir(self.zip_directory)) 204 | assert (num_csv == num_zip) 205 | 206 | @property 207 | def raw_df(self): 208 | dataframe_list = [] 209 | for item in os.listdir(self.csv_directory): 210 | if item.endswith('.csv'): 211 | future_df = pd.read_csv(self.csv_directory / item) 212 | dataframe_list.append(future_df) 213 | raw_df = pd.concat(dataframe_list, axis=0, ignore_index=True) 214 | return raw_df 215 | 216 | 217 | class VixCBOE: 218 | def __init__(self, expiry_type=0): 219 | """Class to manipulate and provide methods on the raw CBOE VIX futures DataFrame""" 220 | 221 | raw_cboe_df = read_feather(UpdateVIXData.DIRECTORY / 'feather' / 'vix_data.feather') 222 | raw_cboe_df = raw_cboe_df.set_index('quote_date') 223 | 224 | # Fill missing close price with average of bid & ask 225 | matching_exp = [s for s in raw_cboe_df.columns if "expiry" in s] 226 | close_digits = [] 227 | for string in matching_exp: 228 | digit = int(re.findall(r"\d+", string)[0]) 229 | close_digits.append(digit) 230 | 231 | close_digits.sort() 232 | for item in close_digits: 233 | raw_cboe_df.loc[:, 'close' + str(item)] = raw_cboe_df[['close_ask' + str(item), 234 | 'close_bid' + str(item)]].mean(axis=1) 235 | raw_cboe_df.loc[:, 'close' + str(item)] = raw_cboe_df.loc[:, 'close' + str(item)].fillna( 236 | raw_cboe_df.loc[:, 'close_px' 237 | + str(item)]) 238 | self.closing_prices = raw_cboe_df[['close' + str(digit) for digit in close_digits]] 239 | self.expirations = raw_cboe_df[['expiry' + str(digit) for digit in close_digits]] 240 | 241 | self.start_date = self.closing_prices.index[0] 242 | self.expiry_type = expiry_type # expiry_type is either string or positive integer 243 | 244 | @property 245 | def _expiry_returns(self): 246 | """ Returns continuous future arithmetic return if contracts are held to expiry""" 247 | roll_rows = self.expirations['expiry1'].diff(1) > pd.Timedelta('0 Days') # First day after the front contract rolls 248 | returns = self.closing_prices.pct_change() 249 | close = self.closing_prices.copy() 250 | # Cross the columns on the day after expiry 251 | column_shift_ret = close.divide(close.shift(periods=-1, axis='columns').shift(periods=1, axis='rows')) - 1 252 | returns[roll_rows] = column_shift_ret[roll_rows] 253 | return returns 254 | 255 | @property 256 | def _business_days_2_expiry(self): 257 | """ Returns number of business days to expiration """ 258 | expirations = self.expirations.copy() 259 | column_names = self.expirations.columns 260 | business_day_list = [] 261 | _notNaT = np.datetime64(pd.Timestamp.today()) 262 | for col in column_names: 263 | expiry = expirations[col] 264 | begin_dates = expiry.index 265 | end_dates = pd.DatetimeIndex(expiry.values) 266 | end_dates_mask = end_dates.to_series().isna().values 267 | bus_days = np.busday_count(list(begin_dates.date), 268 | list(pd.DatetimeIndex(np.where(end_dates_mask, _notNaT, end_dates)).date)) 269 | out_bus_days =
[np.nan if x in bus_days[end_dates_mask] else x for x in bus_days] 270 | business_day_list.append(pd.Series(data=out_bus_days, index=expiry.index, name=col)) 271 | return pd.concat(business_day_list, axis=1) 272 | 273 | def rolled_future(self): 274 | """Returns continuous return, price index, expiries and days 2 expiry for vix future rolled according to 275 | expiry type""" 276 | expiry_dates = self.expirations['expiry1'] 277 | returns = self._expiry_returns 278 | business_days_2_exp = self._business_days_2_expiry 279 | eom_dates = returns.loc[returns.groupby(returns.index.to_period('M')).apply(lambda x: x.index.max())].index 280 | 281 | last_month_end = eom_dates[-1] + pd.offsets.MonthEnd(0) 282 | eom_dates = eom_dates[:-1] 283 | eom_dates = eom_dates.insert(-1, last_month_end) 284 | roll_dates = eom_dates.sort_values() 285 | 286 | expiry_for_roll = [] 287 | for dts in expiry_dates: 288 | idx = roll_dates.get_loc(dts, method='ffill') 289 | expiry_for_roll.append(roll_dates[idx]) 290 | day_diff = expiry_dates.index - pd.DatetimeIndex(expiry_for_roll) 291 | front_month_bool = day_diff.days <= 0 292 | back_month_bool = ~front_month_bool 293 | 294 | rolled_return, rolled_future_price = \ 295 | [pd.concat([item['close2'][back_month_bool], item['close1'][front_month_bool]], axis=0).sort_index() 296 | for item in [returns, self.closing_prices]] 297 | 298 | rolled_expiries, days_2_exp = \ 299 | [pd.concat([item['expiry2'][back_month_bool], item['expiry1'][front_month_bool]], axis=0).sort_index() 300 | for item in [self.expirations, business_days_2_exp]] 301 | 302 | rolled_return[0] = np.nan # replace first empty observation with NaN 303 | return rolled_return, rolled_expiries, days_2_exp, rolled_future_price 304 | 305 | @property 306 | def rolled_return(self): 307 | """Returns daily return from long position in vix future""" 308 | return self.rolled_future()[0].rename('long_vix') 309 | 310 | @property 311 | def rolled_idx(self): 312 | """Returns cumulative return index from long position in vix future""" 313 | start_idx = 100 314 | cumulative_returns = cum_returns(self.rolled_return, start_idx) 315 | # Add back start of index 316 | cumulative_returns[self.start_date] = start_idx 317 | idx = cumulative_returns.reindex(cumulative_returns.index.sort_values()) 318 | return idx.rename('long_vix') 319 | 320 | @property 321 | def rolled_idx_short(self): 322 | """Returns cumulative return index from short position in vix future""" 323 | idx = 1 / self.rolled_idx 324 | idx = idx / idx[0] * 100 325 | return idx.rename('short_vix') 326 | 327 | @property 328 | def rolled_return_short(self): 329 | """Returns arithmetic return from short position in vix future""" 330 | return self.rolled_idx_short.pct_change() 331 | -------------------------------------------------------------------------------- /option_utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Sat Mar 10 14:50:38 2018 3 | 4 | @author: macbook2 5 | """ 6 | 7 | import calendar 8 | from time import time 9 | import datetime as dt 10 | from pathlib import Path 11 | import numpy as np 12 | import pandas_datareader.data as web 13 | from dateutil.relativedelta import relativedelta 14 | from XmlConverter import XmlConverter 15 | from urllib.request import urlretrieve 16 | import pandas as pd 17 | # import pyfolio as pf 18 | import ffn as ff 19 | import matplotlib.transforms as bbox 20 | from matplotlib import rcParams 21 | from matplotlib import cm 22 | # import seaborn as sns 23 | import 
matplotlib.pyplot as plt 24 | import matplotlib.gridspec as gridspec 25 | from matplotlib.ticker import FormatStrFormatter 26 | import matplotlib.colors as colors 27 | 28 | 29 | def time_it(method): 30 | 31 | def timed(*args, **kw): 32 | ts = time() 33 | result = method(*args, **kw) 34 | te = time() 35 | print('Function: {} took {:.5f} sec'.format(method.__name__, te - ts)) 36 | return result 37 | 38 | return timed 39 | 40 | 41 | def chart_format(ax_list, txt_color): 42 | grid_ticks_format(ax_list) 43 | for item in ax_list: 44 | color_axis(item, txt_color) 45 | invisible_spines(item) 46 | 47 | 48 | def color_axis(ax, txt_color): 49 | for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + 50 | ax.get_xticklabels() + ax.get_yticklabels()): 51 | item.set_color(txt_color) 52 | 53 | 54 | def invisible_spines(ax): 55 | """Hide axis spines""" 56 | for key, value in ax.spines.items(): 57 | ax.spines[key].set_visible(False) 58 | 59 | 60 | def grid_ticks_format(ax_list): 61 | """Hide x & y ticks and format grid lines""" 62 | [ax.grid(color='grey', 63 | linestyle=':', 64 | linewidth=1, 65 | alpha=0.5) for ax in ax_list] 66 | 67 | [ax.tick_params(axis='both', # changes apply to the both axis 'x', 'y' 68 | which='both', # both major and minor ticks are affected 69 | bottom=False, # ticks along the bottom edge are off 70 | top=False, 71 | left=False) for ax in ax_list] 72 | 73 | 74 | def plot_performance_quad(returns, fig_path=None, fig_name='heat_map_quad', font_size=20): 75 | import pyfolio as pf; import seaborn as sns # local imports: the module-level imports above are commented out 76 | def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100): 77 | new_cmap = colors.LinearSegmentedColormap.from_list( 78 | 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval), 79 | cmap(np.linspace(minval, maxval, n))) 80 | return new_cmap 81 | fig = plt.figure(figsize=(16, 9)) 82 | fig.suptitle(returns.name, fontsize=16) 83 | gs = gridspec.GridSpec(2, 2, wspace=0.2, hspace=0.3) 84 | ax_heatmap = plt.subplot(gs[0, 0]) 85 | ax_monthly = plt.subplot(gs[0, 1]) 86 | ax_box_plot = plt.subplot(gs[1, 0]) 87 | ax_yearly = plt.subplot(gs[1, 1]) 88 | 89 | # Chart 1: Heatmap 90 | # pf.plotting.plot_monthly_returns_heatmap(returns, ax=ax_heatmap) # replaced by the custom seaborn heatmap below 91 | 92 | monthly_ret_table = pf.timeseries.aggregate_returns(returns, 'monthly') # uncommented: used below 93 | monthly_ret_table = monthly_ret_table.unstack().round(3) 94 | 95 | cmap = cm.viridis 96 | new_cmap = truncate_colormap(cmap, 0.2, 0.8) 97 | # new_cmap=cmap 98 | sns.heatmap( 99 | monthly_ret_table.fillna(0) * 100 | 100.0, 101 | annot=True, 102 | annot_kws={ 103 | "size": font_size}, 104 | alpha=1.0, 105 | center=0.0, 106 | cbar=False, 107 | mask=monthly_ret_table.isna(), 108 | cmap=new_cmap, 109 | ax=ax_heatmap) 110 | 111 | ax_heatmap.set_xticklabels(np.arange(0.5, 12.5, step=1)) 112 | ax_heatmap.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], 113 | rotation=45) 114 | ylabels = ax_heatmap.get_yticklabels() 115 | ax_heatmap.set_yticklabels(ylabels, rotation=45) 116 | ax_heatmap.set_xlabel('') 117 | ax_heatmap.set_ylabel('') 118 | # ax_heatmap.set_label(rotation=90) 119 | 120 | # Chart 2: Monthly return distribution 121 | pf.plotting.plot_monthly_returns_dist(returns, ax=ax_monthly) 122 | ax_monthly.xaxis.set_major_formatter(FormatStrFormatter('%.1f%%')) 123 | ax_monthly.set_xlabel('') 124 | leg1 = ax_monthly.legend(['mean'], framealpha=0.0, prop={'size': font_size}) 125 | for text in leg1.get_texts(): 126 | # text.set_color('white') 127 | text.set_label('mean') 128 | 129 | # Chart 3: Return quantiles 130 | df_weekly 
= pf.timeseries.aggregate_returns(returns, convert_to='weekly') 131 | df_monthly = pf.timeseries.aggregate_returns(returns, convert_to='monthly') 132 | pf.plotting.plot_return_quantiles(returns, df_weekly, df_monthly, ax=ax_box_plot) 133 | 134 | # Chart 4: Annual returns 135 | pf.plotting.plot_annual_returns(returns, ax=ax_yearly) 136 | _ = ax_yearly.legend(['mean'], framealpha=0.0, prop={'size': font_size}) 137 | ax_yearly.xaxis.set_major_formatter(FormatStrFormatter('%.1f%%')) 138 | plt.xticks(rotation=45) 139 | ax_yearly.set_xlabel('') 140 | ax_yearly.set_ylabel('') 141 | for ax in [ax_box_plot, ax_heatmap, ax_monthly, ax_yearly]: 142 | for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + 143 | ax.get_xticklabels() + ax.get_yticklabels()): 144 | item.set_fontsize(font_size) 145 | 146 | for items in (ax_yearly.get_yticklabels() + ax_heatmap.get_yticklabels()): 147 | items.set_fontsize(font_size - 5) 148 | if fig_path is not None: 149 | if Path.is_dir(fig_path): 150 | plt.savefig(fig_path / fig_name, dpi=600, bbox_inches='tight', transparent=True) 151 | return fig 152 | 153 | 154 | def next_third_friday(dts): 155 | """Given a third Friday, find the next third Friday""" 156 | dts += dt.timedelta(weeks=4) 157 | return dts if dts.day >= 15 else dts + dt.timedelta(weeks=1) 158 | 159 | 160 | def third_fridays(dts, num_dts): 161 | """Given a date, calculates the num_dts next third Fridays""" 162 | # Find closest friday to 15th of month 163 | middle_month = dt.date(dts.year, dts.month, 15) 164 | result = [middle_month + dt.timedelta(days=(calendar.FRIDAY - middle_month.weekday()) % 7)] 165 | # This month's third friday is today or has passed. Find next. 166 | if result[0] <= dts: 167 | result[0] = next_third_friday(result[0]) 168 | for _ in range(num_dts - 1): 169 | result.append(next_third_friday(result[-1])) 170 | return result 171 | 172 | 173 | # @time_it 174 | def get_actual_option_expiries(expiry_dates_theo, trade_dates, in_dir): 175 | """Retrieve available option expiries given theoretical expiries and trade dates""" 176 | expiry_dates_actual = [] 177 | all_available_expiry = [] 178 | for i, item in enumerate(expiry_dates_theo): 179 | # dtf = feather.read_dataframe(in_dir + trade_dates[i].strftime(format='%Y-%m-%d') + '_P' + '.feather') 180 | dtf = pd.read_feather(in_dir + trade_dates[i].strftime(format='%Y-%m-%d') + '_P' + '.feather') 181 | all_expiration_dates = pd.DatetimeIndex(dtf['expiration'].unique()) 182 | all_expiration_dates = all_expiration_dates.sort_values() 183 | all_available_expiry.append(all_expiration_dates) 184 | expiry_index = all_expiration_dates.get_loc(item, method='ffill') 185 | if trade_dates[i] == trade_dates[-1]: 186 | expiration_date = all_expiration_dates[expiry_index] 187 | else: 188 | while all_expiration_dates[expiry_index] <= trade_dates[i + 1]: 189 | expiry_index = expiry_index + 1 190 | expiration_date = all_expiration_dates[expiry_index] 191 | expiry_dates_actual.append(expiration_date) 192 | return pd.DatetimeIndex(expiry_dates_actual), all_available_expiry 193 | 194 | 195 | def get_theoretical_strike(trade_dates, expiry_dates, spot_price, risk_free, z_score, dividend_yield, sigma, 196 | listing_spread=''): 197 | """Returns option strikes with constant Black-Scholes delta (one column per z-score)""" 198 | num_exp = np.size(expiry_dates) 199 | if len(trade_dates) == 1: # Daily run use case 200 | trade_dates = trade_dates.repeat(num_exp) 201 | sigma = np.tile(sigma, num_exp) 202 | dividend_yield = np.tile(dividend_yield, num_exp) 203 | spot_price = np.tile(spot_price, num_exp) 204 | 
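    # Note (added for clarity): combining time_discount and time_scale below, the
    # strike computed here is
    #     K = S * exp((r - q + sigma**2 / 2) * T + z * sigma * sqrt(T)),
    # which makes each z_score equal to -d1 in Black-Scholes terms, so every z
    # maps to a constant put delta of -N(z). Illustrative numbers (not from the
    # source): S=2800, sigma=0.15, T=2/12, r=0.02, q=0.019 and z=-1 give
    # K = 2800 * exp(0.01225 / 6 - 0.15 * sqrt(1 / 6)) ~= 2639, i.e. a put with
    # delta ~= -N(-1) ~= -0.16.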
205 | option_life = np.array([timeDelta.days / 365 for timeDelta in 206 | [expiryDate - tradeDate for expiryDate, 207 | tradeDate in zip(expiry_dates, trade_dates)]]) 208 | time_discount = np.multiply((risk_free - dividend_yield + (sigma ** 2) / 2), option_life) 209 | time_scale = np.multiply(sigma, np.sqrt(option_life)) 210 | theoretical_strike = [np.multiply(spot_price, np.exp(time_discount + np.multiply(time_scale, score))) for score in 211 | z_score] 212 | theoretical_strike = np.column_stack(tuple(theoretical_strike)) 213 | if listing_spread != '': 214 | theoretical_strike = np.transpose(np.round(theoretical_strike) - 215 | np.mod(np.round(theoretical_strike), listing_spread)) 216 | return theoretical_strike 217 | 218 | 219 | def write_feather(dataframe: pd.DataFrame, path): 220 | """Wrapper around DataFrame.to_feather: adds the row index as a column before saving""" 221 | df = dataframe.copy() 222 | df['index'] = df.index 223 | df.reset_index(inplace=True, drop=True) 224 | # feather.write_dataframe(dataframe, path) 225 | df.to_feather(path) 226 | 227 | 228 | def read_feather(path): 229 | """Wrapper around pd.read_feather: restores the date index from the 'index' column""" 230 | # out_df = feather.read_dataframe(path) 231 | out_df = pd.read_feather(path) 232 | out_df['index'] = pd.to_datetime(out_df['index']) 233 | out_df = out_df.set_index(['index']) 234 | return out_df 235 | 236 | 237 | def perf_stats(returns: pd.Series, **kwargs): 238 | """Wrapper function for pf.timeseries.perf_stats""" 239 | import pyfolio as pf # local import: the module-level pyfolio import is commented out above 240 | performance = pf.timeseries.perf_stats(returns, **kwargs) 241 | perf_index = list(performance.index) 242 | performance['StartDate'], performance['EndDate'] = list(returns.index[[0, -1]] 243 | .strftime('%b %d, %Y')) 244 | performance = performance.reindex(['StartDate', 'EndDate'] + perf_index) 245 | performance = performance.rename(returns.name) 246 | performance = performance.drop('common_sense_ratio', axis=0) 247 | return performance 248 | 249 | 250 | def get_asset(fund_dict, update=True): 251 | """Wrapper function to return Adjusted Close from Yahoo. Use with care: the fund dictionary value will 252 | overwrite the series name""" 253 | db_directory = Path.home() / 'Library' / 'Mobile Documents' / 'com~apple~CloudDocs' / 'localDB' / 'feather' 254 | if update: 255 | all_funds = [web.get_data_yahoo(key, 'DEC-31-70') for key, _ in fund_dict.items()] 256 | all_funds = [fund['Adj Close'] for fund in all_funds] 257 | all_funds = [fund.rename(fund_name) for fund, fund_name in zip(all_funds, fund_dict.values())] 258 | [write_feather(fund.to_frame(), db_directory / (name + '.feather')) for fund, name in zip(all_funds, 259 | fund_dict.keys())] 260 | all_funds = [read_feather(db_directory / (key + '.feather')) for key, _ in fund_dict.items()] 261 | return all_funds 262 | 263 | 264 | def matlab2datetime(matlab_datenum): 265 | def matlab_convert_2_datetime(single_date): 266 | day = dt.datetime.fromordinal(int(single_date)) 267 | dayfrac = dt.timedelta(days=single_date % 1) - dt.timedelta(days=366) 268 | return day + dayfrac 269 | 270 | try: 271 | # pass the raw datenum so the fractional day (time of day) is preserved 272 | python_dates = [matlab_convert_2_datetime(dts) for dts in matlab_datenum] 273 | except TypeError: 274 | # scalar input: wrap it rather than failing with an undefined python_dates 275 | python_dates = [matlab_convert_2_datetime(matlab_datenum)] 276 | return pd.DatetimeIndex(python_dates) 277 | 278 | 
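A quick sanity check of the conversion above (added for illustration; MATLAB's datenum counts days from year 0, which sits 366 days ahead of Python's proleptic ordinal):

```python
# Illustrative only: 737426.5 is the MATLAB datenum for 2019-01-01 12:00.
from option_utilities import matlab2datetime

print(matlab2datetime([737426.0, 737426.5]))
# DatetimeIndex(['2019-01-01 00:00:00', '2019-01-01 12:00:00'], dtype='datetime64[ns]', freq=None)
```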
279 | class PlotConstants: 280 | FONT_SIZE = 9 281 | FIG_PATH = Path.home() / 'Dropbox' / 'outputDev' / 'fig' 282 | BB = bbox.Bbox([[0.25, 0.25], [7.46, 4.2]]) 283 | FIG_SIZE = (8, 4.5) # 16/9 aspect ratio 284 | COLOR_LIGHT = '#3f5378' 285 | COLOR_DARK = '#263248' 286 | COLOR_YELLOW = '#ff9800' 287 | 288 | def __init__(self, font_size=FONT_SIZE, fig_path=FIG_PATH, fig_size=FIG_SIZE, 289 | bb=BB, color_light=COLOR_LIGHT, color_dark=COLOR_DARK, color_yellow=COLOR_YELLOW): 290 | rcParams['font.sans-serif'] = 'Roboto Condensed' 291 | rcParams['font.family'] = 'sans-serif' 292 | 293 | self.font_size = font_size 294 | self.fig_path = fig_path 295 | 296 | self.bb = bb 297 | self.fig_size = fig_size 298 | self.color_light = color_light 299 | self.color_dark = color_dark 300 | self.color_yellow = color_yellow 301 | 302 | 303 | class USSimpleYieldCurve: 304 | """Simple US zero coupon yield curve for today out to one year""" 305 | # Simple zero yield curve built from T-bill discount yields and the effective fed funds rate 306 | # This is a simplified approximation of a full term structure model 307 | # Consider improving by building a fully specified yield curve model using QuantLib 308 | def __init__(self): 309 | end = dt.date.today() 310 | start = end - dt.timedelta(days=10) 311 | zero_rates = web.DataReader(['DFF', 'DTB4WK', 'DTB3', 'DTB6', 'DTB1YR'], 'fred', start, end) 312 | zero_rates = zero_rates.dropna(axis=0) 313 | zero_yld_date = zero_rates.index[-1] 314 | new_index = [zero_yld_date + relativedelta(days=1), 315 | zero_yld_date + relativedelta(weeks=4), 316 | zero_yld_date + relativedelta(months=3), 317 | zero_yld_date + relativedelta(months=6), 318 | zero_yld_date + relativedelta(years=1)] 319 | zero_curve = pd.DataFrame(data=zero_rates.iloc[-1].values, index=new_index, columns=[end]) 320 | self.zero_curve = zero_curve.resample('D').interpolate(method='polynomial', order=2) 321 | 322 | def get_zero4_date(self, input_date): 323 | """Retrieve zero yield maturity for input_date""" 324 | return self.zero_curve.loc[input_date] 325 | 326 | 327 | class USZeroYieldCurve: 328 | """US zero coupon overnight to 30 year interpolated yield curve""" 329 | ZERO_URL = 'http://www.federalreserve.gov/econresdata/researchdata/feds200628.xls' 330 | DB_PATH = Path.home() / 'Library' / 'Mobile Documents' / 'com~apple~CloudDocs' / 'localDB' / 'xl' 331 | 332 | def __init__(self, update_data=True): 333 | self.relative_dates = [relativedelta(days=1), relativedelta(months=3), relativedelta(months=6)] + \ 334 | [relativedelta(years=x) for x in range(1, 31)] 335 | fed_zero_feather = Path(self.DB_PATH / 'fedzero.feather') 336 | if update_data: 337 | if fed_zero_feather.is_file(): 338 | # load old file 339 | seconds_since_update = time() - fed_zero_feather.stat().st_mtime 340 | zero_yields_old = read_feather(str(fed_zero_feather)) 341 | latest_business_date = pd.to_datetime('today') - pd.tseries.offsets.BDay(1) 342 | if zero_yields_old.index[-1].date() != latest_business_date.date(): 343 | # Only refresh if the file was not updated in the last 12 hours 344 | if seconds_since_update > (3600 * 12): 345 | self.get_raw_zeros() 346 | else: 347 | self.get_raw_zeros() 348 | 349 | self.zero_yields = read_feather(str(fed_zero_feather)) 350 | 351 | def get_zero_4dates(self, as_of_dates, maturity_dates, date_adjust): 352 | """Retrieve zero yields for the given maturity dates""" 353 | if isinstance(as_of_dates, pd.Timestamp): 354 | return self.__get_zero_4date(as_of_dates, maturity_dates, date_adjust) 355 | elif isinstance(as_of_dates, pd.DatetimeIndex): 356 | assert as_of_dates.shape == maturity_dates.shape 357 | zeros = [] 358 | for each_date, each_maturity in zip(as_of_dates, maturity_dates): 359 | zeros.append(self.__get_zero_4date(each_date, each_maturity, date_adjust)) 360 | return zeros 361 | 
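    # Illustrative usage (added; assumes the local feather cache of FED zero
    # yields exists and covers the dates queried):
    #     zc = USZeroYieldCurve(update_data=False)
    #     zc.get_zero_4dates(as_of_dates=pd.Timestamp('2019-01-31'),
    #                        maturity_dates=pd.Timestamp('2019-03-29'),
    #                        date_adjust=True)
    # interpolates the curve pillars around 2019-03-29 with a second-order
    # polynomial and returns that maturity's zero yield.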
def __get_zero_4date(self, as_of_date, maturity_date, date_adjust): 362 | """Interpolate yield curve between points""" 363 | maturities = pd.DatetimeIndex([as_of_date + x for x in self.relative_dates]) 364 | # The bond market is sometimes closed when equity markets are open (e.g. Columbus Day), so dates can be missing 365 | if date_adjust: 366 | try: 367 | zero_yld_curve = self.zero_yields.loc[[as_of_date]] 368 | except KeyError: 369 | dt_idx = self.zero_yields.index.get_loc(as_of_date, method='pad') 370 | tmp_zero_dts = self.zero_yields.index[dt_idx] 371 | zero_yld_curve = self.zero_yields.loc[[tmp_zero_dts]] 372 | else: 373 | zero_yld_curve = self.zero_yields.loc[[as_of_date]] 374 | 375 | zero_yld_series = pd.Series(data=zero_yld_curve.values.squeeze(), index=maturities) 376 | if maturity_date not in maturities: 377 | zero_yld_series.loc[pd.to_datetime(maturity_date)] = float('nan') 378 | zero_yld_series = zero_yld_series.sort_index() 379 | zero_yld_series = zero_yld_series.interpolate(method='polynomial', order=2) 380 | return zero_yld_series[maturity_date] 381 | # zero_yld_curve = pd.DataFrame(data=np.transpose(zero_yld_curve.values), 382 | # index=maturities, columns=[as_of_date]) 383 | # # TODO check 2nd order polynomial yield curve interpolation 384 | # zero_yld_curve = zero_yld_curve.resample('D').interpolate(method='polynomial', order=2) 385 | # return zero_yld_curve.loc[maturity_date] 386 | 387 | def get_raw_zeros(self): 388 | """Update zero coupon yields from FED and FRED""" 389 | try: 390 | print('Updating zero coupon yields') 391 | start_time = time() 392 | urlretrieve(self.ZERO_URL, self.DB_PATH / 'feds200628.xls') 393 | converter = XmlConverter(input_path=str(self.DB_PATH) + '/feds200628.xls', 394 | first_header='SVENY01', last_header='TAU2') 395 | converter.parse() 396 | gsw_zero = converter.build_dataframe() 397 | gsw_zero = gsw_zero.iloc[:, 0:30].copy() 398 | # Reverse dates 399 | gsw_zero = gsw_zero.reindex(index=gsw_zero.index[::-1]) 400 | start_date = gsw_zero.index[0] 401 | fred_data = web.DataReader(['DFF', 'DTB3', 'DTB6'], 'fred', start_date) 402 | zero_yld_matrix = pd.concat([fred_data.dropna(), gsw_zero], axis=1) 403 | zero_yld_matrix = zero_yld_matrix.fillna(method='ffill') 404 | write_feather(zero_yld_matrix, str(self.DB_PATH / 'fedzero.feather')) 405 | end_time = time() 406 | print('File updated in ' + str(round(end_time-start_time)) + ' seconds') 407 | except Exception: 408 | raise Exception('Zero curve update failed - zero curve not updated') 409 | 410 | @property 411 | def cash_index(self): 412 | """Daily cash return index based on monthly investment in a 3-month T-bill""" 413 | discount_yield = self.zero_yields['DTB3'].resample('BM').ffill() 414 | face_value = 10000 415 | tbill_price = face_value - (discount_yield / 100 * 91 * face_value) / 360 416 | investment_yield = (face_value - tbill_price) / face_value * (365 / 91) 417 | return_per_day_month = (investment_yield.shift(1) / 12) / investment_yield.shift(1).index.days_in_month 418 | return_per_day = return_per_day_month.resample('D').bfill() 419 | cash_idx = ff.to_price_index(return_per_day, 100) 420 | # cash_idx = pf.timeseries.cum_returns(return_per_day, 100) 421 | return cash_idx 422 | 423 | 424 | 425 | 
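A short usage sketch for the helpers above (added for illustration; the `option_utilities` import and the temporary path are assumptions):

```python
import datetime as dt
import pandas as pd
from option_utilities import third_fridays, write_feather, read_feather

# The next three standard monthly expiries (third Fridays) after a date
print(third_fridays(dt.date(2019, 1, 2), 3))
# -> [datetime.date(2019, 1, 18), datetime.date(2019, 2, 15), datetime.date(2019, 3, 15)]

# Round-trip a date-indexed frame through the feather wrappers
df = pd.DataFrame({'x': [1.0, 2.0]}, index=pd.date_range('2019-01-01', periods=2))
write_feather(df, '/tmp/example.feather')  # hypothetical scratch path
print(read_feather('/tmp/example.feather'))  # dates restored to the index
```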
-------------------------------------------------------------------------------- /option_daily_prod.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Nov 13 08:33:48 2018 5 | 6 | @author: macbook2 7 | """ 8 | import datetime 9 | from datetime import timedelta 10 | import pandas as pd 11 | import numpy as np 12 | from abc import ABC, abstractmethod 13 | from ib_insync import Index, Option, ContFuture, Future 14 | from option_utilities import time_it, USSimpleYieldCurve, get_theoretical_strike 15 | from spx_data_update import DividendYieldHistory, IbWrapper 16 | from ib_insync.util import isNan 17 | import io 18 | import contextlib 19 | import logging 20 | 21 | 22 | 23 | class OptionAsset(ABC): 24 | """Abstract option asset container: each underlying instrument is an instance of the OptionAsset 25 | class and each instance is the only argument for the OptionMarket class.""" 26 | 27 | def __init__(self, mkt_symbol, vol_symbol, exchange_dict): 28 | self.settlement_PM = None 29 | exchange_mkt = exchange_dict['exchange_mkt'] 30 | exchange_vol = exchange_dict['exchange_vol'] 31 | exchange_opt = exchange_dict['exchange_opt'] 32 | self.trading_class = exchange_dict['trading_class'] 33 | underlying_index = Index(mkt_symbol, exchange_mkt) 34 | ibw = IbWrapper() 35 | ib = ibw.ib 36 | self.underlying_qc = self.__get_underlying_qc(underlying_index, ib) 37 | self.sigma_qc = self.get_sigma_qc(vol_symbol, ib, exchange_vol) 38 | self.chain = self.get_option_chain(underlying_index, ib, exchange_opt) 39 | 40 | ib.disconnect() 41 | 42 | @staticmethod 43 | def __get_underlying_qc(underlying_index, ib): 44 | """Retrieve IB qualifying contract for an index""" 45 | index_qc = ib.qualifyContracts(underlying_index) 46 | assert(len(index_qc) == 1) 47 | return index_qc[0] 48 | 49 | @property 50 | def get_expirations(self): 51 | """Retrieve DataFrame of option expirations (last trading day) for the option chain in object""" 52 | expirations = pd.DataFrame(list(self.chain.expirations), 53 | index=pd.DatetimeIndex(self.chain.expirations), 54 | columns=['expirations']) 55 | time_to_expiry = expirations.index - datetime.datetime.today() # renamed: avoid shadowing datetime.timedelta 56 | expirations['year_fraction'] = time_to_expiry.days / 365 57 | # remove negative year fractions when the nearest expiry is today 58 | expirations = expirations[expirations['year_fraction'] > 0] 59 | return expirations.sort_index() 60 | 61 | @abstractmethod 62 | def get_option_chain(self, underlying_index, ib, exchange): 63 | """Abstract method""" 64 | # 65 | pass 66 | 67 | @abstractmethod 68 | def get_sigma_qc(self, vol_symbol, ib, exchange): 69 | """Abstract method""" 70 | # should return empty string if no pre-calculated vol index exists 71 | pass 72 | 73 | @staticmethod 74 | @abstractmethod 75 | def get_dividend_yield(): 76 | """Abstract method - Gets latest dividend yield""" 77 | # should return the latest dividend yield for the underlying 78 | pass 79 | 80 | 
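The abstract base above requires three hooks: an option chain selector, a volatility-index qualifier and a dividend yield. A minimal subclass sketch (illustrative only; the NDX/VXN symbols, exchanges and the fixed yield are assumptions, not from the source, and instantiating it needs a live IB session like the real classes):

```python
class NdxOptionAsset(OptionAsset):
    """Hypothetical asset container for NDX index options."""

    def __init__(self):
        exchange_dict = {'exchange_mkt': 'NASDAQ', 'exchange_vol': 'CBOE',
                         'exchange_opt': 'CBOE', 'trading_class': 'NDX'}
        super().__init__('NDX', 'VXN', exchange_dict)

    def get_sigma_qc(self, vol_symbol, ib, exchange):
        # Qualify the pre-computed implied volatility index (VXN)
        return ib.qualifyContracts(Index(vol_symbol, exchange))[0]

    def get_option_chain(self, underlying_index, ib, exchange):
        all_chains = ib.reqSecDefOptParams(underlying_index.symbol, '',
                                           underlying_index.secType,
                                           underlying_index.conId)
        return next(c for c in all_chains
                    if c.tradingClass == self.trading_class and c.exchange == exchange)

    @staticmethod
    def get_dividend_yield():
        return 0.008  # placeholder; a real implementation would fetch this
```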
""" 84 | mkt_symbol = 'SPX' 85 | vol_symbol = 'VIX' 86 | exchange_dict = {'exchange_mkt': 'CBOE', 'exchange_vol': 'CBOE', 'exchange_opt': 'CBOE', 87 | 'trading_class': trading_class} # other choice is SPXW 88 | super().__init__(mkt_symbol, vol_symbol, exchange_dict) 89 | if trading_class == 'SPXW': 90 | self.settlement_PM = True 91 | else: 92 | self.settlement_PM = False 93 | 94 | # def get_sigma_qc(self, vol_symbol, ib, exchange): 95 | # """Returns implied Volatility for market""" 96 | # sigma_index = Index(vol_symbol, exchange) 97 | # sigma_qc = ib.qualifyContracts(sigma_index) 98 | # assert(len(sigma_qc) == 1) 99 | # return sigma_qc[0] 100 | 101 | def get_sigma_qc(self, vol_symbol, ib, exchange): 102 | """Returns implied Volatility for market - Using continous front month future""" 103 | 104 | # Define the VIX continuous future 105 | vix_cont_future = ContFuture('VIX', exchange='CFE') 106 | # print(vix_cont_future) 107 | 108 | # Qualify the contract 109 | qualified_contracts = ib.qualifyContracts(vix_cont_future) 110 | # print(qualified_contracts) 111 | if qualified_contracts: 112 | vix_contract = qualified_contracts[0] 113 | ticker = ib.reqMktData(vix_contract) 114 | ib.sleep(1) 115 | # print(ticker) 116 | # Get the latest price 117 | vix_price = ticker.marketPrice() if ticker.marketPrice() else ticker.close 118 | print(f"VIX Continuous Future Price: {vix_price}") 119 | else: 120 | print("Could not qualify contract for VIX continuous future.") 121 | 122 | # Define the base futures contract for VIX 123 | base_future = Future('VIX', exchange='CFE') 124 | 125 | # Fetch all available futures contracts for VIX 126 | futures = ib.reqContractDetails(base_future) 127 | future_contracts = [cd.contract for cd in futures] 128 | 129 | # Get today's date and the target date one month from today 130 | today = datetime.datetime.now().date() 131 | target_date = today + timedelta(days=30) 132 | 133 | # Initialize a list to hold the contract details with expiry dates 134 | contracts_with_expiry = [] 135 | 136 | # Gather expiry dates for each contract 137 | for contract in future_contracts: 138 | # Parse the expiry date from the contract 139 | expiry_date = datetime.datetime.strptime(contract.lastTradeDateOrContractMonth, '%Y%m%d').date() 140 | contracts_with_expiry.append((contract, expiry_date)) 141 | 142 | # Convert to DataFrame for easier manipulation 143 | df = pd.DataFrame(contracts_with_expiry, columns=['Contract', 'ExpiryDate']) 144 | 145 | # Calculate the absolute difference in days from the target date 146 | df['DaysToTarget'] = (df['ExpiryDate'] - target_date).abs().dt.days 147 | 148 | # Select the contract with the smallest difference in days to the target date 149 | closest_contract = df.loc[df['DaysToTarget'].idxmin()]['Contract'] 150 | 151 | # Select the contract with the highest open interest 152 | # most_open_interest_contract = df.loc[df['OpenInterest'].idxmax()]['Contract'] 153 | ticker = ib.reqMktData(closest_contract) 154 | ib.sleep(1) 155 | vix_price = ticker.marketPrice() if ticker.marketPrice() else ticker.close 156 | print(f"VIX One month out: {vix_price}") 157 | # print(f'VIX most traded contract month:{ticker}') 158 | 159 | return closest_contract 160 | # return qualified_contracts[0] 161 | 162 | def get_option_chain(self, underlying_index, ib, exchange): 163 | """Retrieve IB qualifying options contracts for an index""" 164 | all_chains = ib.reqSecDefOptParams(underlying_index.symbol, '', 165 | underlying_index.secType, 166 | underlying_index.conId) 167 | # TO DO Consider 
162 | def get_option_chain(self, underlying_index, ib, exchange): 163 | """Retrieve IB qualifying options contracts for an index""" 164 | all_chains = ib.reqSecDefOptParams(underlying_index.symbol, '', 165 | underlying_index.secType, 166 | underlying_index.conId) 167 | # TO DO: Consider moving this to an abstract method, as different markets will have 168 | # different conditions around which options to select 169 | chain = next(c for c in all_chains if c.tradingClass == self.trading_class and c.exchange == exchange) 170 | return chain 171 | 172 | @staticmethod 173 | def get_dividend_yield(): 174 | """Gets latest dividend yield""" 175 | # TO DO: Add check on date of latest dividend yield 176 | dividend_yield_history = DividendYieldHistory() 177 | dividend_yield = dividend_yield_history.dy_monthly[dividend_yield_history.dy_monthly.columns[0]][-1] / 100 178 | return dividend_yield 179 | 180 | 181 | class RSL2OptionAsset(OptionAsset): 182 | 183 | def __init__(self): 184 | mkt_symbol = 'RUT' 185 | vol_symbol = 'RVX' 186 | # 'trading_class' added: OptionAsset.__init__ reads exchange_dict['trading_class'] 187 | exchange_dict = {'exchange_mkt': 'RUSSELL', 'exchange_vol': 'CBOE', 'exchange_opt': 'CBOE', 188 | 'trading_class': 'RUT'} 189 | super().__init__(mkt_symbol, vol_symbol, exchange_dict) 190 | 191 | def get_sigma_qc(self, vol_symbol, ib, exchange): 192 | """Returns implied volatility for the market""" 193 | sigma_index = Index(vol_symbol, exchange) 194 | sigma_qc = ib.qualifyContracts(sigma_index) 195 | assert(len(sigma_qc) == 1) 196 | return sigma_qc[0] 197 | 198 | @staticmethod 199 | def get_option_chain(underlying_index, ib, exchange): 200 | """Retrieve IB qualifying options contracts for an index""" 201 | all_chains = ib.reqSecDefOptParams(underlying_index.symbol, '', 202 | underlying_index.secType, 203 | underlying_index.conId) 204 | # TO DO: Consider moving this to an abstract method, as different markets will have 205 | # different conditions around which options to select 206 | chain = next(c for c in all_chains if c.tradingClass == underlying_index.symbol and c.exchange == exchange) 207 | return chain 208 | 209 | @staticmethod 210 | def get_dividend_yield(): 211 | """Gets latest dividend yield""" 212 | # TO DO: Add check on date of latest dividend yield 213 | # TO DO: Change to RSL2 dividend yield 214 | # dividend_yield_history = DividendYieldHistory() 215 | # dividend_yield = dividend_yield_history.dy_monthly[-1] / 100 216 | print('Warning: RSL2 Using Fixed Dividend yield') 217 | dividend_yield = 0.0134 218 | 219 | return dividend_yield 220 | 221 | # 222 | # class _emfOptionAsset(OptionAsset): 223 | # def __init__(self, mkt_symbol='MXEF', vol_symbol='VXEEM', exchange=('CBOE', 'CBOE'), \ 224 | # currency='USD', multiplier='100', sec_type='IND'): 225 | # super().__init__(mkt_symbol, vol_symbol, exchange, \ 226 | # currency, multiplier, sec_type) 227 | # self.listing_spread = 10 228 | # 229 | # @staticmethod 230 | # def get_option_implied_dividend_yld(): 231 | # """Returns latest dividend yield for market""" 232 | # url = 'http://www.wsj.com/mdc/public/page/2_3021-peyield.html' 233 | # # Package the request, send the request and catch the response: r 234 | # raw_html_tbl = pd.read_html(url) 235 | # dy_df = raw_html_tbl[2] 236 | # latest_dividend_yield = float(dy_df.iloc[2, 4]) / 100 237 | # return latest_dividend_yield 238 | 239 | 240 | class TradeChoice: 241 | 242 | def __init__(self, tickers, mkt_prices, account_value, z_score, yield_curve, trade_date, option_expiry): 243 | self.tickers = tickers 244 | self.spot = mkt_prices[0] 245 | self.sigma = mkt_prices[1] 246 | self.account_value = account_value 247 | self.z_score = z_score 248 | # last_trade_dates = [item.contract.lastTradeDateOrContractMonth for item in self.tickers] 249 | # unique_last_trade_dates = pd.to_datetime(list(dict.fromkeys(last_trade_dates))) 250 | self.expirations = option_expiry 251 | self.yield_curve = yield_curve 252 | 
self.trade_date = trade_date 251 | 252 | @property 253 | def strike_grid(self): 254 | strikes = [item.contract.strike for item in self.tickers] 255 | strike_array = np.array(strikes).astype(int).reshape(len(self.expirations), 256 | len(strikes) // len(self.expirations)) 257 | df_out = pd.DataFrame(strike_array, index=self.expirations, columns=self.z_score) 258 | df_out = self._format_index(df_out) 259 | return df_out 260 | 261 | @property 262 | def premium_grid(self): 263 | premium_mid = [item.marketPrice() for item in self.tickers] 264 | premium_mid = np.round(premium_mid, 2) 265 | premium_mid = premium_mid.reshape(len(self.expirations), 266 | len(premium_mid) // len(self.expirations)) 267 | df_out = pd.DataFrame(premium_mid, index=self.expirations, columns=self.z_score) 268 | df_out = self._format_index(df_out) 269 | return df_out 270 | 271 | @property 272 | def prices_grid(self): 273 | bid, ask = zip(*[(item.bid, item.ask) for item in self.tickers]) 274 | list_val = [np.array(item).reshape((len(self.expirations), 275 | len(item) // len(self.expirations))) for item in [bid, ask]] 276 | df_lst = [pd.DataFrame(item, index=self.expirations, columns=self.z_score) for item in list_val] 277 | df_out = df_lst[0].astype(str) + '/' + df_lst[1].astype(str) 278 | df_out = self._format_index(df_out) 279 | return df_out 280 | 281 | def pct_otm_grid(self, last_price): 282 | df_out = self.strike_grid / last_price - 1 283 | return df_out 284 | 285 | def option_lots(self, leverage, capital_at_risk): 286 | risk_free = self.yield_curve.get_zero4_date(self.expirations.date) / 100 287 | option_life = np.array([timeDelta.days / 365 for timeDelta in 288 | [expiryDate - self.trade_date for expiryDate in self.expirations]]) 289 | strike_discount = np.exp(- risk_free.mul(option_life)) 290 | strike_discount = strike_discount.squeeze() # convert to series 291 | notional_capital = self.strike_grid.mul(strike_discount, axis=0) - self.premium_grid 292 | contract_lots = [round(capital_at_risk / (notional_capital.copy() / num_leverage * 100), 0) 293 | for num_leverage in leverage] 294 | for counter, df in enumerate(contract_lots): 295 | df.index.name = 'Lev %i' % leverage[counter] 296 | contract_lots = [df.apply(pd.to_numeric, downcast='integer') for df in contract_lots] 297 | return contract_lots 298 | 299 | def margin(self, last_price): 300 | # 100% of premium + 20% spot price - (spot-strike) 301 | # otm_margin = last_price - self.strike_grid 302 | # otm_margin[otm_margin < 0] = 0 303 | single_margin_a = (self.premium_grid + 0.2 * last_price) - (last_price - self.strike_grid) 304 | # 100% of premium + 10% * strike 305 | single_margin_b = self.premium_grid + 0.1 * self.strike_grid 306 | margin = pd.concat([single_margin_a, single_margin_b]).max(level=0) 307 | margin = margin * int(self.tickers[0].contract.multiplier) 308 | return margin 309 | 310 | @staticmethod 311 | def _format_index(df_in): 312 | df_out = df_in.set_index(df_in.index.tz_localize(None).normalize()) 313 | return df_out 314 | 315 | 316 | class OptionMarket: 317 | """IB Interface class that fetches data from IB to pass to trade choice object 318 | 319 | Args: 320 | param1 (OptionAsset): Option asset that contains description of underlying asset. 
321 | """ 322 | 323 | def __init__(self, opt_asset: OptionAsset): 324 | self.option_asset = opt_asset 325 | self.trade_date = pd.DatetimeIndex([datetime.datetime.today()], tz='US/Eastern') 326 | self.zero_curve = USSimpleYieldCurve() 327 | self.dividend_yield = self.option_asset.get_dividend_yield() 328 | self.option_expiry = None 329 | 330 | # @time_it 331 | def form_trade_choice(self, z_score, num_expiries, right='P'): 332 | """Forms option trade choice 333 | 334 | Only public method of OptionMarket class, initiates connection to IB server, 335 | retrieves account value, prices for underlying instrument, the implied volatility index and 336 | the relevant option tickers. 337 | 338 | Args: 339 | z_score (numpy array): Range of Z scores for theoretical option strikes 340 | num_expiries (int): Number of option expirations 341 | right (`str`, optional): 'P' or 'C' 342 | 343 | Returns: TradeChoice object 344 | 345 | Raises: 346 | .""" 347 | 348 | ibw = IbWrapper() 349 | ib = ibw.ib 350 | liquidation_value = self._get_account_tag(ib, 'NetLiquidationByCurrency') 351 | # TO DO: this will not work when underlying does not have implied vol index 352 | # this will happen when we need to calculate an implied vol index 353 | contracts = [self.option_asset.underlying_qc, self.option_asset.sigma_qc] 354 | mkt_prices = self._get_market_prices(ib, contracts) 355 | 356 | option_tickers = self._option_tickers(ib, mkt_prices, num_expiries, z_score, right) 357 | 358 | trd_choice = TradeChoice(option_tickers, mkt_prices, liquidation_value, z_score, self.zero_curve, 359 | self.trade_date, self.option_expiry) 360 | 361 | ib.disconnect() 362 | return trd_choice 363 | 364 | def _calculate_option_expiry(self, last_trade_dates_df): 365 | """Calculate the option expiry based on the settlement type.""" 366 | 367 | # Assuming SPXW is identified by a trading class or some attribute of the option asset 368 | if 'SPXW' in self.option_asset.trading_class: 369 | # Logic for SPXW, typically PM settled options 370 | option_expiry = last_trade_dates_df.index.normalize() + pd.DateOffset(hours=16) 371 | elif self.option_asset.settlement_PM: 372 | # General PM settled options logic 373 | option_expiry = last_trade_dates_df.index.normalize() + pd.DateOffset(hours=16) 374 | else: 375 | # AM settled options logic 376 | option_expiry = last_trade_dates_df.index + pd.tseries.offsets.BDay(1) 377 | option_expiry = option_expiry.normalize() + pd.DateOffset(hours=9) + pd.DateOffset(minutes=45) 378 | 379 | return option_expiry.tz_localize(tz='US/Eastern') 380 | 381 | # @time_it 382 | def _option_tickers(self, ib, mkt_prices, num_expiries, z_score, right): 383 | """ Retrieves valid option tickers based on theoretical strikes. 
384 | 385 | :param ib: Interactive brokers connection 386 | :param mkt_prices: List of underlying index and vol index prices 387 | :param num_expiries (int or list): number of expirations 388 | :param z_score (numpy array): Range of Z scores for theoretical option strikes 389 | :param right (str) : Type of option P or C 390 | :return: Option tickers 391 | """ 392 | 393 | # Validate inputs 394 | if not mkt_prices or len(mkt_prices) < 2: 395 | raise ValueError("Market prices should contain at least two values: underlying price and volatility.") 396 | if not isinstance(z_score, np.ndarray): 397 | raise TypeError("z_score should be a numpy array.") 398 | 399 | # Handle single int or list for num_expiries 400 | if isinstance(num_expiries, int): 401 | num_expiries = range(num_expiries) 402 | last_trade_dates_df = self.option_asset.get_expirations.iloc[num_expiries] 403 | 404 | # Calculate option expiry 405 | self.option_expiry = self._calculate_option_expiry(last_trade_dates_df) 406 | 407 | # Calculate the risk-free rate 408 | risk_free = self.zero_curve.get_zero4_date(self.option_expiry.date) / 100 409 | 410 | # Get the last price and sigma (volatility) 411 | last_price = mkt_prices[0] 412 | sigma = mkt_prices[1] / 100 413 | 414 | # Calculate theoretical strikes 415 | theoretical_strikes = get_theoretical_strike(self.trade_date, self.option_expiry, 416 | last_price, risk_free.squeeze().values, 417 | z_score, self.dividend_yield, sigma) 418 | 419 | # Prepare the list of expiration dates 420 | expiration_date_list = last_trade_dates_df['expirations'].tolist() 421 | expiration_date_list = [item for item in expiration_date_list for _ in range(len(z_score))] 422 | 423 | # Get the closest valid contract for each theoretical strike and expiration date 424 | contracts = [self._get_closest_valid_contract(strike, expiration, ib, right) 425 | for strike, expiration in zip(theoretical_strikes.flatten(), expiration_date_list)] 426 | 427 | # Pass the list of contracts to reqTickers 428 | try: 429 | tickers = ib.reqTickers(*contracts) 430 | logger.info(f"Successfully retrieved tickers: {tickers}") 431 | except Exception as e: 432 | logger.error(f"Error retrieving tickers: {e}") 433 | raise 434 | 435 | return tickers 436 | 437 | 438 | @staticmethod 439 | def _get_account_tag(ib, tag): 440 | account_tag = [v for v in ib.accountValues() if v.tag == tag and v.currency == 'BASE'] 441 | return account_tag 442 | 443 | 444 | @staticmethod 445 | # @time_it 446 | def _get_market_prices(ib, contracts): 447 | 448 | # tickers = ib.reqTickers(*contracts) 449 | 450 | # Alternative to get live tickers 451 | for contract in contracts: 452 | ib.reqMktData(contract, '', False, False) 453 | 454 | # print('Waiting for tickers') 455 | ib.sleep(1) 456 | tickers = [ib.ticker(contract) for contract in contracts] 457 | # print(tickers) 458 | 459 | mkt_prices = [ticker.last if ticker.marketPrice() == ticker.close else ticker.marketPrice() 460 | for ticker in tickers] 461 | if any([True for item in mkt_prices if isNan(item)]): 462 | mkt_prices = [ticker.marketPrice() for ticker in tickers] 463 | 464 | return mkt_prices 465 | 466 | # def _get_closest_valid_contract(self, theoretical_strike, expiration, ib, right='P'): 467 | # """Return valid contract for expiration closest to theoretical_strike""" 468 | # exchange = self.option_asset.chain.exchange 469 | # symbol = self.option_asset.underlying_qc.symbol 470 | # strikes_sorted = sorted(list(self.option_asset.chain.strikes), 471 | # key=lambda x: abs(x - theoretical_strike)) 472 | # ii = 0 
473 | # contract = Option(symbol, expiration, strikes_sorted[ii], right, exchange, 474 | # tradingClass=self.option_asset.trading_class) 475 | # qualified_contract = validate_contract(ib, contract) 476 | # 477 | # # Loop until a valid contract is found or ii exceeds 1000 478 | # while qualified_contract is None and ii < 1000: 479 | # ii += 1 480 | # contract = Option(symbol, expiration, strikes_sorted[ii], right, exchange, 481 | # tradingClass=self.option_asset.trading_class) 482 | # qualified_contract = validate_contract(ib, contract) 483 | # 484 | # # Assertion to break when infinite loop exits after ii > 1000 485 | # assert qualified_contract is not None, "No valid contracts found" 486 | # return qualified_contract 487 | 488 | def _get_closest_valid_contract(self, theoretical_strike, expiration, ib, right='P'): 489 | """Return valid contract for expiration closest to theoretical_strike""" 490 | exchange = self.option_asset.chain.exchange 491 | symbol = self.option_asset.underlying_qc.symbol 492 | strikes_sorted = sorted(self.option_asset.chain.strikes, key=lambda x: abs(x - theoretical_strike)) 493 | qualified_contract = None # guard: stays None if the chain has no strikes 494 | 495 | for ii, strike in enumerate(strikes_sorted): 496 | contract = Option(symbol, expiration, strike, right, exchange, tradingClass=self.option_asset.trading_class) 497 | qualified_contract = validate_contract(ib, contract) 498 | 499 | # Log and return the first valid contract found 500 | if qualified_contract is not None: 501 | logger.info(f"Found valid contract at index {ii} with strike {strike}") 502 | return qualified_contract 503 | 504 | # Early exit if no valid contract is found within a reasonable range 505 | if ii > 1000: 506 | logger.warning("Exceeded 1000 iterations without finding a valid contract") 507 | break 508 | 509 | assert qualified_contract is not None, "No valid contracts found" 510 | return qualified_contract 511 | 512 | @staticmethod 513 | def get_closest_valid_twin_contract(qualified_contracts, ib): 514 | """Returns call for put (and vice versa) qualified contract. 515 | Will raise an error if the contract is not found""" 516 | key = lambda x: 'C' if x == 'P' else 'P' 517 | contracts = [Option(list_elem[0], list_elem[1], list_elem[2], list_elem[3], list_elem[4]) for list_elem \ 518 | in [[contract.symbol, contract.lastTradeDateOrContractMonth, contract.strike, key(contract.right), 519 | contract.exchange] for contract in qualified_contracts]] 520 | qualified_contract_twins = ib.qualifyContracts(*contracts) 521 | 522 | return qualified_contract_twins 523 | 524 | @staticmethod 525 | def get_option_implied_dividend_yld(qualified_contracts: list, ib, market_price): 526 | expiration_str = [contract.lastTradeDateOrContractMonth for contract in qualified_contracts] 527 | time_to_expiry = pd.DatetimeIndex(expiration_str) - pd.Timestamp.today() # pd.datetime is deprecated 528 | year_fraction = time_to_expiry.days / 365 529 | 530 | tickers = ib.reqTickers(*qualified_contracts) 531 | pv_dividends = [ticker.modelGreeks.pvDividend for ticker in tickers] 532 | dividend_yield = np.array(pv_dividends) / (market_price * year_fraction) 533 | 534 | return dividend_yield 535 | 
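The strike search above sorts the chain by distance to the theoretical strike and walks outwards until a contract validates. A self-contained sketch of that technique (toy numbers, hypothetical is_listed predicate, no IB connection needed):

```python
# Toy illustration of the closest-strike search used in _get_closest_valid_contract.
strikes = [2600, 2625, 2650, 2675, 2700]
theoretical_strike = 2639.0

def is_listed(strike):
    # stand-in for validate_contract(); pretend 2625 is not a valid contract
    return strike != 2625

# Sort candidates by absolute distance to the theoretical strike
candidates = sorted(strikes, key=lambda x: abs(x - theoretical_strike))
traded_strike = next(s for s in candidates if is_listed(s))
print(candidates)     # [2650, 2625, 2675, 2600, 2700]
print(traded_strike)  # 2650
```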
536 | logging.basicConfig( 537 | filename='option_market.log', # Log file name 538 | level=logging.INFO, # Log level (INFO, DEBUG, etc.) 539 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', # Log format 540 | ) 541 | 542 | logger = logging.getLogger(__name__) 543 | 544 | 545 | def validate_contract(ib, contract): 546 | try: 547 | with contextlib.redirect_stderr(io.StringIO()): 548 | qualified_contracts = ib.qualifyContracts(contract) 549 | 550 | if not qualified_contracts: 551 | logger.info(f"Invalid contract: {contract}") 552 | return None 553 | else: 554 | logger.info(f"Contract is valid: {qualified_contracts[0]}") 555 | return qualified_contracts[0] 556 | 557 | except Exception as e: 558 | logger.error(f"An error occurred during contract validation: {e}") 559 | return None -------------------------------------------------------------------------------- /option_simulation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Mar 29 14:19:37 2018 5 | 6 | @author: macbook2 7 | """ 8 | # import feather 9 | import pandas as pd 10 | import numpy as np 11 | import pyfolio as pf 12 | # import matplotlib.pyplot as plt 13 | # import matplotlib.gridspec as gridspec 14 | # from matplotlib.ticker import FormatStrFormatter 15 | # from pathlib import Path 16 | from option_utilities import get_actual_option_expiries, USZeroYieldCurve, get_theoretical_strike, read_feather 17 | from spx_data_update import UpdateSP500Data, get_dates 18 | 19 | 20 | class OptionSimulation: 21 | COL_NAMES = ['strike_traded', 'strike_theo', 'days_2_exp', 'zero', 'bid_1545', 'ask_1545'] 22 | GREEK_COL_NAMES = ['delta_1545', 'gamma_1545', 'theta_1545', 'vega_1545', 'rho_1545', 'implied_volatility_1545', 'active_underlying_price_1545'] 23 | 24 | def __init__(self, update_simulation_data=False): 25 | if update_simulation_data: 26 | updater = UpdateSP500Data() 27 | self.usZeroYldCurve = updater.usZeroYldCurve 28 | else: 29 | self.usZeroYldCurve = USZeroYieldCurve(update_data=False) 30 | self.feather_directory = UpdateSP500Data.TOP_LEVEL_PATH / 'feather' 31 | file_names = {'spot': 'sp500_close', 'sigma': 'vix_index', 'dividend_yield': 'sp500_dividend_yld'} 32 | self.sim_param = self.get_simulation_parameters(UpdateSP500Data.TOP_LEVEL_PATH, file_names) 33 | self.expiration_actual = None 34 | # Simulation dates depend on availability of zero rates 35 | last_zero_date = self.usZeroYldCurve.zero_yields.index[-1] 36 | self.sim_dates_all = self.sim_param.index[self.sim_param.index <= last_zero_date] 37 | self.option_type = None 38 | 39 | @staticmethod 40 | def get_trade_dates(sim_dates_all, trade_type='EOM'): 41 | # Add pre-cooked trade date recipes here 42 | month_diff = sim_dates_all.month[1:] - sim_dates_all.month[0:-1] 43 | eom_trade_dates = sim_dates_all[np.append(month_diff.values.astype(bool), False)] 44 | # mon3_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-3MON') 45 | # tue3_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-3TUE') 46 | # wed3_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-3WED') 47 | # thu3_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-3THU') 48 | # fri3_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-3FRI') 49 | # mon1_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-1MON') 50 | # tue1_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-1TUE') 51 | # 
wed1_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-1WED') 52 | # thu1_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-1THU') 53 | # fri1_trade_dates = pd.date_range(self.sim_dates_all[0], self.sim_dates_all[-1], freq='WOM-1FRI') 54 | if isinstance(trade_type, str): 55 | def select_trade_date_type(x): 56 | return { 57 | 'EOM': eom_trade_dates, 58 | # '3MON': mon3_trade_dates, 59 | # '3TUE': tue3_trade_dates, 60 | # '3WED': wed3_trade_dates, 61 | # '3THU': thu3_trade_dates, 62 | # '3FRI': fri3_trade_dates, 63 | # '1MON': mon1_trade_dates, 64 | # '1TUE': tue1_trade_dates, 65 | # '1WED': wed1_trade_dates, 66 | # '1THU': thu1_trade_dates, 67 | # '1FRI': fri1_trade_dates 68 | }.get(x) # None for unknown recipes 69 | trade_dates = select_trade_date_type(trade_type) 70 | if trade_dates is None: 71 | raise ValueError('Unknown trade_type: {}'.format(trade_type)) 72 | trade_dates = pd.DatetimeIndex(trade_dates.date) 73 | elif isinstance(trade_type, tuple): 74 | assert len(trade_type) == 2 75 | assert trade_type[0] < trade_type[-1] 76 | trade_dates = sim_dates_all[trade_type[0]::trade_type[-1]] 77 | 78 | assert any(trade_dates.intersection(sim_dates_all) == trade_dates), \ 79 | 'Trade dates are not a subset of simulation dates' 80 | return trade_dates 81 | 82 | def _get_expiration_dates(self, option_duration_months, trade_dates): 83 | """Create expiration dates based on trade dates and number of expiry months""" 84 | # TODO: Generalize for option_duration_days 85 | # expiration_theoretical = trade_dates + pd.Timedelta(option_duration_months, unit='M') 86 | # expiration_theoretical = pd.DatetimeIndex(expiration_theoretical.date) 87 | 88 | expiration_theoretical = trade_dates + pd.DateOffset(months=option_duration_months) 89 | expiration_actual, available_expiries = get_actual_option_expiries(expiration_theoretical, 90 | trade_dates, 91 | str(self.feather_directory) + 92 | '/UnderlyingOptionsEODCalcs_') 93 | return expiration_actual 94 | 95 | def trade_sim(self, zscore, option_duration_months, option_type='P', 96 | trade_day_type='EOM'): 97 | """Run option simulation""" 98 | self.option_type = option_type 99 | 100 | print('Running Simulation - trade_day_type:' + str(trade_day_type) + ' | Z-score ' + str(zscore) + 101 | ' | Duration ' + str(option_duration_months) + ' | Option Type: {}'.format(option_type)) 102 | 103 | trade_dates = self.get_trade_dates(self.sim_dates_all, trade_day_type) 104 | 105 | trade_model_inputs = self.sim_param.loc[trade_dates] 106 | 107 | self.expiration_actual = self._get_expiration_dates(option_duration_months, trade_dates) 108 | 109 | zero_yields = self.usZeroYldCurve.get_zero_4dates(as_of_dates=trade_dates, 110 | maturity_dates=self.expiration_actual, 111 | date_adjust=True) 112 | 113 | zero_yields = pd.Series(data=zero_yields, index=trade_dates, name='zeros') 114 | zero_yields = pd.concat([zero_yields, 115 | pd.Series(data=self.expiration_actual, index=zero_yields.index, 116 | name='expiration_date')], axis=1) 117 | 118 | trade_model_inputs[zero_yields.columns] = zero_yields 119 | spot_price = trade_model_inputs.loc[:, 'sp500_close'].values 120 | dividend_yield = trade_model_inputs.loc[:, 'Yield Value'].values / 100 121 | sigma = trade_model_inputs.loc[:, 'vix_index'].values / 100 122 | risk_free = trade_model_inputs.loc[:, 'zeros'].values / 100 123 | option_strikes_theoretical = get_theoretical_strike(trade_dates, 124 | self.expiration_actual, 125 | spot_price, risk_free, [zscore], 126 | dividend_yield, sigma) 127 | 128 | trade_model_inputs['strike_theoretical'] = option_strikes_theoretical 129 | 130 | sim_dates_live = 
pd.date_range(trade_dates[0], self.sim_dates_all[-1], freq='B') 129 | sim_dates_live = sim_dates_live.intersection(self.sim_dates_all) 130 | 131 | # Simulation date cannot go beyond last expiry 132 | if sim_dates_live[-1] >= self.expiration_actual[-1]: 133 | last_sim_date_idx = sim_dates_live.get_loc(self.expiration_actual[-1]) 134 | sim_dates_live = sim_dates_live[:last_sim_date_idx] 135 | 136 | dtf_trades = self.simulation_loop(option_type, sim_dates_live, trade_dates, trade_model_inputs, 137 | self.usZeroYldCurve, 138 | self.feather_directory) 139 | 140 | sim_output = SimulationParameters(dtf_trades, zscore, sim_dates_live, option_type, str(trade_day_type)) 141 | return sim_output 142 | 143 | @staticmethod 144 | def simulation_loop(option_type, sim_dates_live, trade_dates, trade_model_inputs, zero_curve, 145 | feather_input=None): 146 | dtf_trades = [] 147 | for i, trade_dt in enumerate(trade_dates): 148 | # Get date slice between two trading dates 149 | start_idx = sim_dates_live.get_loc(trade_dates[i]) 150 | 151 | if trade_dt == trade_dates[-1]: 152 | # last date slice is to end of simulation period 153 | date_slice = sim_dates_live[start_idx:] 154 | else: 155 | end_idx = sim_dates_live.get_loc(trade_dates[i + 1]) + 1 156 | date_slice = sim_dates_live[start_idx:end_idx] 157 | # Create empty data frame 158 | df_out = pd.DataFrame(np.nan, index=date_slice, columns=OptionSimulation.COL_NAMES 159 | + OptionSimulation.GREEK_COL_NAMES) 160 | # loop through each day within a date_slice 161 | for dts in date_slice: 162 | try: 163 | dtf = feather_input[feather_input['quote_date'] == dts] 164 | except TypeError: 165 | # dtf = feather.read_dataframe(str(feather_input) + 166 | # '/UnderlyingOptionsEODCalcs_' + 167 | # dts.strftime(format='%Y-%m-%d') 168 | # + '_' + option_type + '.feather') 169 | dtf = pd.read_feather(str(feather_input) + 170 | '/UnderlyingOptionsEODCalcs_' + 171 | dts.strftime(format='%Y-%m-%d') 172 | + '_' + option_type + '.feather') 173 | # First trade date find traded strike from available strikes based on 174 | # theoretical strike 175 | if dts == date_slice[0]: 176 | expiry_date = trade_model_inputs.loc[dts]['expiration_date'] 177 | strike_theo = trade_model_inputs.loc[dts]['strike_theoretical'] 178 | option_trade_data = dtf[dtf['expiration'] == expiry_date] 179 | available_strikes = option_trade_data['strike'] 180 | # Look for strike in available strikes 181 | idx = (np.abs(available_strikes.values - strike_theo)).argmin() 182 | strike_traded = available_strikes.iloc[idx] 183 | else: 184 | option_trade_data = dtf[dtf['expiration'] == expiry_date] 185 | 186 | days2exp = expiry_date - dts 187 | zero_rate = zero_curve.get_zero_4dates(as_of_dates=dts, 188 | maturity_dates=expiry_date, 189 | date_adjust=True) / 100 190 | df_out.loc[dts, 'zero'] = zero_rate 191 | df_out.loc[dts, 'strike_traded'] = strike_traded 192 | df_out.loc[dts, 'days_2_exp'] = days2exp.days 193 | df_out.loc[dts, 'strike_theo'] = strike_theo 194 | 195 | df_out.loc[dts, 'bid_1545'] = option_trade_data[option_trade_data['strike'] == 196 | strike_traded]['bid_1545'].iloc[0] 197 | 198 | df_out.loc[dts, 'ask_1545'] = option_trade_data[option_trade_data['strike'] == 199 | strike_traded]['ask_1545'].iloc[0] 200 | 201 | df_out.loc[dts, OptionSimulation.GREEK_COL_NAMES] = option_trade_data[option_trade_data['strike'] == 202 | strike_traded][OptionSimulation.GREEK_COL_NAMES].iloc[0] 203 | dtf_trades.append(df_out) 204 | return dtf_trades 205 | 206 | @staticmethod 207 | def get_simulation_parameters(input_path, 
file_names): 208 | """ Returns closing spot, implied vol and dividend yield for instrument 209 | :param input_path: 210 | :param file_names: 211 | :return : 212 | """ 213 | file_strings = [str(input_path / file_name) for file_name in file_names.values()] 214 | list_df = [read_feather(file_string) for file_string in file_strings] 215 | out_df = pd.concat(list_df, axis=1) 216 | # Forward fill first monthly dividend yield 217 | # TODO remove dependency on 'Yield Value' column name 218 | out_df['Yield Value'] = out_df[['Yield Value']].fillna(method='ffill') 219 | out_df = out_df.dropna(axis=0, how='any') 220 | 221 | # Double check dates from feather files are identical to out_df 222 | opt_dates = get_dates(input_path / 'feather') 223 | assert all(opt_dates == out_df.index) 224 | 225 | return out_df 226 | 227 | @staticmethod 228 | def get_previous_business_day(super_set: pd.DatetimeIndex, sub_set: pd.DatetimeIndex): 229 | diff = sub_set.difference(super_set) 230 | while len(diff) > 0: 231 | new_dates = diff - pd.tseries.offsets.BDay(1) 232 | sub_set = new_dates.union(sub_set.intersection(super_set)) 233 | diff = sub_set.difference(super_set) 234 | return sub_set 235 | 236 | 237 | class SimulationParameters: 238 | def __init__(self, dtf_trades, zscore, sim_dates_live, option_type: str, trade_day_type: str): 239 | self.dtf_trades = dtf_trades 240 | self.zscore = zscore 241 | self.sim_dates_live = sim_dates_live 242 | self.option_type = option_type 243 | self.trade_day_type = trade_day_type 244 | 245 | 246 | class OptionTrades: 247 | def __init__(self, sim_output: SimulationParameters, leverage: float, **kwargs): 248 | self.simulation_parameters = sim_output 249 | if np.isscalar(leverage): 250 | self.leverage = pd.Series(leverage, self.simulation_parameters.sim_dates_live) 251 | else: 252 | self.leverage = leverage 253 | self.all_returns = self.sell_option(**kwargs) 254 | 255 | def sell_option(self, trade_mid=True): 256 | dtf_trades = self.simulation_parameters.dtf_trades 257 | for i, item in enumerate(dtf_trades): 258 | item['discount'] = item['days_2_exp'] / 365 * - item['zero'] 259 | item['discount'] = item['discount'].map(np.exp) 260 | if trade_mid: 261 | item['premium_sold'] = pd.concat([item['ask_1545'], 262 | item['bid_1545']], axis=1).mean(axis=1) 263 | else: 264 | # Option sold at bid and then valued @ ask 265 | item['premium_sold'] = item['ask_1545'] 266 | item.loc[item.index[0], 'premium_sold'] = item.iloc[0]['bid_1545'].astype(float) 267 | 268 | item['asset_capital'] = item['strike_traded'] * item['discount'] - item['premium_sold'] 269 | item['equity_capital'] = item['asset_capital'] / self.leverage 270 | premium_diff = item[['premium_sold']].diff(axis=0) * -1 271 | item['return_arithmetic'] = premium_diff.divide(item['equity_capital'].shift(1), 272 | axis=0).astype(np.float64) 273 | premium_diff.iloc[0] = item['equity_capital'].iloc[0] 274 | item['return_geometric'] = np.log(item['return_arithmetic'].astype(np.float64) + 1) 275 | dtf_trades[i] = item 276 | 277 | return_list_geometric = [] 278 | return_list_arithmetic = [] 279 | for item in dtf_trades: 280 | return_list_geometric.append(item['return_geometric'].dropna()) 281 | return_list_arithmetic.append(item['return_arithmetic'].dropna()) 282 | 283 | returns_geometric = pd.concat(return_list_geometric) 284 | returns_arithmetic = pd.concat(return_list_arithmetic) 285 | return returns_geometric, returns_arithmetic 286 | 287 | @property 288 | def greeks(self): 289 | """Get trade simulation greeks""" 290 | greeks_list = [] 291 | for 
item in self.simulation_parameters.dtf_trades: 292 | # Greeks are 1 to n-1 293 | greeks_list.append(item[OptionSimulation.GREEK_COL_NAMES].iloc[:-1].astype(np.float64)) 294 | # Need to add greeks for last day of simulation 295 | greeks_list[-1] = item[OptionSimulation.GREEK_COL_NAMES].astype(np.float64) 296 | 297 | greeks = pd.concat(greeks_list) 298 | # delta, gamma, theta, vega, rho need to be multiplied by -1 * leverage 299 | greek_col_bool = sum([greeks.columns.str.contains(item) 300 | for item in ['delta', 'gamma', 'theta', 'vega', 'rho']]) > 0 301 | greek_columns = greeks.loc[:, greek_col_bool] 302 | greek_columns = greek_columns.multiply(-1 * self.leverage, axis=0) 303 | greeks.loc[:, greek_col_bool] = greek_columns 304 | return greeks 305 | 306 | @property 307 | def strikes(self): 308 | """Get trade simulation strikes""" 309 | strike_list = [] 310 | for item in self.simulation_parameters.dtf_trades: 311 | strike_list.append(item['strike_traded'].iloc[:-1].astype(np.float64)) 312 | return pd.concat(strike_list) 313 | 314 | @property 315 | def spot(self): 316 | """Get trade simulation underlying spot prices""" 317 | spot_list = [] 318 | for item in self.simulation_parameters.dtf_trades: 319 | # fixed: this previously appended strike_traded (copy-paste from strikes); use the underlying price column 320 | spot_list.append(item['active_underlying_price_1545'].iloc[:-1].astype(np.float64)) 321 | return pd.concat(spot_list) 322 | 323 | @property 324 | def days_2_expiry(self): 325 | """Get trade simulation days to expiry""" 326 | days_list = [] 327 | for item in self.simulation_parameters.dtf_trades: 328 | # Like the greeks, days to expiry are 1 to n-1 329 | days_list.append(item['days_2_exp'].iloc[:-1].astype(np.float64)) 330 | return pd.concat(days_list) 331 | 332 | @property 333 | def returns(self): 334 | """Return daily arithmetic returns""" 335 | returns_out = self.all_returns[-1].rename(self.strategy_name) 336 | return returns_out 337 | 338 | @property 339 | def return_index(self): 340 | """Return cumulative return index of the daily arithmetic returns""" 341 | index_out = pf.timeseries.cum_returns(self.returns, 100) 342 | index_out[self.simulation_parameters.sim_dates_live[0]] = 100 343 | index_out = index_out.reindex(index_out.index.sort_values()) 344 | return index_out 345 | 346 | @property 347 | def strategy_name(self): 348 | strategy_name = '{}{}{}L{}'.format(self.simulation_parameters.trade_day_type, 349 | self.simulation_parameters.option_type, 350 | self.simulation_parameters.zscore, 351 | self.leverage.mean()) 352 | return strategy_name 353 | 354 | @property 355 | def trade_dates(self): 356 | simulation_trade_dates = [item.index[0] for item in self.simulation_parameters.dtf_trades] 357 | return pd.DatetimeIndex(simulation_trade_dates) 358 | 359 | @property 360 | def performance_summary(self): 361 | """Get simulation performance""" 362 | # convert returns to series for pyfolio function 363 | performance = pf.timeseries.perf_stats(self.returns) 364 | perf_index = list(performance.index) 365 | performance = performance['perf_stats'] 366 | performance['StartDate'], performance['EndDate'] = list(self.simulation_parameters.sim_dates_live[[0, -1]] 367 | .strftime('%b %d, %Y')) 368 | performance['Leverage'], performance['ZScore'], performance['Avg_Days'] = [self.leverage.mean(), 369 | self.simulation_parameters.zscore, 370 | self.days_2_expiry.mean()] 371 | performance = performance.reindex(['StartDate', 'EndDate', 'Leverage', 'ZScore', 'Avg_Days'] + perf_index) 372 | performance = performance.append(self.greeks.mean()) 373 | performance = performance.rename(self.strategy_name) 374 | performance = performance.to_frame() 375 | 
375 | 
376 |         return performance
377 | 
378 | 
379 | # def plot_performance_quad(returns, fig_path=None, fig_name='heat_map_quad', font_size=20):
380 | #
381 | #     fig = plt.figure(figsize=(16, 9))
382 | #     fig.suptitle(returns.name, fontsize=16)
383 | #     gs = gridspec.GridSpec(2, 2, wspace=0.2, hspace=0.3)
384 | #     ax_heatmap = plt.subplot(gs[0, 0])
385 | #     ax_monthly = plt.subplot(gs[0, 1])
386 | #     ax_box_plot = plt.subplot(gs[1, 0])
387 | #     ax_yearly = plt.subplot(gs[1, 1])
388 | #
389 | #     # Chart 1: Heatmap
390 | #     pf.plotting.plot_monthly_returns_heatmap(returns, ax=ax_heatmap)
391 | #     ax_heatmap.set_xticklabels(np.arange(0.5, 12.5, step=1))
392 | #     ax_heatmap.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
393 | #                                rotation=45)
394 | #     ax_heatmap.set_xlabel('')
395 | #     ax_heatmap.set_ylabel('')
396 | #     # ax_heatmap.set_label(rotation=90)
397 | #
398 | #     # Chart 2: Monthly return distribution
399 | #     pf.plotting.plot_monthly_returns_dist(returns, ax=ax_monthly)
400 | #     ax_monthly.xaxis.set_major_formatter(FormatStrFormatter('%.1f%%'))
401 | #     ax_monthly.set_xlabel('')
402 | #     leg1 = ax_monthly.legend(['mean'], framealpha=0.0, prop={'size': font_size})
403 | #     for text in leg1.get_texts():
404 | #         # text.set_color('white')
405 | #         text.set_label('mean')
406 | #
407 | #     # Chart 3: Return quantiles
408 | #     df_weekly = pf.timeseries.aggregate_returns(returns, convert_to='weekly')
409 | #     df_monthly = pf.timeseries.aggregate_returns(returns, convert_to='monthly')
410 | #     pf.plotting.plot_return_quantiles(returns, df_weekly, df_monthly, ax=ax_box_plot)
411 | #
412 | #     # Chart 4: Annual returns
413 | #     pf.plotting.plot_annual_returns(returns, ax=ax_yearly)
414 | #     _ = ax_yearly.legend(['mean'], framealpha=0.0, prop={'size': font_size})
415 | #     ax_yearly.xaxis.set_major_formatter(FormatStrFormatter('%.1f%%'))
416 | #     plt.xticks(rotation=45)
417 | #     ax_yearly.set_xlabel('')
418 | #     ax_yearly.set_ylabel('')
419 | #     for ax in [ax_box_plot, ax_heatmap, ax_monthly, ax_yearly]:
420 | #         for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
421 | #                      ax.get_xticklabels() + ax.get_yticklabels()):
422 | #             item.set_fontsize(font_size)
423 | #
424 | #     for items in (ax_yearly.get_yticklabels() + ax_heatmap.get_yticklabels()):
425 | #         items.set_fontsize(font_size - 5)
426 | #     if fig_path is not None:
427 | #         if Path.is_dir(fig_path):
428 | #             plt.savefig(fig_path / fig_name, dpi=600, bbox_inches='tight', transparent=True)
429 | #     return fig
430 | 
431 | 
432 | class OptionWeeklySimulation:
433 |     COL_NAMES = OptionSimulation.COL_NAMES
434 |     GREEK_COL_NAMES = OptionSimulation.GREEK_COL_NAMES
435 | 
436 |     def __init__(self, update_simulation_data=False):
437 |         if update_simulation_data:
438 |             updater = UpdateSP500Data()
439 |             self.usZeroYldCurve = updater.usZeroYldCurve
440 |         else:
441 |             self.usZeroYldCurve = USZeroYieldCurve(update_data=False)
442 |         self.csv_directory = UpdateSP500Data.TOP_LEVEL_PATH / 'csv'
443 |         file_names = {'spot': 'sp500_close', 'sigma': 'vix_index', 'dividend_yield': 'sp500_dividend_yld'}
444 |         self.sim_param = OptionSimulation.get_simulation_parameters(UpdateSP500Data.TOP_LEVEL_PATH, file_names)
445 |         self.expiration_actual = None
446 |         # self.raw_df = feather.read_dataframe(UpdateSP500Data.TOP_LEVEL_PATH / 'raw_df.feather')
447 |         self.raw_df = csv_2_feather(UpdateSP500Data.TOP_LEVEL_PATH / 'csv')
448 |         # Simulation dates depend on availability of zero rates
449 |         last_zero_date = self.usZeroYldCurve.zero_yields.index[-1]
450 |         sim_dates_all = pd.DatetimeIndex(self.raw_df['quote_date'].unique())
451 |         sim_dates_all = sim_dates_all[sim_dates_all <= last_zero_date]
452 |         self.sim_dates_all = sim_dates_all.sort_values()
453 |         self.zscore = None
454 |         # self.option_data = self.raw_df()
455 | 
456 |     def trade_sim(self, zscore, option_duration, option_type='P'):
457 |         """Run option simulation"""
458 |         raw_df = self.raw_df
459 |         raw_df.loc[:, 'option_type'] = raw_df['option_type'].apply(str.upper)
460 |         raw_df = raw_df[raw_df['option_type'] == option_type]
461 |         print(f'Running Simulation - Weekly Options - | Z-score {zscore} '
462 |               f'| Duration {option_duration.days} Days | Option Type: {option_type}')
463 |         self.zscore = zscore
464 |         # trade_dates = OptionSimulation.get_trade_dates(self.sim_dates_all, trade_type=trade_day_type)
465 |         # trade_model_inputs = self.sim_param.loc[trade_dates]
466 |         # self.expiration_actual = self._get_expiration_dates(option_duration, trade_dates, raw_df)
467 | 
468 |         return raw_df
469 | 
470 |     # @staticmethod
471 |     # def _get_expiration_dates(option_duration_weeks, trade_dates, raw_df):
472 |     #     expiration_theoretical = OptionWeeklySimulation.theoretical_expiration_dates(option_duration_weeks, trade_dates)
473 |     #
474 |     #     # expiration_theoretical = pd.DatetimeIndex(expiration_theoretical)
475 |     #     # expiration_actual, available_expiries = get_actual_option_expiries(expiration_theoretical,
476 |     #     #                                                                    trade_dates,
477 |     #     #                                                                    str(self.feather_directory) +
478 |     #     #                                                                    '/UnderlyingOptionsEODCalcs_')
479 |     #     return expiration_theoretical
480 | 
481 |     # def actual_expiration_dates(self):
482 |     #     self.raw_df.groupby('quote_date')
483 |     #     all_expiration_dates = pd.DatetimeIndex(dtf['expiration'].unique())
484 |     #     all_expiration_dates = all_expiration_dates.sort_values()
485 |     #     all_available_expiry.append(all_expiration_dates)
486 |     #     expiry_index = all_expiration_dates.get_loc(item, method='ffill')
487 |     #     if trade_dates[i] == trade_dates[-1]:
488 |     #         expiration_date = all_expiration_dates[expiry_index]
489 |     #     else:
490 |     #         while all_expiration_dates[expiry_index] <= trade_dates[i + 1]:
491 |     #             expiry_index = expiry_index + 1
492 |     #         expiration_date = all_expiration_dates[expiry_index]
493 |     #     expiry_dates_actual.append(expiration_date)
494 |     #
495 |     #     return pd.DatetimeIndex(expiry_dates_actual), all_available_expiry
496 | 
497 | 
498 | 
499 |     # @staticmethod
500 |     # def theoretical_expiration_dates(option_duration, trade_dates):
501 |     #     """Return DatetimeIndex of theoretical expiration dates"""
502 |     #     expiration_theoretical = trade_dates + option_duration
503 |     #     # Check that every theoretical expiration except the last is after the following trade_date
504 |     #     bool_idx = expiration_theoretical[:-1] >= trade_dates[1:]
505 |     #     if any(~bool_idx):
506 |     #         print('Some expiration dates are before following trade date - shifting expirations')
507 |     #         expiration_theoretical_series = expiration_theoretical[:-1].to_series()
508 |     #         trade_dates_series = trade_dates[1:].to_series()
509 |     #         expiration_theoretical_series[~bool_idx] = np.NaN  # Replace old values with nan
510 |     #         expiration_theoretical_series = pd.concat([expiration_theoretical_series.dropna(),
511 |     #                                                    trade_dates_series[~bool_idx]], axis=0)
512 |     #         expiration_theoretical_series = expiration_theoretical_series.sort_values()
513 |     #         expiration_theoretical_list = expiration_theoretical_series.index.tolist()
514 |     #         expiration_theoretical_list.append(expiration_theoretical[-1])  # Add back last expiration date
515 |     #         expiration_theoretical_dti = pd.DatetimeIndex(np.asarray(expiration_theoretical_list))
516 |     #     else:
517 |     #         expiration_theoretical_dti = expiration_theoretical
518 |     #     return expiration_theoretical_dti
519 | 
520 | 
521 | def csv_2_feather(csv_directory):
522 | 
523 |     spxw_feather = UpdateSP500Data.TOP_LEVEL_PATH / 'raw_df.feather'
524 |     # history = feather.read_dataframe(spxw_feather)
525 |     history = pd.read_feather(spxw_feather)
526 |     last_date = pd.DatetimeIndex(history['quote_date'].unique()).sort_values()[-1]
527 | 
528 |     csv_dates = get_dates(csv_directory, file_type='.csv')
529 |     csv_dates = csv_dates.to_series()
530 | 
531 |     csv_dates = csv_dates[csv_dates > last_date]
532 |     csv_dates = csv_dates.index
533 |     try:
534 |         file_list = []
535 |         for item in csv_dates:
536 |             file_list.append('UnderlyingOptionsEODCalcs_' + item.strftime('%Y-%m-%d') + '.csv')
537 |         dataframe_list = []
538 |         greek_cols = ['delta_1545',
539 |                       'rho_1545',
540 |                       'vega_1545',
541 |                       'gamma_1545',
542 |                       'theta_1545']
543 |         # for item in os.listdir(csv_directory):
544 |         for item in file_list:
545 |             if item.endswith('.csv'):
546 |                 future_df = pd.read_csv(csv_directory / item)
547 |                 if (pd.DatetimeIndex(future_df['quote_date'].unique()) > last_date).all():
548 |                     dataframe_list.append(future_df)
549 | 
550 |         raw_df = pd.concat(dataframe_list, axis=0, ignore_index=True)
551 |         raw_df = raw_df[['quote_date', 'root', 'expiration', 'strike',
552 |                          'option_type', 'open', 'high', 'low', 'close', 'active_underlying_price_1545',
553 |                          'implied_volatility_1545', 'delta_1545', 'gamma_1545', 'theta_1545',
554 |                          'vega_1545', 'rho_1545', 'bid_1545', 'ask_1545']]
555 |         raw_df = raw_df[raw_df['root'] == 'SPXW']
556 |         raw_df.loc[:, ['quote_date', 'expiration']] = raw_df.loc[:, ['quote_date', 'expiration']].apply(
557 |             pd.to_datetime)
558 |         raw_df.loc[:, greek_cols] = raw_df.loc[:, greek_cols].apply(pd.to_numeric, errors='coerce')
559 |         raw_df = pd.concat([history, raw_df], axis=0)
560 |         raw_df = raw_df.sort_values('quote_date').reset_index(drop=True)
561 |         # feather.write_dataframe(raw_df, spxw_feather)
562 |         raw_df.to_feather(spxw_feather)
563 |         print('Feather updated')
564 |     except ValueError:
565 |         print('Feather file not updated')
566 |         raw_df = history
567 |     # ['underlying_symbol', 'quote_date', 'root', 'expiration', 'strike',
568 |     #  'option_type', 'open', 'high', 'low', 'close', 'trade_volume',
569 |     #  'bid_size_1545', 'bid_1545', 'ask_size_1545', 'ask_1545',
570 |     #  'underlying_bid_1545', 'underlying_ask_1545',
571 |     #  'implied_underlying_price_1545', 'active_underlying_price_1545',
572 |     #  'implied_volatility_1545', 'delta_1545', 'gamma_1545', 'theta_1545',
573 |     #  'vega_1545', 'rho_1545', 'bid_size_eod', 'bid_eod', 'ask_size_eod',
574 |     #  'ask_eod', 'underlying_bid_eod', 'underlying_ask_eod', 'vwap',
575 |     #  'open_interest', 'delivery_code']
576 | 
577 |     return raw_df
578 | 
--------------------------------------------------------------------------------
/option_stream/main.py:
--------------------------------------------------------------------------------
1 | from bokeh.io import curdoc
2 | from bokeh.models import ColumnDataSource, DataTable, TableColumn, Div, Slider, NumberFormatter
3 | from bokeh.layouts import layout, row, column
4 | import pandas as pd
5 | import numpy as np
6 | from ib_insync import Option, Contract, Index, util, ContractDetails, IB, Ticker
7 | from zoneinfo import ZoneInfo  # Available in Python 3.9 and later
8 | from datetime import datetime
9 | from typing import List, Set, Dict
10 | 
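# Sketch of the constant z-score strike rule implemented by
# get_theoretical_strike() below. The numbers are purely illustrative and are
# not used anywhere by the app:
#
#     K = S * exp((r - q + sigma**2 / 2) * T + sigma * sqrt(T) * z)
#
#     import numpy as np
#     S, r, q, sigma, T, z = 4000.0, 0.05, 0.013, 0.20, 30 / 365, -1.0
#     time_discount = (r - q + sigma ** 2 / 2) * T    # drift term
#     time_scale = sigma * np.sqrt(T)                 # volatility term
#     K = S * np.exp(time_discount + time_scale * z)  # ~3795, a put struck ~5% below spot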
11 | 12 | from stream_utilities import IbWrapper, USSimpleYieldCurve, illiquid_equity 13 | 14 | 15 | def convert_to_datestamps(date_lists: List[List[str]]) -> List[List[datetime]]: 16 | """ 17 | Convert lists of date strings in 'YYYYMMDD' format to lists of datetime objects set to 16:00 EST (America/New_York). 18 | 19 | Args: 20 | date_lists (List[List[str]]): A list of lists containing date strings in 'YYYYMMDD' format. 21 | 22 | Returns: 23 | List[List[datetime]]: A list of lists where each date string is converted into a datetime object set to 16:00 EST. 24 | 25 | Example: 26 | Input: [['20230915', '20231022'], ['20240101']] 27 | Output: [[datetime(2023, 9, 15, 16, 0, tzinfo=ZoneInfo("America/New_York")), 28 | datetime(2023, 10, 22, 16, 0, tzinfo=ZoneInfo("America/New_York"))], 29 | [datetime(2024, 1, 1, 16, 0, tzinfo=ZoneInfo("America/New_York"))]] 30 | """ 31 | # Define the EST time zone using America/New_York (handles EST/EDT) 32 | est = ZoneInfo("America/New_York") 33 | 34 | # Prepare a list to hold all the converted date lists 35 | all_datestamps = [] 36 | 37 | # Iterate over each list of date strings 38 | for date_list in date_lists: 39 | # List to hold converted datetime objects for the current date_list 40 | datestamps = [] 41 | 42 | for date_str in date_list: 43 | # Parse the date string into a datetime object with default time (00:00) 44 | date_obj = datetime.strptime(date_str, '%Y%m%d') 45 | 46 | # Set the time to 4:00 PM EST (16:00) and apply the EST timezone 47 | date_obj = date_obj.replace(hour=16, minute=0, second=0, tzinfo=est) 48 | 49 | # Append the datetime object to the current list 50 | datestamps.append(date_obj) 51 | 52 | # Append the current list of datetime objects to the final result 53 | all_datestamps.append(datestamps) 54 | 55 | # Return the list of lists with converted datetime objects 56 | return all_datestamps 57 | 58 | 59 | def fetch_option_chain_via_params(ibw, underlying_symbol: str = 'SPX') -> pd.DataFrame: 60 | """ 61 | Fetches the option chain parameters for a given underlying symbol from Interactive Brokers (IB). 62 | 63 | Args: 64 | underlying_symbol (str): The ticker symbol of the underlying index for which to fetch the option chain. 65 | Defaults to 'SPX' (S&P 500 index). 66 | 67 | Returns: 68 | pd.DataFrame: A DataFrame containing the option chain parameters for the specified underlying symbol, 69 | filtered to include only SPXW options and the SMART exchange. The expirations are converted 70 | to datetime objects in EST time zone. 71 | 72 | Raises: 73 | ValueError: If no contract details are found for the provided underlying symbol. 74 | 75 | Example: 76 | Input: 'SPX' 77 | Output: DataFrame with option chain parameters including expirations converted to EST datetime objects. 
78 | """ 79 | # Define the underlying contract for SPX (Index contract) 80 | spx_contract = Index(underlying_symbol, 'CBOE', 'USD') 81 | 82 | # Open a connection to IBWrapper only once 83 | # ibw.ib # Access the Interactive Brokers (IB) connection 84 | 85 | # Fetch the contract details to get a valid conId (contract ID) 86 | contract_details: list[ContractDetails] = ibw.ib.reqContractDetails(spx_contract) 87 | 88 | # Raise an error if no valid contract details are found 89 | if not contract_details: 90 | raise ValueError(f"Contract details not found for {underlying_symbol}") 91 | 92 | # Extract the conId from the first contract detail (this is required for further queries) 93 | spx_contract.conId = contract_details[0].contract.conId 94 | print(f"SPX contract conId: {spx_contract.conId}") 95 | 96 | # Request all option parameters for SPX (using the symbol, security type, and contract ID) 97 | option_params = ibw.ib.reqSecDefOptParams(spx_contract.symbol, '', spx_contract.secType, spx_contract.conId) 98 | 99 | # Convert the option parameters to a DataFrame for easier manipulation 100 | option_params_df: pd.DataFrame = util.df(option_params) 101 | 102 | # Filter the DataFrame to include only SPXW options and options listed on the SMART exchange 103 | filtered_params = option_params_df[ 104 | (option_params_df['tradingClass'].isin(['SPXW'])) & (option_params_df['exchange'] == 'SMART') 105 | ].copy() 106 | 107 | # Convert expiration dates from strings to datetime objects in EST time zone 108 | filtered_params['expirations_timestamps'] = convert_to_datestamps(filtered_params.loc[:, 'expirations'].copy()) 109 | 110 | # Return the filtered option parameters DataFrame 111 | return filtered_params 112 | 113 | 114 | def get_theoretical_strike( 115 | option_expiry: List[datetime], 116 | spot_price: List[float], 117 | risk_free: pd.Series, 118 | z_score: List[float], 119 | dividend_yield: float, 120 | sigma: List[float] 121 | ) -> pd.DataFrame: 122 | """ 123 | Calculate theoretical option strikes with constant delta for given expiries, spot price, risk-free rates, 124 | volatility, and z-scores. 125 | 126 | Args: 127 | option_expiry (List[datetime]): List of option expiry dates (in datetime format). 128 | spot_price (List[float]): A list containing a single float representing the spot price. 129 | risk_free (pd.Series): Series of risk-free rates (as percentages, e.g., 5 for 5%), with one value for each expiry. 130 | z_score (List[float]): List of z-scores for which to calculate the theoretical strikes. 131 | dividend_yield (float): The dividend yield of the underlying security (as a percentage, e.g., 1.3 for 1.3%). 132 | sigma (List[float]): A list containing a single float representing the implied volatility (as a percentage, e.g., 20 for 20%). 133 | 134 | Returns: 135 | pd.DataFrame: A DataFrame containing the theoretical strikes for each z-score, with additional columns 136 | for option life, time discount, time scale, and strike discount. 137 | 138 | Raises: 139 | ValueError: If `spot_price` or `sigma` are not lists with exactly one element. 140 | 141 | Example: 142 | Input: 143 | option_expiry = [datetime(2024, 12, 31), datetime(2025, 1, 31)] 144 | spot_price = [4000] 145 | risk_free = pd.Series([5.0, 5.2]) 146 | z_score = [-1, 0, 1] 147 | dividend_yield = 1.3 148 | sigma = [20] 149 | Output: 150 | DataFrame with theoretical strikes for each z-score. 
151 | """ 152 | 153 | # Ensure sigma and spot_price are lists with exactly one element 154 | if isinstance(sigma, list) and len(sigma) == 1: 155 | sigma = sigma[0] / 100 # Convert sigma from percentage to decimal 156 | else: 157 | raise ValueError("sigma must be a list with one element.") 158 | 159 | if isinstance(spot_price, list) and len(spot_price) == 1: 160 | spot_price = spot_price[0] 161 | else: 162 | raise ValueError("spot_price must be a list with one element.") 163 | 164 | # Get the current trade date (using EST timezone) 165 | trade_date = datetime.now(tz=ZoneInfo("America/New_York")) 166 | 167 | # Calculate option life in years for each expiry (time to expiration) 168 | option_life = [(date - trade_date).total_seconds() / (365 * 24 * 60 * 60) for date in option_expiry] 169 | 170 | # Create a DataFrame to store data, using expiry dates as the index 171 | df = pd.DataFrame(index=[dt.strftime("%Y%m%d") for dt in option_expiry]) 172 | df['Days to Expiry'] = [(date - trade_date).days for date in option_expiry] 173 | # Fill the DataFrame with constant values and calculated values 174 | df['option_life'] = option_life 175 | df['trade_date'] = trade_date 176 | df['sigma'] = sigma # Apply the single sigma value to all rows 177 | df['dividend_yield'] = dividend_yield 178 | df['spot_price'] = spot_price # Apply the single spot price to all rows 179 | df['risk_free'] = risk_free / 100 # Convert risk-free rates from percentages to decimals (vectorized) 180 | 181 | # Calculate the time discount and time scale 182 | df['time_discount'] = (df['risk_free'] - df['dividend_yield'] + (df['sigma'] ** 2) / 2) * df['option_life'] 183 | df['time_scale'] = df['sigma'] * np.sqrt(df['option_life']) 184 | 185 | # Calculate the strike discount 186 | df['strike_discount'] = np.exp(-df['risk_free'].mul(df['option_life'])) 187 | 188 | # Vectorized calculation of theoretical strike for each z-score 189 | 190 | df[f'theoretical_strike'] = df['spot_price'] * np.exp(df['time_discount'] + df['time_scale'] * z_score) 191 | 192 | return df 193 | 194 | 195 | class QualifiedContractsCache: 196 | def __init__(self): 197 | # Cache for qualified contracts 198 | self.cache = {} 199 | # Cache for unqualified contracts 200 | self.unqualified_cache = set() 201 | 202 | def get_contract(self, contract_key): 203 | """ 204 | Retrieves a qualified contract from the cache if it exists. 205 | """ 206 | return self.cache.get(contract_key) 207 | 208 | def add_contract(self, contract_key, qualified_contract): 209 | """ 210 | Adds a qualified contract to the cache. 211 | """ 212 | self.cache[contract_key] = qualified_contract 213 | 214 | def is_unqualified(self, contract_key): 215 | """ 216 | Checks if the contract is known to be unqualified. 217 | """ 218 | return contract_key in self.unqualified_cache 219 | 220 | def add_unqualified(self, contract_key): 221 | """ 222 | Records a contract as unqualified. 223 | """ 224 | self.unqualified_cache.add(contract_key) 225 | 226 | 227 | 228 | def qualify_all_contracts( 229 | ib_wrapper: IbWrapper, 230 | strikes_df: pd.DataFrame, 231 | available_strikes: List[float], 232 | cache: QualifiedContractsCache 233 | ) -> pd.DataFrame: 234 | """ 235 | Qualifies option contracts for given strikes and expirations, handling errors and retrying qualification 236 | with alternative strikes if necessary, while utilizing a cache to avoid re-qualification. 
237 | """ 238 | # Ensure 'expiry_date' is in string format 'YYYYMMDD' 239 | strikes_df['expiry_date'] = strikes_df['expiry_date'].astype(str) 240 | 241 | # Initialize 'qualified_contracts' column 242 | strikes_df['qualified_contracts'] = None 243 | 244 | # Initialize a set to keep track of used strikes for each expiry date 245 | used_strikes_per_expiry: Dict[str, Set[float]] = {expiry: set() for expiry in strikes_df['expiry_date'].unique()} 246 | 247 | # Iterate over each row to process contracts 248 | for idx, row in strikes_df.iterrows(): 249 | expiry_str = row['expiry_date'] 250 | theoretical_strike = row['theoretical_strike'] 251 | 252 | # Start with the closest strike 253 | closest_strike = min(available_strikes, key=lambda y: abs(theoretical_strike - y)) 254 | 255 | # Initialize a list of alternative strikes sorted by proximity 256 | alternative_strikes = sorted(available_strikes, key=lambda y: abs(theoretical_strike - y)) 257 | 258 | # Remove strikes that have already been used for this expiry 259 | alternative_strikes = [strike for strike in alternative_strikes if 260 | strike not in used_strikes_per_expiry[expiry_str]] 261 | 262 | # Attempt to qualify the contract using alternative strikes 263 | qualified_contract = None 264 | for strike in alternative_strikes: 265 | # Create a unique key for the contract 266 | contract_key = ( 267 | 'SPX', 268 | expiry_str, 269 | strike, 270 | 'P', # Assuming Put options 271 | 'SMART', 272 | 'USD', 273 | 'SPXW' 274 | ) 275 | 276 | # Check if the contract is known to be unqualified 277 | if cache.is_unqualified(contract_key): 278 | # Skip this contract as we know it cannot be qualified 279 | used_strikes_per_expiry[expiry_str].add(strike) 280 | continue 281 | 282 | # Check if the contract is in the cache 283 | cached_contract = cache.get_contract(contract_key) 284 | if cached_contract: 285 | # Use the cached contract 286 | qualified_contract = cached_contract 287 | # Mark the strike as used for this expiry date 288 | used_strikes_per_expiry[expiry_str].add(strike) 289 | break # Contract is found and qualified 290 | else: 291 | # Create the Option contract 292 | option = Option( 293 | symbol='SPX', 294 | lastTradeDateOrContractMonth=expiry_str, 295 | strike=strike, 296 | right='P', 297 | exchange='SMART', 298 | currency='USD', 299 | tradingClass='SPXW' 300 | ) 301 | 302 | try: 303 | # Qualify the contract 304 | qualified_contract = ib_wrapper.ib.qualifyContracts(option)[0] 305 | # Store in cache 306 | cache.add_contract(contract_key, qualified_contract) 307 | # Mark the strike as used for this expiry date 308 | used_strikes_per_expiry[expiry_str].add(strike) 309 | break # Successfully qualified 310 | except Exception as e: 311 | print(f"Failed to qualify contract {option}: {e}") 312 | # Mark the strike as used and add to unqualified cache 313 | used_strikes_per_expiry[expiry_str].add(strike) 314 | cache.add_unqualified(contract_key) 315 | qualified_contract = None 316 | continue # Try the next alternative strike 317 | 318 | if qualified_contract is None: 319 | print(f"No valid contract found for expiry {expiry_str} and theoretical strike {theoretical_strike}") 320 | # Handle the case where no valid contract could be qualified 321 | strikes_df.at[idx, 'qualified_contracts'] = None 322 | strikes_df.at[idx, 'closest_strike'] = None 323 | else: 324 | # Assign the qualified contract and the strike used to the DataFrame 325 | strikes_df.at[idx, 'qualified_contracts'] = qualified_contract 326 | strikes_df.at[idx, 'closest_strike'] = 
qualified_contract.strike
327 | 
328 |     return strikes_df
329 | 
330 | 
331 | def get_bid_ask_for_contracts(ib: IB, qualified_contracts: pd.Series) -> pd.DataFrame:
332 |     """
333 |     Fetches bid and ask prices for a list of qualified option contracts from Interactive Brokers (IB).
334 | 
335 |     Args:
336 |         ib (IB): An instance of the IB connection to interact with the Interactive Brokers API.
337 |         qualified_contracts (pd.Series): A pandas Series of qualified option contracts.
338 | 
339 |     Returns:
340 |         pd.DataFrame: A DataFrame containing market data including bid, ask, last traded price, volume,
341 |                       and market price (midpoint) for each contract.
342 | 
343 |     Steps:
344 |         1. Request market data for each qualified contract.
345 |         2. Retry up to 5 times for contracts with invalid bid or ask prices.
346 |         3. Collect bid, ask, last traded price, volume, and market price for each contract and return as a DataFrame.
347 | 
348 |     Example:
349 |         Input: qualified_contracts as a pandas Series of option contracts.
350 |         Output: DataFrame with bid, ask, last traded, volume, and market price for each contract.
351 |     """
352 | 
353 |     market_data = []  # List to store the market data for each contract
354 | 
355 |     # Batch request for market data (snapshot=True for current prices only)
356 |     tickers: Dict[Contract, Ticker] = {}
357 |     for contract in qualified_contracts:
358 |         # Request snapshot market data for each contract
359 |         ticker = ib.reqMktData(contract, snapshot=True)
360 |         tickers[contract] = ticker
361 | 
362 |     # Wait for all the market data to arrive
363 |     ib.sleep(1)  # Sleep to allow the initial data to populate
364 | 
365 |     for contract, ticker in tickers.items():
366 |         retry_attempts = 5  # Max number of retries for invalid bid/ask data
367 |         attempt = 0
368 | 
369 |         # Retry until valid bid and ask data is received or max retries is reached
370 |         while (pd.isna(ticker.bid) or pd.isna(ticker.ask)) and attempt < retry_attempts:
371 |             print(f"Invalid bid/ask for {contract}, retrying... (Attempt {attempt + 1})")
372 |             ib.sleep(1)  # Wait for 1 second before retrying
373 |             attempt += 1
374 | 
375 |         # Append the contract and the data after the bid/ask are valid or after max retries
376 |         market_data.append({
377 |             'contract': contract,
378 |             'bid': ticker.bid if ticker.bid >= 1 else None,  # Use None for invalid bids
379 |             'ask': ticker.ask if ticker.ask >= 1 else None,  # Use None for invalid asks
380 |             'last_traded': ticker.last,  # Last traded price
381 |             'volume': ticker.volume,  # Volume for the day
382 |             'market': ticker.marketPrice()  # Market price (midpoint of bid/ask)
383 |         })
384 | 
385 |     # Convert the market data into a DataFrame
386 |     return pd.DataFrame(market_data, index=qualified_contracts.index)
387 | 
388 | 
389 | def get_account_tag(ib, tag):
390 |     account_tag = [v for v in ib.accountValues() if v.tag == tag and v.currency == 'BASE']
391 |     return account_tag
392 | 
393 | 
394 | def fetch_data(
395 |         ib_wrapper: IbWrapper,
396 |         option_expiries: List[datetime],
397 |         risk_free: pd.Series,
398 |         option_params_df: pd.DataFrame,
399 |         cache: QualifiedContractsCache,
400 |         leverage: float = 1,
401 |         z_score: float = -1
402 | ) -> pd.DataFrame:
403 |     """
404 |     Fetches market data and calculates theoretical strikes for SPX options, including bid/ask prices, volume, and margin calculations.
405 | 
406 |     Args:
407 |         ib_wrapper (IbWrapper): An instance of the IbWrapper that provides a connection to the IB API.
408 |         option_expiries (List[datetime]): A list of option expiry dates.
409 |         risk_free (pd.Series): Series of risk-free rates corresponding to each expiry.
410 |         option_params_df (pd.DataFrame): DataFrame containing the option parameters, including available strikes.
411 |         cache (QualifiedContractsCache): Cache of previously qualified and unqualified contracts.
412 |         leverage (float): Leverage used when sizing the number of lots. Defaults to 1.
413 |         z_score (float): Z-score used to set the theoretical strike. Defaults to -1.
414 | 
415 |     Returns:
416 |         pd.DataFrame: A DataFrame with calculated option strikes, bid/ask data, and margin calculations.
417 | 
418 |     Steps:
419 |         1. Fetch market data for SPX and VIX index contracts.
420 |         2. Calculate theoretical option strikes based on market prices and risk-free rates.
421 |         3. Qualify the option contracts and retrieve bid/ask data.
422 |         4. Calculate margin, notional capital, and leverage for each contract.
423 |         5. Return a cleaned DataFrame with relevant information for display or further analysis.
424 | 
425 |     """
426 | 
427 |     # Helper to fetch snapshot prices for the SPX and VIX index contracts
428 |     def get_market_data(ib_wrapper: IbWrapper, contracts: List[Contract]) -> Dict[str, float]:
429 |         """
430 |         Fetches the snapshot market prices for a list of contracts.
431 | 
432 |         Args:
433 |             ib_wrapper: The instance of the IbWrapper to interact with Interactive Brokers.
434 |             contracts: A list of contracts (e.g., SPX, VIX) for which market data needs to be fetched.
435 | 
436 |         Returns:
437 |             A dictionary with contract symbols as keys and their corresponding market prices as values.
438 |         """
439 |         tickers = {}
440 | 
441 |         # Request market data for all contracts in snapshot mode
442 |         for contract in contracts:
443 |             ticker = ib_wrapper.ib.reqMktData(contract, '', snapshot=True)
444 |             tickers[contract.symbol] = ticker
445 | 
446 |         # Wait for data to be populated and ensure no NaNs
447 |         # ib_wrapper.ib.sleep(1)
448 | 
449 |         # Extract the market prices and return in a dictionary
450 |         market_prices = {}
451 |         for symbol, ticker in tickers.items():
452 |             while pd.isna(ticker.marketPrice()):
453 |                 ib_wrapper.ib.sleep(0.1)
454 |             market_prices[symbol] = ticker.marketPrice()
455 | 
456 |         return market_prices
457 | 
458 |     spx_contract = Index('SPX', 'CBOE', 'USD')
459 |     vix_contract = Index('VIX', 'CBOE', 'USD')
460 | 
461 |     # Qualify each contract separately
462 |     spx_contract = ib_wrapper.ib.qualifyContracts(spx_contract)[0]
463 |     vix_contract = ib_wrapper.ib.qualifyContracts(vix_contract)[0]
464 |     market_prices = get_market_data(ib_wrapper, [spx_contract, vix_contract])
465 | 
466 | 
467 |     # Retrieve the market prices for SPX and VIX
468 |     spx_price = market_prices['SPX']
469 |     vix_price = market_prices['VIX']
470 | 
471 |     # Step 1: Calculate theoretical strikes
472 |     strikes_df = get_theoretical_strike(
473 |         option_expiries, [spx_price], risk_free, [z_score], 0.013, [vix_price]
474 |     )
475 |     available_strikes = option_params_df['strikes'].values[0]
476 |     strikes_df['expiry_date'] = strikes_df.index
477 | 
478 |     # Step 2: Qualify all option contracts based on theoretical strikes
479 |     strikes_df = qualify_all_contracts(ib_wrapper, strikes_df, available_strikes, cache)
480 | 
481 |     # Step 3: Get the liquidation value for the account
482 |     liquidation_value = get_account_tag(ib_wrapper.ib, 'NetLiquidationByCurrency')
483 | 
484 |     # Step 4: Get bid/ask market data for all qualified contracts
485 |     market_data_df = get_bid_ask_for_contracts(ib_wrapper.ib, strikes_df['qualified_contracts'])
486 |     strikes_df['bid'] = market_data_df['bid']
487 |     strikes_df['ask'] = market_data_df['ask']
488 |     strikes_df['last_traded'] = market_data_df['last_traded']
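    # Worked example of the margin rule applied in Step 6 below (illustrative
    # numbers only): for a short put with spot 4000, strike 3800 and mid 50,
    #     margin_a = (50 + 0.2 * 4000) - (4000 - 3800) = 650   # premium + 20% of spot, less OTM amount
    #     margin_b = 50 + 0.1 * 3800 = 430                     # premium + 10% of strike floor
    #     margin   = max(650, 430) * 100 = 65,000 per contract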
489 | strikes_df['volume'] = market_data_df['volume'] 490 | strikes_df['market'] = market_data_df['market'] 491 | 492 | # Step 5: Calculate mid price and other derived metrics 493 | strikes_df['mid'] = (strikes_df['bid'] + strikes_df['ask']) / 2 494 | notional_capital = strikes_df['closest_strike'] * strikes_df['strike_discount'] - strikes_df['mid'] 495 | 496 | # Calculate lots for each leverage level and margin 497 | capital_at_risk = illiquid_equity(discount=0.5) + float(liquidation_value[0].value) 498 | # for num_leverage in [1, 1.5, 2]: 499 | # strikes_df[f'lots_leverage_{num_leverage}'] = round( 500 | # capital_at_risk / (notional_capital / num_leverage * 100), 0 501 | # ) 502 | 503 | 504 | strikes_df[f'Lots'] = round( 505 | capital_at_risk / (notional_capital / leverage * 100), 0 506 | ) 507 | 508 | # Step 6: Calculate margin for each contract 509 | single_margin_a = (strikes_df['mid'] + 0.2 * strikes_df['spot_price']) - ( 510 | strikes_df['spot_price'] - strikes_df['closest_strike'] 511 | ) 512 | single_margin_b = strikes_df['mid'] + 0.1 * strikes_df['closest_strike'] 513 | margin = pd.concat([single_margin_a, single_margin_b], axis=1).max(axis=1) 514 | strikes_df['Margin'] = margin * 100 515 | strikes_df['Margin'] = strikes_df['Margin'] * strikes_df['Lots'] 516 | 517 | # Drop unnecessary columns and clean up the DataFrame 518 | out_df = strikes_df.drop( 519 | columns=[ 520 | 'option_life', 'trade_date', 'time_discount', 'time_scale', 521 | 'strike_discount', 'theoretical_strike', 'last_traded', 'volume', 522 | 'qualified_contracts' 523 | ] 524 | ).copy() 525 | # Convert expiry dates to a more readable format 526 | out_df['expiry_date'] = out_df['expiry_date'].apply( 527 | lambda x: datetime.strptime(str(x), '%Y%m%d').strftime('%d %b %Y') 528 | ) 529 | out_df['Discount'] = out_df['closest_strike'] / out_df['spot_price'] - 1 530 | out_df.rename(columns={'closest_strike': 'Strike', 531 | 'expiry_date': 'Expiry', 532 | 'bid': 'Bid', 533 | 'ask': 'Ask', 534 | 'mid': 'Mid', 535 | }, inplace=True) 536 | 537 | # Handle missing values in numerical and non-numerical columns 538 | for col in out_df.columns: 539 | if pd.api.types.is_numeric_dtype(out_df[col]): 540 | out_df.fillna({col: 0}, inplace=True) 541 | else: 542 | out_df.fillna({col: '--'}, inplace=True) 543 | 544 | # Return the updated DataFrame 545 | return out_df 546 | 547 | class PriceTracker: 548 | """ 549 | A class to track the current and previous prices for SPX, VIX, or any other asset 550 | and determine whether the trend is up, down, or unchanged. 551 | """ 552 | def __init__(self): 553 | # Store the current and previous prices and trends 554 | self.previous_prices = {} 555 | self.trends = {} 556 | 557 | def update_price(self, symbol, current_price): 558 | """ 559 | Updates the price for a given symbol and determines the trend (up, down, unchanged). 
560 | """ 561 | # Check if we have a previous price stored for this symbol 562 | if symbol in self.previous_prices: 563 | previous_price = self.previous_prices[symbol] 564 | # Determine the trend 565 | if current_price > previous_price: 566 | self.trends[symbol] = 'green' 567 | elif current_price < previous_price: 568 | self.trends[symbol] = 'red' 569 | else: 570 | # If unchanged, keep the previous trend 571 | self.trends[symbol] = self.trends.get(symbol, 'black') 572 | else: 573 | # If no previous price, default to black 574 | self.trends[symbol] = 'black' 575 | 576 | # Update the previous price with the current one 577 | self.previous_prices[symbol] = current_price 578 | 579 | def get_trend(self, symbol): 580 | """ 581 | Returns the trend color for the given symbol. 582 | """ 583 | return self.trends.get(symbol, 'black') 584 | 585 | # Update create_bokeh_app to pass arguments to fetch_data 586 | # Integrate the PriceTracker class into the Bokeh app 587 | def create_bokeh_app(): 588 | """ 589 | Creates a Bokeh app that displays a DataTable with SPX option data and tracks price trends. 590 | """ 591 | # Initialize the IBWrapper to connect to Interactive Brokers 592 | ib_wrapper = IbWrapper() 593 | 594 | # Initialize the cache 595 | cache = QualifiedContractsCache() 596 | 597 | # Initialize PriceTracker to track SPX and VIX trends 598 | price_tracker = PriceTracker() 599 | 600 | try: 601 | # Establish connection to Interactive Brokers 602 | ib_wrapper.connect_to_ib() 603 | 604 | # Fetch option chain parameters (including expiration dates and strikes) 605 | option_params_df = fetch_option_chain_via_params(ib_wrapper) 606 | 607 | # Filter valid option expirations based on the current date 608 | option_expiries = [ 609 | expiry for expiry in option_params_df['expirations_timestamps'].values[0] 610 | if expiry >= datetime.now(tz=ZoneInfo("America/New_York")) 611 | ] 612 | 613 | # Initialize yield curve to fetch risk-free rates for the option expiries 614 | yld_curve = USSimpleYieldCurve() 615 | risk_free = yld_curve.get_zero4_date([date.date() for date in option_expiries]) 616 | lev_slider = Slider(start=0.5, end=4, value=1, step=0.5, title="Leverage") 617 | z_slider = Slider(start=-4, end=4, value=-1, step=1, title="Z-Score") 618 | # Fetch the full data set with market prices, strikes, and margins 619 | out_df = fetch_data( 620 | ib_wrapper, option_expiries, risk_free, option_params_df, cache, leverage=lev_slider.value, 621 | z_score=z_slider.value 622 | ) 623 | 624 | # Set up the data source for the Bokeh DataTable 625 | source = ColumnDataSource(out_df) 626 | previous_source = ColumnDataSource(out_df.copy()) 627 | 628 | # Create Divs to display SPX and VIX prices with default values 629 | spx_div = Div(text=f"SPX Price: {out_df.iloc[0]['spot_price']:.2f}") 630 | vix_div = Div(text=f"VIX Price: {out_df.iloc[0]['sigma'] * 100:.2f}") 631 | liquidation_value = get_account_tag(ib_wrapper.ib, 'NetLiquidationByCurrency') 632 | capital_at_risk = (illiquid_equity(discount=0.5) + float(liquidation_value[0].value)) * lev_slider.value 633 | 634 | account_div_1 = Div(text=f"Liquidation Value: ${float(liquidation_value[0].value):,.0f}") 635 | account_div_2 = Div(text=f"Capital at Risk: ${capital_at_risk:,.0f}") 636 | 637 | 638 | display_cols = ['Expiry', 'Days to Expiry', 'Strike', 'Bid', 'Ask', 'Mid', 'Discount', 'Margin', 'Lots'] 639 | # Mapping of column names to their respective formatters 640 | formatter_map = { 641 | "Mid": NumberFormatter(format="0.0"), # One decimal place for Mid 642 | "Margin": 
NumberFormatter(format="$0,0"), # Currency format for Margin 643 | "Discount": NumberFormatter(format="0.00%"), # Percentage format for Discount 644 | } 645 | 646 | 647 | # Create the columns for the DataTable, applying formatters where necessary 648 | columns = [ 649 | TableColumn(field=col, title=col, formatter=formatter_map.get(col)) 650 | if formatter_map.get(col) else TableColumn(field=col, title=col) 651 | for col in display_cols 652 | ] 653 | 654 | # Create the DataTable using the data source and defined columns 655 | data_table = DataTable( 656 | source=source, 657 | columns=columns, 658 | width=1000, 659 | height=1000, 660 | index_position=None 661 | ) 662 | 663 | # Create the layout for the Bokeh app 664 | app_layout = column(row(spx_div, vix_div, account_div_1, account_div_2), row(lev_slider, z_slider), data_table) 665 | 666 | # Periodic callback function to update data every second 667 | def update(): 668 | # Fetch updated data 669 | update_df = fetch_data(ib_wrapper, option_expiries, risk_free, option_params_df, cache, 670 | leverage=lev_slider.value, z_score=z_slider.value 671 | ) 672 | 673 | # Store previous data 674 | previous_source.data = source.data.copy() 675 | 676 | # Update the data source with the new data 677 | source.data = update_df.to_dict(orient='list') 678 | 679 | # Grab the current SPX and VIX prices 680 | current_spx_price = source.data['spot_price'][0] 681 | current_vix_price = source.data['sigma'][0] * 100 682 | 683 | # Update the price tracker for SPX and VIX 684 | price_tracker.update_price('SPX', current_spx_price) 685 | price_tracker.update_price('VIX', current_vix_price) 686 | 687 | # Get the trend colors for SPX and VIX 688 | spx_color = price_tracker.get_trend('SPX') 689 | vix_color = price_tracker.get_trend('VIX') 690 | 691 | # Update SPX and VIX prices in the Divs with conditional color formatting 692 | spx_div.text = f"SPX Price: {current_spx_price:.2f}" 693 | vix_div.text = f"VIX Price: {current_vix_price:.2f}" 694 | liquidation_value = get_account_tag(ib_wrapper.ib, 'NetLiquidationByCurrency') 695 | capital_at_risk = (illiquid_equity(discount=0.5) + float(liquidation_value[0].value)) * lev_slider.value 696 | account_div_1.text = f"Liquidation Value: ${float(liquidation_value[0].value):,.0f}" 697 | account_div_2.text = f"Capital at Risk: ${capital_at_risk:,.0f}" 698 | 699 | # Add a periodic callback to update the data every 1 second (1000 ms) 700 | curdoc().add_periodic_callback(update, 1000) 701 | 702 | # Return the layout for the Bokeh document root 703 | return app_layout 704 | 705 | except ConnectionError as e: 706 | # Handle connection errors by logging the error and returning an empty layout 707 | print(f"Failed to connect to IB: {e}") 708 | return column() # Return an empty layout if IB connection fails 709 | 710 | 711 | 712 | # Add the Bokeh app layout to the current document root 713 | curdoc().add_root(create_bokeh_app()) 714 | 715 | 716 | 717 | 718 | -------------------------------------------------------------------------------- /spx_data_update.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | # from ftplib import FTP 5 | import pysftp 6 | from pathlib import Path 7 | from time import time 8 | # import feather 9 | import pandas as pd 10 | import numpy as np 11 | from pandas import DataFrame 12 | # import quandl 13 | # import requests 14 | from scipy.io import loadmat 15 | # from pyfolio.timeseries import cum_returns 16 | from urllib.request import 
urlretrieve 17 | import plistlib 18 | import warnings 19 | import nest_asyncio 20 | from datetime import datetime 21 | 22 | 23 | from option_utilities import USZeroYieldCurve, write_feather, read_feather, matlab2datetime, get_asset 24 | from ib_insync import IB, util, Index 25 | # from twilio.rest import Client 26 | 27 | 28 | # class SMSMessage: 29 | # 30 | # def __init__(self, sms_text='This message is empty'): 31 | # # account_sid = config_key('account_sid') 32 | # # twilio_sms_number = config_key('twilio_sms_number') 33 | # client = Client(config_key('account_sid'), config_key('twilio_token')) 34 | # message = client.messages \ 35 | # .create( 36 | # body=sms_text, 37 | # from_=config_key('twilio_sms_number'), 38 | # to=config_key('cell_number') 39 | # ) 40 | # print(message.sid) 41 | 42 | 43 | class UpdateSP500Data: 44 | DATA_BASE_PATH = Path.home() / 'Library' / 'Mobile Documents' / 'com~apple~CloudDocs' / 'localDB' 45 | TOP_LEVEL_PATH = DATA_BASE_PATH / 'cboeRawVolData' 46 | 47 | def __init__(self): 48 | # Check basic file structure exists, if not create it 49 | path_list = [self.TOP_LEVEL_PATH] 50 | path_list.extend([self.TOP_LEVEL_PATH / sub_directory 51 | for sub_directory in ['zip', 'csv', 'feather']]) 52 | for directory in path_list: 53 | if not os.path.isdir(directory): 54 | print('Warning: {0} does not exist - creating it'.format(str(directory))) 55 | os.mkdir(directory) 56 | 57 | self.GetRawOptionData = GetRawCBOEOptionData(self.TOP_LEVEL_PATH) 58 | self.GetRawOptionData.update_data_files(self.TOP_LEVEL_PATH / 'test') 59 | 60 | self.ImpliedVol = ImpliedVolatilityHistory() 61 | self.DividendYieldHistory = DividendYieldHistory() 62 | self.usZeroYldCurve = USZeroYieldCurve(update_data=True) 63 | self.ClosingPriceHistory = ClosingPriceHistory(self.TOP_LEVEL_PATH / 'feather') 64 | self.save_data() 65 | 66 | def save_data(self): 67 | self.ImpliedVol.save_vix_df(self.TOP_LEVEL_PATH) 68 | self.DividendYieldHistory.save_dividend_yield_df(self.TOP_LEVEL_PATH) 69 | self.ClosingPriceHistory.save_daily_close(self.TOP_LEVEL_PATH) 70 | 71 | 72 | # class GetRawCBOEOptionData: 73 | # OPTION_TYPES = ['P', 'C'] 74 | # # Need to update this string each year for subscription renewal 75 | # # if datetime.today().date() > pd.to_datetime('20-Mar-2023').date(): 76 | # # print('Warning - Update subscription string for SPX from CBOE Datashop') 77 | # SUBSCRIPTION_STR = 'subscriptions/order_000012838/item_000016265/' 78 | # # / subscriptions / order_000012838 / item_000016265 / 79 | # # SUBSCRIPTION_STR = '/subscriptions/order_000008352/item_000011077/' 80 | # # SUBSCRIPTION_STR = 'order_000008421/item_000011148/' 81 | # 82 | # SYMBOL_DEFINITION_FILE = 'OptionSymbolConversionHistory.xlsx' 83 | # 84 | # def __init__(self, top_level_directory): 85 | # 86 | # self.top_level_directory = top_level_directory 87 | # # Specific to SPX - Get option symbol string 88 | # root_symbols_file = self.top_level_directory / self.SYMBOL_DEFINITION_FILE 89 | # assert (root_symbols_file.is_file()) 90 | # root_symbols_df = pd.read_excel(root_symbols_file, sheet_name='spxSymbols', skiprows=[0], 91 | # usecols=[0], index_col=None, names=['root_symbols']) 92 | # self.root_symbols_str = root_symbols_df['root_symbols'].dropna().str.strip().values 93 | # 94 | # @staticmethod 95 | # def open_sftp(): 96 | # user_dict = data_shop_login() 97 | # "Open ftp connection to CBOE datashop" 98 | # cnopts = pysftp.CnOpts() 99 | # cnopts.hostkeys = None 100 | # sftp = pysftp.Connection('sftp.datashop.livevol.com', 101 | # 
username=user_dict['user'], 102 | # password=user_dict['password'], 103 | # cnopts=cnopts) 104 | # # ftp = FTP(host='ftp.datashop.livevol.com', 105 | # # user=user_dict['user'], 106 | # # passwd=user_dict['password']) 107 | # return sftp 108 | # 109 | # @staticmethod 110 | # def unzip_file(in_directory, out_directory): 111 | # """Unzip files to csv """ 112 | # for item in os.listdir(in_directory): # loop through items in dir 113 | # if item.endswith('.zip'): 114 | # file_name = in_directory / item # get full path of files 115 | # zip_ref = zipfile.ZipFile(file_name) # create zipfile object 116 | # try: 117 | # zip_ref.extractall(out_directory) # extract file to dir 118 | # except zipfile.BadZipFile as err: 119 | # print("Zipfile error: {0} for {1}".format(err, item)) 120 | # zip_ref.close() # close file 121 | # 122 | # def __get_zip_files(self, output_directory, order_string): 123 | # """Download zip files from order_string to output_directory""" 124 | # sftp = self.open_sftp() 125 | # sftp.get_d(order_string, output_directory, preserve_mtime=True) 126 | # sftp_file_list = sftp.listdir(order_string) 127 | # # ftp.cwd(order_string) 128 | # # ftp_file_list = ftp.nlst() 129 | # for file in sftp_file_list: 130 | # if file.endswith('.zip'): 131 | # print("Downloading..." + file) 132 | # sftp.close() 133 | # 134 | # def get_subscription_files(self, output_directory: Path): 135 | # if not os.path.isdir(output_directory): 136 | # os.mkdir(output_directory) 137 | # assert(output_directory.is_dir()) 138 | # self.__get_zip_files(output_directory, self.SUBSCRIPTION_STR) 139 | # 140 | # def update_data_files(self, temporary_file_directory): 141 | # """ Download zip files from CBOE, unzip to csv, process and turn into feather 142 | # TODO: Should be in separate simulation data update & fetch class that creates/updates database 143 | # :rtype: Bool""" 144 | # feather_directory = self.top_level_directory / 'feather' 145 | # assert(feather_directory.is_dir()) 146 | # assert temporary_file_directory.is_dir(), '{} directory does not exist'.format(temporary_file_directory) 147 | # latest_business_date = pd.to_datetime('today') - pd.tseries.offsets.BDay(1) 148 | # opt_dates_all = get_dates(feather_directory) 149 | # if opt_dates_all[-1].date() != latest_business_date.date(): 150 | # start_time = time() 151 | # print('Downloading Option data from CBOE') 152 | # self.get_subscription_files(temporary_file_directory) 153 | # self.unzip_file(temporary_file_directory, temporary_file_directory) 154 | # self.csv2feather(temporary_file_directory, feather_directory) 155 | # end_time = time() 156 | # files_updated = True 157 | # print('Option files updated in: ' + str(round(end_time - start_time)) + ' seconds') 158 | # else: 159 | # files_updated = False 160 | # print('Option files not updated') 161 | # return files_updated 162 | # 163 | # def csv2feather(self, in_directory, out_directory, archive_files=True): 164 | # """Open raw csv files, remove weekly options and all options not in 165 | # root_symbols_file build dataframe and convert to feather 166 | # archive zip and csv files""" 167 | # zip_archive_directory = self.top_level_directory / 'zip' 168 | # csv_archive_directory = self.top_level_directory / 'csv' 169 | # # Check/create output directory 170 | # if not os.path.isdir(out_directory): 171 | # os.mkdir(out_directory) 172 | # # list of all files in directory (includes .DS_store hidden file) 173 | # regex_pattern = '|'.join(self.root_symbols_str) 174 | # for item in os.listdir(in_directory): # loop through items 
in dir 175 | # if item.endswith('.csv'): 176 | # option_df = pd.read_csv(in_directory / item) 177 | # # Convert quote_date and expiration to datetime format 178 | # option_df[['quote_date', 'expiration']] = option_df[['quote_date', 'expiration']].apply(pd.to_datetime) 179 | # # Convert option type to upper cap 180 | # option_df['option_type'] = option_df['option_type'].apply(str.upper) 181 | # # Remove SPXW because its the only root that contains SPX 182 | # option_df = option_df[~option_df['root'].str.contains('SPXW')] 183 | # # Create new column of days2Expiry 184 | # option_df = option_df[option_df['root'].str.contains(regex_pattern)] 185 | # for option_type in self.OPTION_TYPES: 186 | # df2save = option_df[option_df['option_type'] == option_type] 187 | # file_name = os.path.splitext(item)[0] + '_' + option_type + '.feather' 188 | # # 189 | # # feather.write_dataframe(df2save, str(out_directory / file_name)) 190 | # df2save.reset_index().to_feather(str(out_directory / file_name)) 191 | # if archive_files: 192 | # # This makes sure we keep the archive - we will be missing zip and csv 193 | # for item in os.listdir(in_directory): 194 | # if item.endswith('.csv'): 195 | # os.rename(in_directory / item, str(csv_archive_directory / item)) 196 | # elif item.endswith('.zip'): 197 | # os.rename(in_directory / item, str(zip_archive_directory / item)) 198 | # else: 199 | # os.remove(in_directory / item) 200 | 201 | 202 | 203 | 204 | class GetRawCBOEOptionData: 205 | """Class for handling raw option data downloads and processing from the CBOE DataShop.""" 206 | 207 | OPTION_TYPES = ['P', 'C'] # Option types: P for Put, C for Call 208 | SUBSCRIPTION_STR = 'subscriptions/order_000012838/item_000016265/' # Subscription string for data access 209 | SYMBOL_DEFINITION_FILE = 'OptionSymbolConversionHistory.xlsx' # Excel file containing symbol conversion history 210 | 211 | def __init__(self, top_level_directory: Path): 212 | """ 213 | Initialize with the directory path for storing the data. 214 | 215 | :param top_level_directory: Path object pointing to the top-level directory. 216 | """ 217 | self.top_level_directory = top_level_directory 218 | # Load root symbol strings from the Excel file 219 | self.root_symbols_str = self._load_symbol_definitions 220 | 221 | @property 222 | def _load_symbol_definitions(self) -> list: 223 | """ 224 | Load option symbol string definitions from an Excel file. 225 | 226 | :return: List of root symbol strings. 227 | """ 228 | # Path to the symbol definition file 229 | root_symbols_file: Path = self.top_level_directory / self.SYMBOL_DEFINITION_FILE 230 | 231 | # Check if the file exists 232 | assert root_symbols_file.is_file(), f"{root_symbols_file} does not exist." 233 | 234 | # Load root symbols from the 'spxSymbols' sheet in the Excel file 235 | root_symbols_df: DataFrame = pd.read_excel(root_symbols_file, sheet_name='spxSymbols', skiprows=[0], 236 | usecols=[0], names=['root_symbols']) 237 | 238 | # Strip whitespace and return as a list of strings 239 | return root_symbols_df['root_symbols'].dropna().str.strip().values.tolist() 240 | 241 | @staticmethod 242 | def open_sftp(): 243 | """ 244 | Open an SFTP connection to CBOE DataShop using stored credentials. 245 | 246 | :return: pysftp.Connection object for SFTP communication. 247 | :raises ConnectionError: If unable to establish a connection. 
248 | """ 249 | user_dict = data_shop_login() # Retrieve login credentials 250 | cnopts = pysftp.CnOpts() 251 | cnopts.hostkeys = None # Disable host key verification for this connection 252 | 253 | # Suppress warning related to host key verification 254 | with warnings.catch_warnings(): 255 | warnings.simplefilter("ignore", category=UserWarning) 256 | 257 | try: 258 | # Establish SFTP connection 259 | sftp = pysftp.Connection('sftp.datashop.livevol.com', 260 | username=user_dict['user'], 261 | password=user_dict['password'], 262 | cnopts=cnopts) 263 | except Exception as e: 264 | raise ConnectionError(f"Failed to connect to SFTP: {e}") 265 | 266 | return sftp 267 | 268 | @staticmethod 269 | def unzip_files(in_directory: Path, out_directory: Path): 270 | """ 271 | Unzip all .zip files from the input directory into the output directory. 272 | 273 | :param in_directory: Path object for the input directory containing zip files. 274 | :param out_directory: Path object for the output directory to extract files to. 275 | """ 276 | # Loop through all files in the input directory 277 | for item in os.listdir(in_directory): 278 | if item.endswith('.zip'): 279 | file_path = in_directory / item 280 | try: 281 | # Extract the contents of the zip file 282 | with zipfile.ZipFile(file_path) as zip_ref: 283 | zip_ref.extractall(out_directory) 284 | except zipfile.BadZipFile as err: 285 | print(f"Error extracting {item}: {err}") 286 | 287 | def __get_zip_files(self, output_directory: Path, order_string: str): 288 | """ 289 | Download zip files from the SFTP server into the specified directory. 290 | 291 | :param output_directory: Path object where downloaded zip files will be stored. 292 | :param order_string: String for the SFTP folder containing the files. 293 | """ 294 | # Establish SFTP connection 295 | sftp = self.open_sftp() 296 | 297 | # Download the directory from the server to the local directory 298 | sftp.get_d(order_string, output_directory, preserve_mtime=True) 299 | 300 | # List the files on the SFTP server 301 | sftp_file_list = sftp.listdir(order_string) 302 | 303 | # Print the names of the downloaded files 304 | for file in sftp_file_list: 305 | if file.endswith('.zip'): 306 | print(f"Downloading... {file}") 307 | 308 | sftp.close() # Close the SFTP connection 309 | 310 | def get_subscription_files(self, output_directory: Path): 311 | """ 312 | Download the subscription files from the CBOE DataShop SFTP server. 313 | 314 | :param output_directory: Path object where the downloaded files will be saved. 315 | """ 316 | # Ensure the output directory exists, if not, create it 317 | if not output_directory.is_dir(): 318 | output_directory.mkdir(parents=True) 319 | 320 | # Download the zip files 321 | self.__get_zip_files(output_directory, self.SUBSCRIPTION_STR) 322 | 323 | def update_data_files(self, temporary_file_directory: Path) -> bool: 324 | """ 325 | Download, unzip, process, and update option data if not already up-to-date. 326 | 327 | :param temporary_file_directory: Path object for temporary storage of raw files. 328 | :return: True if data files were updated, False otherwise. 329 | """ 330 | feather_directory = self.top_level_directory / 'feather' # Directory for processed data 331 | assert feather_directory.is_dir(), f"{feather_directory} does not exist." 332 | assert temporary_file_directory.is_dir(), f"{temporary_file_directory} does not exist." 
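        # Illustrative dates for the freshness check below: run on Tuesday
        # 2024-01-09, pd.to_datetime('today') - BDay(1) gives Monday 2024-01-08,
        # so a download is only triggered when the newest stored feather date
        # is older than that session.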
333 | 334 | # Get the most recent business day 335 | latest_business_date = pd.to_datetime('today') - pd.tseries.offsets.BDay(1) 336 | 337 | # Retrieve the list of available option dates from the existing data 338 | opt_dates_all = get_dates(feather_directory) 339 | 340 | # Check if the data is up-to-date 341 | if opt_dates_all[-1].date() != latest_business_date.date(): 342 | print('Downloading Option data from CBOE...') 343 | start_time = time() 344 | 345 | # Download and process the data files 346 | self.get_subscription_files(temporary_file_directory) 347 | self.unzip_files(temporary_file_directory, temporary_file_directory) 348 | self.csv_to_feather(temporary_file_directory, feather_directory) 349 | 350 | end_time = time() 351 | print(f"Option files updated in {round(end_time - start_time)} seconds.") 352 | return True 353 | else: 354 | print('Option files are up-to-date.') 355 | return False 356 | 357 | def csv_to_feather(self, in_directory: Path, out_directory: Path, archive_files=True): 358 | """ 359 | Convert CSV files to Feather format and optionally archive the original files. 360 | 361 | :param in_directory: Path object containing CSV files. 362 | :param out_directory: Path object where Feather files will be stored. 363 | :param archive_files: Boolean flag indicating whether to archive the original files. 364 | """ 365 | zip_archive_directory = self.top_level_directory / 'zip' # Directory for zip file archives 366 | csv_archive_directory = self.top_level_directory / 'csv' # Directory for csv file archives 367 | 368 | # Ensure the output directory exists 369 | if not out_directory.is_dir(): 370 | out_directory.mkdir(parents=True) 371 | 372 | # Compile a regex pattern for filtering option symbols 373 | regex_pattern = '|'.join(self.root_symbols_str) 374 | 375 | # Process each CSV file in the input directory 376 | for item in os.listdir(in_directory): 377 | if item.endswith('.csv'): 378 | file_path = in_directory / item 379 | option_df = pd.read_csv(file_path) 380 | 381 | # Convert quote_date and expiration to datetime format 382 | option_df[['quote_date', 'expiration']] = option_df[['quote_date', 'expiration']].apply(pd.to_datetime) 383 | 384 | # Ensure option_type is uppercase 385 | option_df['option_type'] = option_df['option_type'].str.upper() 386 | 387 | # Remove rows with SPXW root symbol and filter by root_symbols 388 | option_df = option_df[~option_df['root'].str.contains('SPXW')] 389 | option_df = option_df[option_df['root'].str.contains(regex_pattern)] 390 | 391 | # Save data by option type (P for Put, C for Call) in Feather format 392 | for option_type in self.OPTION_TYPES: 393 | df_filtered = option_df[option_df['option_type'] == option_type] 394 | file_name = f"{os.path.splitext(item)[0]}_{option_type}.feather" 395 | df_filtered.reset_index().to_feather(out_directory / file_name) 396 | 397 | # Archive the original zip and csv files if required 398 | if archive_files: 399 | self._archive_files(in_directory, csv_archive_directory, zip_archive_directory) 400 | 401 | @staticmethod 402 | def _archive_files(in_directory: Path, csv_archive_directory: Path, zip_archive_directory: Path): 403 | """ 404 | Archive CSV and ZIP files by moving them to the archive directories. 405 | 406 | :param in_directory: Path object for the directory containing files to be archived. 407 | :param csv_archive_directory: Path object where CSV files will be moved. 408 | :param zip_archive_directory: Path object where ZIP files will be moved. 
409 | """ 410 | # Move files to their respective archive directories 411 | for item in os.listdir(in_directory): 412 | file_path = in_directory / item 413 | if item.endswith('.csv'): 414 | file_path.rename(csv_archive_directory / item) 415 | elif item.endswith('.zip'): 416 | file_path.rename(zip_archive_directory / item) 417 | else: 418 | file_path.unlink() # Remove other non-relevant files 419 | 420 | 421 | class ImpliedVolatilityHistory: 422 | 423 | def __init__(self): 424 | vix = get_vix() 425 | self.implied_vol_index = vix.rename('vix_index') 426 | 427 | def save_vix_df(self, out_directory: Path, file_name='vix_index'): 428 | write_feather(self.implied_vol_index.to_frame(), str(out_directory / file_name)) 429 | 430 | 431 | class DividendYieldHistory: 432 | 433 | def __init__(self): 434 | dy_monthly = get_sp5_dividend_yield() 435 | self.dy_monthly = dy_monthly.rename(columns={"Value": "Yield Value"}) 436 | 437 | def save_dividend_yield_df(self, out_directory: Path, file_name='sp500_dividend_yld'): 438 | # dividend_yield_df = self.dy_monthly.to_frame() 439 | write_feather(self.dy_monthly, str(out_directory / file_name)) 440 | 441 | 442 | class ClosingPriceHistory: 443 | 444 | def __init__(self, feather_directory): 445 | self.option_data_dates = get_dates(feather_directory) 446 | self.daily_close = get_daily_close(self.option_data_dates, str(feather_directory) + '/') 447 | 448 | def save_daily_close(self, output_directory): 449 | write_feather(self.daily_close, str(output_directory / 'sp500_close')) 450 | 451 | 452 | class VixTSM: 453 | def __init__(self, expiry_type=0): 454 | """ Class to retrieve tsm vix futures data and create return and index series""" 455 | try: 456 | raw_tsm = loadmat('/Volumes/ExtraStorage/base/db/fut/vix.mat') 457 | except FileNotFoundError: 458 | raw_tsm = loadmat(str(UpdateSP500Data.DATA_BASE_PATH / 'mat' / 'vix.mat')) 459 | python_dates = matlab2datetime(raw_tsm['t'].squeeze()) 460 | column_names = [item[0] for item in raw_tsm['h'][:, 0]] 461 | raw_x_data = np.round(raw_tsm['x'], 4) 462 | self.raw_tsm_df = pd.DataFrame(data=raw_x_data, index=python_dates, columns=column_names) 463 | self.raw_tsm_df = self.raw_tsm_df.iloc[:-1, :] # remove last row 464 | self.start_date = self.raw_tsm_df.index[0] 465 | self.expiry_type = expiry_type # expiry_type is either string or positive integer 466 | self.rolled_return, self.rolled_expiries, self.days_2_exp, self.rolled_future = self._rolled_future_return() 467 | 468 | def _rolled_future_return(self): 469 | """Returns arithmetic return from long position in vix future""" 470 | expiry_dates = pd.to_datetime(self.raw_tsm_df['exp1'].astype(int), format='%Y%m%d') 471 | returns = self._expiry_returns 472 | days_2_exp = self._expiration_days_2_expiry 473 | if self.expiry_type == 'eom': 474 | eom_dates = returns.index[returns.reset_index().groupby(returns.index.to_period('M'))['index'].idxmax()] 475 | last_month_end = eom_dates[-1] + pd.offsets.MonthEnd(0) 476 | eom_dates = eom_dates[:-1] 477 | eom_dates = eom_dates.insert(-1, last_month_end) 478 | roll_dates = eom_dates.sort_values() 479 | else: 480 | # TODO: add checks to make sure roll_dates are subset of return index dates 481 | expiry_dates_unique = pd.to_datetime(self.raw_tsm_df['exp1'].unique().astype(int), format='%Y%m%d') 482 | roll_dates = expiry_dates_unique - pd.offsets.BDay(self.expiry_type) 483 | 484 | expiry_for_roll = [] 485 | for dts in expiry_dates: 486 | idx = roll_dates.get_loc(dts, method='ffill') 487 | expiry_for_roll.append(roll_dates[idx]) 488 | day_diff = 
507 |     @property
508 |     def _expiry_returns(self):
509 |         """Returns future arithmetic return if contracts are held to expiry"""
510 |         close_cols = [col for col in self.raw_tsm_df.columns if 'close' in col]
511 |         close = self.raw_tsm_df[close_cols].copy()
512 |         roll_rows = self.raw_tsm_df['exp1'].diff() > 0  # Day after expiry
513 |         returns = close.pct_change()
514 |         # Cross the columns on the day after expiry: divide today's close in column n
515 |         # by yesterday's close in column n + 1, yesterday's contract one further out
516 |         column_shift_ret = close.divide(close.shift(periods=-1, axis='columns').shift(periods=1, axis='rows')) - 1
517 |         returns[roll_rows] = column_shift_ret[roll_rows]
518 |         return returns
519 | 
520 |     @property
521 |     def _expiration_days_2_expiry(self):
522 |         """Returns number of business days to expiry for each contract month"""
523 |         # TODO: This is an approximation that assumes there is only one day between
524 |         # the expiration date and the last day of the contract
525 |         exp_cols = [col for col in self.raw_tsm_df.columns if 'exp' in col]
526 |         expiries = self.raw_tsm_df[exp_cols].fillna(0).astype(int).apply(pd.to_datetime,
527 |                                                                          format='%Y%m%d',
528 |                                                                          errors='coerce')
529 |         # Dates in TSM are last trading day so add one day for expiration
530 |         expiry_list = [expiries[cols].add(pd.Timedelta(days=1)) for cols in expiries.columns]
531 |         num_bus_days = [np.busday_count(item.index.values.astype('<M8[D]'),
532 |                                         item.values.astype('<M8[D]')) for item in expiry_list]
533 |         expiry_days = pd.DataFrame(np.column_stack(num_bus_days),
534 |                                    index=self.raw_tsm_df.index, columns=exp_cols)
535 |         return expiry_days
617 | def get_dates(feather_directory, file_type='.feather'):
618 |     """Return the sorted DatetimeIndex of quote dates embedded in the option file names"""
619 |     regex_pattern = r'\d{4}-\d{2}-\d{2}'  # matches yyyy-mm-dd; does not check for valid dates e.g. month>12 or days>31
620 |     opt_dates_list = []
621 |     for item in os.listdir(feather_directory):  # loop through items in dir
622 |         if item.endswith(file_type):
623 |             date_string = re.search(regex_pattern, item)
624 |             if date_string:
625 |                 opt_dates_list.append(date_string.group())
626 |     opt_dates_list = list(set(opt_dates_list))
627 |     opt_dates_all = pd.DatetimeIndex([pd.to_datetime(date_item, yearfirst=True,
628 |                                                      format='%Y-%m-%d')
629 |                                       for date_item in opt_dates_list])
630 |     opt_dates_all = opt_dates_all.sort_values()
631 |     return opt_dates_all
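`get_dates` relies only on the date stamp embedded in each file name, de-duplicating because put and call files share a date. A sketch with hypothetical CBOE-style file names; the exact naming convention is an assumption, only the yyyy-mm-dd fragment matters:

```python
import re
import pandas as pd

file_names = ['UnderlyingOptionsEODCalcs_2019-03-01_P.feather',
              'UnderlyingOptionsEODCalcs_2019-03-01_C.feather',
              'UnderlyingOptionsEODCalcs_2019-03-04_P.feather']

# One date per file name; the set drops the P/C duplicate
matches = {re.search(r'\d{4}-\d{2}-\d{2}', name).group() for name in file_names}
opt_dates_all = pd.DatetimeIndex(pd.to_datetime(sorted(matches), format='%Y-%m-%d'))
print(opt_dates_all)  # DatetimeIndex(['2019-03-01', '2019-03-04'], ...)
```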
634 | def get_vix():
635 |     """Fetch VIX index history from Interactive Brokers and append it to the stored history
636 |     :return: pd.Series of closing values
637 |     """
638 |     ibw = IbWrapper()
639 |     ib = ibw.ib
640 |     vix = Index('VIX')
641 |     cds = ib.reqContractDetails(vix)
642 | 
643 |     bars = ib.reqHistoricalData(cds[0].contract,
644 |                                 endDateTime='',
645 |                                 durationStr='1 Y',
646 |                                 barSizeSetting='1 day',
647 |                                 whatToShow='TRADES',
648 |                                 useRTH=True,
649 |                                 formatDate=1)
650 |     ib.disconnect()
651 |     vix = util.df(bars)
652 |     vix = vix.set_index('date')
653 |     vix.index = pd.to_datetime(vix.index)
654 |     vix = vix[['open', 'high', 'low', 'close']]
655 | 
656 |     vix_history = read_feather(str(UpdateSP500Data.TOP_LEVEL_PATH / 'vix_history'))
657 | 
658 |     full_hist = vix.combine_first(vix_history)
659 |     write_feather(full_hist, str(UpdateSP500Data.TOP_LEVEL_PATH / 'vix_history'))
660 |     return full_hist['close']
661 | 
662 | 
663 | def get_sp5_dividend_yield():
664 |     """Fetch the S&P 500 dividend yield
665 |     :return: pd.Series of monthly dividend yields
666 |     """
667 |     # The Quandl source (MULTPL/SP500_DIV_YIELD_MONTH) is disabled;
668 |     # scrape the same series directly from multpl.com instead
669 |     spx_dividend_yld = scrape_sp5_div_yield()
670 |     return spx_dividend_yld
671 | 
672 | 
673 | def scrape_sp5_div_yield():
674 |     """Scrape S&P 500 dividend yield from www.multpl.com
675 |     :return: pd.Series
676 |     """
677 |     url = 'https://www.multpl.com/s-p-500-dividend-yield/table/by-month'
678 |     # Read all tables on the page; the first one holds the monthly series
679 |     raw_html_tbl = pd.read_html(url)
680 |     dy_df = raw_html_tbl[0]
681 |     # Clean the raw table: promote the first row to column headers
682 |     dy_df.columns = dy_df.iloc[0]
683 |     dy_df = dy_df.drop([0])
684 |     dy_df[dy_df.columns[0]] = pd.to_datetime(dy_df.loc[:, dy_df.columns[0]],
685 |                                              format='%b %d, %Y')
686 |     dy_df = dy_df.set_index(dy_df.columns[0])
687 |     dy_df = dy_df[dy_df.columns[0]]
688 |     spx_dividend_yld = pd.to_numeric(dy_df.str.replace('%', '').str.replace('estimate', '').str.strip())
689 |     # Table is newest-first; flip to ascending order before resampling
690 |     spx_dividend_yld = spx_dividend_yld.reindex(spx_dividend_yld.index[::-1])
691 |     spx_dividend_yld = spx_dividend_yld.resample('MS').bfill()
692 |     return spx_dividend_yld
693 | 
694 | 
695 | def quandle_api():
696 |     return config_key('Quandl')
697 | 
698 | 
699 | def data_shop_login():
700 |     return config_key('cbeoDataShop_dict')
701 | 
702 | 
703 | def illiquid_equity(discount=0.5):
704 |     return sum(config_key('illiquid_equity').values()) * discount
705 | 
706 | 
707 | def config_key(dict_key: str):
708 |     file_name = UpdateSP500Data.DATA_BASE_PATH / 'config.plist'
709 |     assert file_name.is_file()
710 |     with open(file_name, 'rb') as f:
711 |         pl = plistlib.load(f)
712 |     return pl[dict_key]
713 | 
714 | 
715 | def feather_clean(in_directory):
716 |     """Utility function to clean feather files"""
717 |     assert in_directory.is_dir()
718 |     all_files = os.listdir(in_directory)
719 |     for item in all_files:
720 |         if item.endswith('.feather'):
721 |             # Remove options with strikes at $5
722 |             option_df = pd.read_feather(in_directory / item)
723 |             idx = option_df['strike'] == 5
724 |             option_df = option_df.drop(option_df.index[idx])
725 |             # # Remove Quarterly options
726 |             # idx2 = option_df['root'] == 'SPXQ'
727 |             # option_df = option_df.drop(option_df.index[idx2])
728 |             # # Remove Monthly options
729 |             # idx2 = option_df['root'] == 'SPXM'
730 |             # option_df = option_df.drop(option_df.index[idx2])
731 |             option_df.to_feather(str(in_directory / item))
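`config_key` expects a `config.plist` under `UpdateSP500Data.DATA_BASE_PATH`. A hedged sketch of the layout implied by the lookups above; the key names come from the code (`'Quandl'`, `'cbeoDataShop_dict'`, `'illiquid_equity'`, and the `'user'`/`'password'` fields read in `open_sftp`), while the values are placeholders:

```python
import plistlib

config = {
    'Quandl': 'your-quandl-api-key',
    'cbeoDataShop_dict': {'user': 'you@example.com', 'password': 'secret'},
    'illiquid_equity': {'fund_a': 100_000.0, 'fund_b': 50_000.0},
}
# Write the plist that config_key() will load
with open('config.plist', 'wb') as f:
    plistlib.dump(config, f)
```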
733 | from ib_insync import IB, Index, Option, util  # Index and util are used by get_vix above
734 | import nest_asyncio
735 | import contextlib
736 | import io
737 | 
738 | 
739 | class IbWrapper:
740 |     def __init__(self, client_id=30):
741 |         """Wrapper class for the Interactive Brokers API connection"""
742 |         self.ib = IB()
743 |         self.ib.errorEvent += self.on_error  # Attach the error handler
744 |         nest_asyncio.apply()
745 |         self.connect_to_ib(client_id)
746 | 
747 |     def connect_to_ib(self, client_id):
748 |         """Attempt to connect to IB Gateway, falling back to TWS, while suppressing connection chatter."""
749 |         try:
750 |             # Suppress stdout/stderr around the connect call only,
751 |             # so our own status messages below remain visible
752 |             with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
753 |                 self.ib.connect('127.0.0.1', port=4001, clientId=client_id)
754 |             print("Connected to IB Gateway on port 4001")
755 |         except ConnectionRefusedError:
756 |             print("IB Gateway connection failed. Attempting to connect to TWS...")
757 |             try:
58 |                 # Attempt to connect to TWS as a fallback
759 |                 with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
760 |                     self.ib.connect('127.0.0.1', port=7496, clientId=client_id)
761 |                 print("Connected to TWS on port 7496")
762 |             except ConnectionRefusedError:
763 |                 print("TWS connection also failed. Please ensure the API port is open and try again.")
764 | 
765 |     @staticmethod
766 |     def on_error(req_id, error_code, error_string, contract):
767 |         """Custom error handling method for the IB API."""
768 |         if error_code == 200:
769 |             pass  # Error 200 - no security definition found; suppress
770 |         elif error_code in [2104, 2106, 2158]:
771 |             pass  # Informational messages about data farm connections, not errors
772 |         else:
773 |             print(f"Error {error_code}, reqId {req_id}: {error_string}, contract: {contract}")
774 | 
775 |     # def request_ticker_data(self, contracts_flat):
776 |     #     """Request ticker data for the given contracts."""
777 |     #     # Qualify contracts to ensure they are valid
778 |     #     qualified_contracts = self.ib.qualifyContracts(*contracts_flat)
779 |     #     if not qualified_contracts:
780 |     #         print("No valid contracts found in the provided list.")
781 |     #         return None
782 |     #
783 |     #     try:
784 |     #         tickers = self.ib.reqTickers(*qualified_contracts)
785 |     #         if not tickers:
786 |     #             print("No ticker data found for the provided contracts.")
787 |     #             return None
788 |     #
789 |     #         for ticker in tickers:
790 |     #             print(f"Received ticker data: {ticker}")
791 |     #
792 |     #         return tickers
793 |     #
794 |     #     except Exception as e:
795 |     #         print(f"An error occurred while requesting ticker data: {e}")
796 |     #         return None
797 | 
798 | 
799 | def main():
800 |     # try:
801 |     raw_file_updater = GetRawCBOEOptionData(UpdateSP500Data.TOP_LEVEL_PATH)
802 |     raw_file_updater.update_data_files(UpdateSP500Data.TOP_LEVEL_PATH / 'test')
803 |     print('success')
804 |     # except Exception:
805 |     #     cboe_msg = 'CBOE Data download failed'
806 |     # else:
807 |     #     cboe_msg = 'Option files downloaded'
808 | 
809 |     # try:
810 |     #     USZeroYieldCurve(update_data=True)
811 |     #     yld_crv_msg = 'US Yield Curve Updated'
812 |     # except Exception:
813 |     #     yld_crv_msg = 'Yield Curve download failed'
814 | 
815 |     # _ = SMSMessage('{0} \n {1}'.format(cboe_msg, yld_crv_msg))
816 | 
817 | 
818 | if __name__ == '__main__':
819 |     main()
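Minimal usage of `IbWrapper`, following the same request pattern as `get_vix` above. This requires an active IB Gateway or TWS session, as noted in the README; the client id is arbitrary as long as it is not already in use:

```python
from ib_insync import Index, util

ibw = IbWrapper(client_id=31)
ib = ibw.ib
try:
    # Resolve the VIX index contract, then pull a month of daily bars
    cds = ib.reqContractDetails(Index('VIX'))
    bars = ib.reqHistoricalData(cds[0].contract, endDateTime='',
                                durationStr='1 M', barSizeSetting='1 day',
                                whatToShow='TRADES', useRTH=True, formatDate=1)
    print(util.df(bars).tail())
finally:
    ib.disconnect()  # always release the API slot
```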
--------------------------------------------------------------------------------