├── .gitignore
├── LICENSE.md
├── PyABS.py
├── README.md
├── TALF_1.ipynb
├── TRAIN.csv
├── __init__.py
└── scores.csv

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

*.pyc

.ipynb_checkpoints/

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------

The MIT License (MIT)

Copyright (c) 2019 Luis M Sanchez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/PyABS.py:
--------------------------------------------------------------------------------
"""PyABS, light version of aspects of proprietary code."""
import pandas as pd
import numpy as np
# ARMA lives here in statsmodels < 0.13; it was removed in later releases
from statsmodels.tsa.arima_model import ARMA
from scipy.linalg import cholesky
import matplotlib.pyplot as plt
from tqdm import tqdm


def autocorrelation_and_significance(series, ax=None, **kwds):
    """Autocorrelation and significant lags for a time series."""
    n = len(series)
    data = np.asarray(series)
    if ax is None:
        ax = plt.gca()
        ax.set_xlim(1, n)
        ax.set_ylim(-1.0, 1.0)
    plt.rc('xtick', labelsize=15)
    plt.rc('ytick', labelsize=15)
    mean = np.mean(data)
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        return ((data[:n - h] - mean) *
                (data[h:] - mean)).sum() / float(n) / c0

    x = np.arange(n) + 1
    y = [r(h) for h in x]
    z95 = 1.959963984540054
    z95l = -z95 / np.sqrt(n)
    z95h = z95 / np.sqrt(n)
    ax.axhline(y=z95h, linestyle=':', color='red')
    ax.axhline(y=0.0, color='black')
    ax.axhline(y=z95l, linestyle=':', color='red')
    ax.set_xlabel("lag", fontsize=15)
    ax.set_ylabel("autocorrelation", fontsize=15)
    ax.plot(x, y, **kwds)
    if 'label' in kwds:
        ax.legend()
    ax.grid()
    df = pd.DataFrame({'autocorrelation': y,
                       'lag': x})
    lags = np.sort(np.append(df[df['autocorrelation'] > z95h]['lag'],
                             df[df['autocorrelation'] < z95l]['lag']))
    return df, lags, ax
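
# A hedged usage sketch, added for illustration only: run the plot on a
# synthetic smoothed-noise series (an assumption, not project data).
def _demo_autocorrelation():
    rng = np.random.default_rng(0)
    noise = rng.standard_normal(200)
    # a rolling mean induces autocorrelation at short lags
    series = pd.Series(noise).rolling(5, min_periods=1).mean()
    df_acf, significant_lags, ax = autocorrelation_and_significance(series)
    print('lags beyond the 95% significance band:', significant_lags)
    plt.show()
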

def optimal_params_ar_model(data, lags_to_test, cap=4, test_criteria='BIC', **kwds):
    """Optimal lags using the Bayes or Akaike Information Criterion.

    This function tests n lags to find out whether autoregressive models of
    order > 1 are worth exploring, using the BIC or AIC information criterion.
    Keyword arguments:
    data -- the series to test
    lags_to_test -- list of lags to test
    cap -- max number of lags to plot
    test_criteria -- either 'BIC' or 'AIC'
    **kwds -- matplotlib keyword arguments
    """
    ax = plt.gca()
    plt.rc('xtick', labelsize=13)
    plt.rc('ytick', labelsize=13)
    num_lags = len(lags_to_test)
    information_criteria = np.zeros(num_lags)
    # index by position, so the criterion array is filled correctly even when
    # the tested lags do not start at zero
    for i, lag in enumerate(list(lags_to_test)[:cap]):
        mod = ARMA(data.values, order=(lag, 0))
        res = mod.fit()
        if test_criteria == 'BIC':
            information_criteria[i] = res.bic
            ax.set_title('Bayes Information Criterion', fontsize=20)
            ax.set_ylabel('BIC', fontsize=15)
        elif test_criteria == 'AIC':
            information_criteria[i] = res.aic
            ax.set_title('Akaike Information Criterion', fontsize=20)
            ax.set_ylabel('AIC', fontsize=15)

    ax.set_xlabel('Lag', fontsize=15)
    ax.plot(lags_to_test[:cap], information_criteria[:cap], **kwds)
    ax.legend(loc='best')

    return ax


def ar_param_dictionary(train_df, order):
    """Parameters of autoregressive models.

    Given a train df, this function fits autoregressive models of the given
    order for the different time series in the train df and stores a summary
    of results, the AR1 coefficient and the volatility (standard deviation)
    of observations.
    """
    ar_params = {}
    for i, col in enumerate(train_df):
        asset = train_df[col]
        mod = ARMA(asset, order=(order, 0))
        res = mod.fit()
        ar_params[i] = {'name': col,
                        'summary': res.summary(),
                        'AR1': res.arparams[0],
                        # sigma2 is the residual variance; store its square
                        # root so 'vol' is a standard deviation, which is what
                        # the simulation step multiplies the normal shocks by
                        'vol': np.sqrt(res.sigma2)}
    return ar_params


def simulate_correlated_random_numbers(corr_matrix, n=1000):
    """Multivariate random normal.

    A generalization of the one-dimensional normal distribution to higher
    dimensions, using the Cholesky decomposition.
    https://math.stackexchange.com/questions/2079137/generating-multivariate-normal-samples-why-cholesky
    """
    upper_cholesky = cholesky(corr_matrix)
    rnd_numbers = np.random.normal(0.0, 1.0, size=(n, corr_matrix.shape[0]))
    return rnd_numbers @ upper_cholesky
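
# An illustrative sanity check, added for this write-up: the empirical
# correlation of the draws should recover the target matrix (the 0.8 target
# below is an arbitrary assumption).
def _demo_correlated_normals():
    target = np.array([[1.0, 0.8],
                       [0.8, 1.0]])
    draws = simulate_correlated_random_numbers(target, n=100_000)
    # prints a matrix close to [[1.0, 0.8], [0.8, 1.0]]
    print(np.corrcoef(draws, rowvar=False).round(2))
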

def simulate_single_set_interest_rates(train_df, date_ix, ar_params_dict,
                                       vol_stress=1):
    """Simulate one path of multiple future interest rates.

    Given a historical time series of interest rates, this function generates
    a path of correlated interest rates.
    Keyword arguments:
    train_df -- data frame with time series
    date_ix -- an array with the dates to simulate
    ar_params_dict -- dictionary containing parameters for the different series
    vol_stress -- volatility stress multiplier
    """
    corr_matrix = train_df.corr().to_numpy()
    fut_rates = {}
    for i in range(train_df.shape[1]):
        fut_rates[i] = np.zeros(len(date_ix))
        fut_rates[i][0] = train_df.iloc[-1, i]

    # draw at least as many correlated shocks as there are dates to simulate
    corr_rnd = simulate_correlated_random_numbers(corr_matrix,
                                                  n=max(1000, len(date_ix)))
    for k in range(train_df.shape[1]):
        for z in range(len(date_ix)):
            # skip the first value, since it is the seed value for the sim
            if z != 0:
                fut_rates[k][z] = (fut_rates[k][z - 1] * ar_params_dict[k]['AR1'] +
                                   ar_params_dict[k]['vol'] * corr_rnd[:, k][z])
                fut_rates[k][z] = fut_rates[k][z] * vol_stress
                # if a simulated spread goes negative, reset it to the
                # previous (positive) spread
                if fut_rates[k][z] < 0:
                    fut_rates[k][z] = fut_rates[k][z - 1]
    rates_df = pd.DataFrame(fut_rates, index=date_ix)
    rates_df.columns = train_df.columns
    return rates_df


def simulate_several_sets_correlated_rates(df_train, sims, date_index, ar_params_dict):
    """Simulate many paths of multiple future interest rates.

    Given a historical time series of interest rates in a df, the number of
    simulations to perform, an index of future dates, and a dictionary of
    autoregressive parameters for interest rates, this function generates a
    dictionary of data frames containing paths for all correlated interest
    rates in the historical time series, plus a dictionary of dfs with the
    paths for specific rates, for further analysis.
    """
    assets = df_train.columns.tolist()
    all_sims = {}
    for i in tqdm(range(sims)):
        all_sims[i] = simulate_single_set_interest_rates(df_train, date_index,
                                                         ar_params_dict)
    master_sim = pd.DataFrame(pd.concat(all_sims, axis=1))
    master_sim.columns = master_sim.columns.get_level_values(1)
    asset_sim = {}
    for asset in assets:
        asset_sim[asset] = master_sim.filter(like=asset, axis=1)
        asset_sim[asset].columns = list(np.arange(sims))
    return all_sims, asset_sim
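
# A hedged end-to-end sketch, added for illustration: simulate one weekly
# rate path from a synthetic two-series history. The column names and the
# hand-picked AR parameters below are assumptions, not fitted values.
def _demo_rate_paths():
    np.random.seed(0)
    dates = pd.date_range('2009-01-02', periods=104, freq='W')
    hist = pd.DataFrame(
        {'libor': 100 + np.random.normal(0, 5, 104).cumsum(),
         '3_yr_auto_AAA': 150 + np.random.normal(0, 5, 104).cumsum()},
        index=dates)
    fake_params = {0: {'AR1': 0.98, 'vol': 4.0},
                   1: {'AR1': 0.97, 'vol': 5.0}}
    future = pd.date_range(dates[-1], periods=52, freq='W')
    print(simulate_single_set_interest_rates(hist, future, fake_params).head())
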

def estimate_1yr_transition(initial_rating='AAA'):
    """Simulate a one-period rating transition.

    This function estimates the transition from any given rating
    'AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC' to the same set of ratings plus
    the 'D' (default) state, based on observed transitions for ABS over one
    year, excluding mortgages. These are approximations, and each asset class
    should have its own transition matrix.
    """
    data = np.array([[9.081e-01, 8.330e-02, 6.500e-03, 9.000e-04, 6.000e-04, 3.000e-04, 2.000e-04, 1.000e-04],
                     [7.000e-03, 9.065e-01, 7.790e-02, 6.400e-03, 6.000e-04, 1.300e-03, 2.000e-04, 1.000e-04],
                     [9.000e-04, 2.270e-02, 9.105e-01, 5.520e-02, 7.400e-03, 2.600e-03, 1.000e-04, 6.000e-04],
                     [2.000e-04, 3.300e-03, 5.950e-02, 8.693e-01, 5.300e-02, 1.170e-02, 1.200e-03, 1.800e-03],
                     [3.000e-04, 1.400e-03, 6.700e-03, 7.730e-02, 8.053e-01, 8.840e-02, 1.000e-02, 1.060e-02],
                     [0.000e+00, 1.100e-03, 2.400e-03, 4.300e-03, 6.480e-02, 8.346e-01, 4.070e-02, 5.210e-02],
                     [2.200e-03, 2.200e-03, 2.200e-03, 1.300e-02, 2.380e-02, 1.124e-01, 6.486e-01, 1.956e-01]])
    initial_ratings = ['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC']
    transition_to = initial_ratings + ['D']
    p_transition = pd.DataFrame(data, index=initial_ratings,
                                columns=transition_to).transpose().to_dict()
    final = np.random.choice(list(p_transition[initial_rating].keys()), 1,
                             p=list(p_transition[initial_rating].values()))[0]
    return final


def estimate_transition_vector(initial_rating, years):
    """Simulate rating upgrades or downgrades over n years.

    This function simulates the movement of the initial rating over time by
    applying estimate_1yr_transition recursively: the initial rating feeds
    the function, and the output of the function feeds it again until n
    periods have been completed. This is a first-order Markov process.
    """
    input_list = [initial_rating]
    new_rating = {}
    for i in range(years - 1):
        new_rating[i] = estimate_1yr_transition(initial_rating=input_list[-1])
        input_list.append(new_rating[i])
        # default is an absorbing state: stop simulating once 'D' is reached
        if new_rating[i] == 'D':
            return input_list
    return input_list
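
# An illustrative Monte Carlo sketch, added for this write-up: estimate the
# probability that a 'AAA' asset migrates to default within three years. The
# trial count is an arbitrary assumption.
def _demo_default_probability(trials=2000):
    defaults = sum(estimate_transition_vector('AAA', 3)[-1] == 'D'
                   for _ in range(trials))
    print('P(default within 3y | AAA) ~', defaults / trials)
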

def simulate_purchase_per_sim_rate_scenario(purchase_weeks, sims, rates_sim_dict,
                                            spreads_dict, date_index,
                                            to_invest=[30, 20, 20, 20, 10]):
    """Simulate purchases per scenario.

    This function simulates the purchase of assets under the different
    interest rate scenarios. It relies on the module-level f_risk_capital,
    p_term and p_issuance tables, and expects purchase_weeks to match the
    length of to_invest.
    """
    purchase_dict = {}
    for i in tqdm(range(sims)):
        capital = []
        assets = np.random.choice(list(p_issuance.keys()), purchase_weeks,
                                  p=list(p_issuance.values()))
        terms = np.random.choice(list(p_term.keys()), purchase_weeks,
                                 p=list(p_term.values()))
        for asset, term in list(zip(assets, terms)):
            capital.append(f_risk_capital[asset][term])
        data_dict = {'rate_label': list(assets),
                     'term': list(terms),
                     'f_risk_capital': capital}
        purchase_dict[i] = pd.DataFrame(data_dict)
        purchase_dict[i]['purchase_week'] = purchase_dict[i].index + 1
        purchase_dict[i]['purchase_date'] = date_index[purchase_dict[i]['purchase_week']]
        purchase_dict[i]['maturity_date'] = date_index[purchase_dict[i]['purchase_week'] +
                                                       (purchase_dict[i]['term'] * 52)]
        purchase_dict[i]['asset'] = (purchase_dict[i]['term'].astype(str) +
                                     '_yr_' + purchase_dict[i]['rate_label'])
        purchase_dict[i]['benchmark_asset'] = '3_yr_' + purchase_dict[i]['rate_label']

    for i in range(sims):
        for ix, col in purchase_dict[i].iterrows():
            purchase_dict[i].at[ix, 'benchmark_ABS_spread'] = \
                rates_sim_dict[i].iloc[col['purchase_week']][col['benchmark_asset']]
            purchase_dict[i].at[ix, 'libor'] = \
                rates_sim_dict[i].iloc[col['purchase_week']]['libor']
            purchase_dict[i].at[ix, 'risk_capital'] = to_invest[ix]
            purchase_dict[i].at[ix, 'fed_loan'] = \
                (to_invest[ix] / col['f_risk_capital']) - to_invest[ix]
            purchase_dict[i].at[ix, 'final_rating'] = \
                estimate_transition_vector('AAA', col['term'])[-1]
        purchase_dict[i]['total_purchase'] = (purchase_dict[i]['fed_loan'] +
                                              purchase_dict[i]['risk_capital'])
        purchase_dict[i]['spread_over_libor'] = (purchase_dict[i]['benchmark_ABS_spread'] -
                                                 purchase_dict[i]['asset'].map(spreads_dict))
        # return on the risk-capital fraction: libor plus spread, in bps
        r1 = ((purchase_dict[i]['libor'] + purchase_dict[i]['spread_over_libor']) *
              purchase_dict[i]['f_risk_capital'])
        # return on the levered fraction: spread less 100 bps
        r2 = ((purchase_dict[i]['spread_over_libor'] - 100) *
              (1 - purchase_dict[i]['f_risk_capital']))
        # scale by risk capital and convert from bps to a decimal return
        purchase_dict[i]['exp_annual_r'] = ((r1 + r2) /
                                            purchase_dict[i]['f_risk_capital']) / 10000
        purchase_dict[i] = purchase_dict[i][['asset',
                                             'purchase_week',
                                             'purchase_date',
                                             'maturity_date',
                                             'term',
                                             'f_risk_capital',
                                             'risk_capital',
                                             'fed_loan',
                                             'total_purchase',
                                             'libor',
                                             'spread_over_libor',
                                             'exp_annual_r',
                                             'final_rating']]
    return purchase_dict


# risk-capital fraction (haircut) by asset class and term, in years
f_risk_capital = {'auto_AAA': {1: 0.10, 2: 0.11, 3: 0.12},
                  'student_loan_AAA': {1: 0.08, 2: 0.09, 3: 0.10},
                  'helc_AAA': {1: 0.12, 2: 0.13, 3: 0.14},
                  'credit_card_AAA': {1: 0.05, 2: 0.05, 3: 0.06}}

# probability of a purchased asset having each term, in years
p_term = {1: 0.20, 2: 0.30, 3: 0.50}

# probability of issuance by asset class
p_issuance = {'auto_AAA': 0.20,
              'student_loan_AAA': 0.20,
              'helc_AAA': 0.30,
              'credit_card_AAA': 0.30}
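
# A hedged arithmetic sketch, added for illustration: the leverage implied by
# the haircut table above, using the same fed_loan formula as the simulator.
def _demo_talf_leverage():
    to_invest = 30.0
    f = f_risk_capital['auto_AAA'][1]        # 0.10 risk-capital fraction
    fed_loan = (to_invest / f) - to_invest   # 270.0 of non-recourse funding
    print('fed loan:', fed_loan, '| total purchase:', to_invest + fed_loan)
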
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### PyABS ###

Code to simulate the purchase of a portfolio of Asset-Backed Securities under simulated interest rate scenarios. This code is intended to illustrate opportunities presented by a real-life scenario, the Term Asset-Backed Securities Loan Facility (TALF), explained in this article: https://towardsdatascience.com/alpha-generation-using-data-science-quantitative-analysis-abs-talf-part-1-eade08b075c

![Image of concept](https://cdn-images-1.medium.com/max/800/1*6DcNyLMw3rwAw1JOvsgUMg.png)

### Steps ###

* Generate correlated random numbers to feed a multivariate process applied to interest rates
* Simulate movements of spreads over a benchmark for given asset classes
* Simulate purchases of assets under given assumptions
* Simulate probabilities of assets transitioning from initial rating to other ratings and defaults, using a Markov process
* Calculate distribution of returns/risks under several scenarios (see the sketch below)
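A minimal, hypothetical usage sketch (the file name, column layout and parameter values below are assumptions, not the project's actual inputs):

```python
import pandas as pd
import PyABS

# weekly history of LIBOR and benchmark ABS spreads (hypothetical file)
train = pd.read_csv('rates_train.csv', index_col=0, parse_dates=True)

# fit AR(1) models per series, then simulate 100 correlated 4-year paths
ar_params = PyABS.ar_param_dictionary(train, order=1)
future_dates = pd.date_range(train.index[-1], periods=209, freq='W')
all_sims, asset_sim = PyABS.simulate_several_sets_correlated_rates(
    train, sims=100, date_index=future_dates, ar_params_dict=ar_params)
```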
### Contact: ###
* Luis M Sanchez: 

--------------------------------------------------------------------------------
/TRAIN.csv:
--------------------------------------------------------------------------------
name,average_price,market,month,commodity
AAA,450,X,3,wheat
AAA,343,Y,3,wheat
AAA,605,Z,3,wheat
AAA,548,X,3,wheat
AAA,343,Y,3,wheat
AAA,518,Z,2,wheat
AAA,588,X,2,wheat
AAA,588,Y,2,wheat
AAA,294,Z,2,wheat
BBB,338,X,1,wheat
BBB,343,Y,1,wheat
BBB,1800,Z,1,wheat
BBB,931,X,1,wheat
BBB,2000,Y,3,wheat
BBB,525,Z,3,wheat
BBB,525,X,3,wheat
BBB,399,Y,3,wheat
CCC,399,Z,3,wheat
CCC,500,X,2,wheat
CCC,500,Y,2,wheat
CCC,600,Z,2,wheat
CCC,600,X,2,wheat
CCC,600,Y,1,wheat
CCC,600,Z,1,wheat
DDD,400,X,1,wheat
DDD,925,Y,1,wheat
DDD,500,Z,3,wheat
DDD,1830,X,3,wheat
DDD,1875,Y,3,wheat
DDD,1060,Z,3,wheat
DDD,1500,X,3,wheat
DDD,1500,Y,2,wheat
DDD,1300,Z,2,wheat

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmsanch/pyABS/9bd765b7deb76a1329a8c750ee7008be3dfd150e/__init__.py

--------------------------------------------------------------------------------
/scores.csv:
--------------------------------------------------------------------------------
name,market,commodity,month,probability
AAA,X,wheat,1,0.029817978
BBB,X,wheat,2,0.125098345
CCC,X,wheat,3,0.033820919
DDD,Y,wheat,4,0.164019417
AAA,Y,wheat,1,0.186440554
BBB,Y,wheat,2,0.116901239
CCC,Z,wheat,3,0.167238637
DDD,Z,wheat,4,0.375220602
BBB,X,wheat,1,0.062204079
CCC,Y,wheat,2,0.048344306
DDD,Z,wheat,3,0.089629271
AAA,X,wheat,4,0.084991896
AAA,X,wheat,1,0.317607153
AAA,X,wheat,2,0.261206347
AAA,Y,wheat,3,0.295565891
AAA,Y,wheat,4,0.076192024
AAA,Y,wheat,1,0.022560915
DDD,Z,wheat,2,0.076835612
CCC,Z,wheat,3,0.272481814
DDD,X,wheat,4,0.205807318
AAA,Y,wheat,1,0.266290415
BBB,Z,wheat,2,0.14826552
CCC,X,wheat,3,0.151685827
DDD,X,wheat,4,0.114539865
BBB,X,wheat,1,0.093286169
AAA,Y,wheat,2,0.093730445
BBB,Y,wheat,3,0.07537145
CCC,Y,wheat,4,0.239036917
DDD,Z,wheat,1,0.113287981
AAA,Z,wheat,2,0.05552707
BBB,X,wheat,3,0.290214136
CCC,Y,wheat,4,0.124862745
DDD,Z,wheat,1,0.198851781
BBB,X,wheat,2,0.246712921
CCC,X,wheat,3,0.076254396
DDD,X,wheat,4,0.140496122
AAA,Y,wheat,1,0.604620728
AAA,Y,wheat,2,0.132080841
AAA,Y,wheat,3,0.457020219
AAA,Z,wheat,4,0.317894417
AAA,Z,wheat,1,0.085713615
AAA,X,wheat,2,0.062841487
DDD,Y,wheat,3,0.100599552
CCC,Z,wheat,4,0.12705455
DDD,X,wheat,1,0.047764071
AAA,X,wheat,2,0.023521924
BBB,X,wheat,3,0.330975253
CCC,Y,wheat,4,0.063890525
DDD,Y,wheat,1,0.107600092
BBB,Y,wheat,2,0.102889007
CCC,Z,wheat,3,0.198324327
CCC,Z,wheat,4,0.01704665
CCC,X,wheat,1,0.077477311
CCC,Y,wheat,2,0.030768188
CCC,Z,wheat,3,0.144079809
CCC,Z,wheat,4,0.237769058
--------------------------------------------------------------------------------