├── .gitignore
├── LICENSE.md
├── PyABS.py
├── README.md
├── TALF_1.ipynb
├── TRAIN.csv
├── __init__.py
└── scores.csv

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

*.pyc

.ipynb_checkpoints/

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------

The MIT License (MIT)

Copyright (c) 2019 Luis M Sanchez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/PyABS.py:
--------------------------------------------------------------------------------
"""PyABS, light version of aspects of proprietary code."""
import pandas as pd
import numpy as np
# ARMA lives here in statsmodels < 0.13; it was removed in later releases
from statsmodels.tsa.arima_model import ARMA
from scipy.linalg import cholesky
import matplotlib.pyplot as plt
from tqdm import tqdm


def autocorrelation_and_significance(series, ax=None, **kwds):
    """Autocorrelation and significant lags for a time series."""
    n = len(series)
    data = np.asarray(series)
    if ax is None:
        ax = plt.gca()
        ax.set_xlim(1, n)
        ax.set_ylim(-1.0, 1.0)
    plt.rc('xtick', labelsize=15)
    plt.rc('ytick', labelsize=15)
    mean = np.mean(data)
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        return ((data[:n - h] - mean) *
                (data[h:] - mean)).sum() / float(n) / c0

    x = np.arange(n) + 1
    y = [r(h) for h in x]
    z95 = 1.959963984540054
    z95l = -z95 / np.sqrt(n)
    z95h = z95 / np.sqrt(n)
    ax.axhline(y=z95h, linestyle=':', color='red')
    ax.axhline(y=0.0, color='black')
    ax.axhline(y=z95l, linestyle=':', color='red')
    ax.set_xlabel("lag", fontsize=15)
    ax.set_ylabel("autocorrelation", fontsize=15)
    ax.plot(x, y, **kwds)
    if 'label' in kwds:
        ax.legend()
    ax.grid()
    df = pd.DataFrame({'autocorrelation': y,
                       'lag': x})
    lags = np.sort(np.append(df[df['autocorrelation'] > z95h]['lag'],
                             df[df['autocorrelation'] < z95l]['lag']))
    return df, lags, ax
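
# A hedged usage sketch, added for illustration only: run the plot on a
# synthetic smoothed-noise series (an assumption, not project data).
def _demo_autocorrelation():
    rng = np.random.default_rng(0)
    noise = rng.standard_normal(200)
    # a rolling mean induces autocorrelation at short lags
    series = pd.Series(noise).rolling(5, min_periods=1).mean()
    df_acf, significant_lags, ax = autocorrelation_and_significance(series)
    print('lags beyond the 95% significance band:', significant_lags)
    plt.show()
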

def optimal_params_ar_model(data, lags_to_test, cap=4, test_criteria='BIC', **kwds):
    """Optimal lags using the Bayes or Akaike Information Criterion.

    This function tests n lags to find out whether autoregressive models of
    order > 1 are worth exploring, using the BIC or AIC information criterion.
    Keyword arguments:
    data -- the series to test
    lags_to_test -- list of lags to test
    cap -- max number of lags to plot
    test_criteria -- either 'BIC' or 'AIC'
    **kwds -- matplotlib keyword arguments
    """
    ax = plt.gca()
    plt.rc('xtick', labelsize=13)
    plt.rc('ytick', labelsize=13)
    num_lags = len(lags_to_test)
    information_criteria = np.zeros(num_lags)
    # index by position, so the criterion array is filled correctly even when
    # the tested lags do not start at zero
    for i, lag in enumerate(list(lags_to_test)[:cap]):
        mod = ARMA(data.values, order=(lag, 0))
        res = mod.fit()
        if test_criteria == 'BIC':
            information_criteria[i] = res.bic
            ax.set_title('Bayes Information Criterion', fontsize=20)
            ax.set_ylabel('BIC', fontsize=15)
        elif test_criteria == 'AIC':
            information_criteria[i] = res.aic
            ax.set_title('Akaike Information Criterion', fontsize=20)
            ax.set_ylabel('AIC', fontsize=15)

    ax.set_xlabel('Lag', fontsize=15)
    ax.plot(lags_to_test[:cap], information_criteria[:cap], **kwds)
    ax.legend(loc='best')

    return ax


def ar_param_dictionary(train_df, order):
    """Parameters of autoregressive models.

    Given a train df, this function fits autoregressive models of the given
    order for the different time series in the train df and stores a summary
    of results, the AR1 coefficient and the volatility (standard deviation)
    of observations.
    """
    ar_params = {}
    for i, col in enumerate(train_df):
        asset = train_df[col]
        mod = ARMA(asset, order=(order, 0))
        res = mod.fit()
        ar_params[i] = {'name': col,
                        'summary': res.summary(),
                        'AR1': res.arparams[0],
                        # sigma2 is the residual variance; store its square
                        # root so 'vol' is a standard deviation, which is what
                        # the simulation step multiplies the normal shocks by
                        'vol': np.sqrt(res.sigma2)}
    return ar_params


def simulate_correlated_random_numbers(corr_matrix, n=1000):
    """Multivariate random normal.

    A generalization of the one-dimensional normal distribution to higher
    dimensions, using the Cholesky decomposition.
    https://math.stackexchange.com/questions/2079137/generating-multivariate-normal-samples-why-cholesky
    """
    upper_cholesky = cholesky(corr_matrix)
    rnd_numbers = np.random.normal(0.0, 1.0, size=(n, corr_matrix.shape[0]))
    return rnd_numbers @ upper_cholesky
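
# An illustrative sanity check, added for this write-up: the empirical
# correlation of the draws should recover the target matrix (the 0.8 target
# below is an arbitrary assumption).
def _demo_correlated_normals():
    target = np.array([[1.0, 0.8],
                       [0.8, 1.0]])
    draws = simulate_correlated_random_numbers(target, n=100_000)
    # prints a matrix close to [[1.0, 0.8], [0.8, 1.0]]
    print(np.corrcoef(draws, rowvar=False).round(2))
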

def simulate_single_set_interest_rates(train_df, date_ix, ar_params_dict,
                                       vol_stress=1):
    """Simulate one path of multiple future interest rates.

    Given a historical time series of interest rates, this function generates
    a path of correlated interest rates.
    Keyword arguments:
    train_df -- data frame with time series
    date_ix -- an array with the dates to simulate
    ar_params_dict -- dictionary containing parameters for the different series
    vol_stress -- volatility stress multiplier
    """
    corr_matrix = train_df.corr().to_numpy()
    fut_rates = {}
    for i in range(train_df.shape[1]):
        fut_rates[i] = np.zeros(len(date_ix))
        fut_rates[i][0] = train_df.iloc[-1, i]

    # draw at least as many correlated shocks as there are dates to simulate
    corr_rnd = simulate_correlated_random_numbers(corr_matrix,
                                                  n=max(1000, len(date_ix)))
    for k in range(train_df.shape[1]):
        for z in range(len(date_ix)):
            # skip the first value, since it is the seed value for the sim
            if z != 0:
                fut_rates[k][z] = (fut_rates[k][z - 1] * ar_params_dict[k]['AR1'] +
                                   ar_params_dict[k]['vol'] * corr_rnd[:, k][z])
                fut_rates[k][z] = fut_rates[k][z] * vol_stress
                # if a simulated spread goes negative, reset it to the
                # previous (positive) spread
                if fut_rates[k][z] < 0:
                    fut_rates[k][z] = fut_rates[k][z - 1]
    rates_df = pd.DataFrame(fut_rates, index=date_ix)
    rates_df.columns = train_df.columns
    return rates_df


def simulate_several_sets_correlated_rates(df_train, sims, date_index, ar_params_dict):
    """Simulate many paths of multiple future interest rates.

    Given a historical time series of interest rates in a df, the number of
    simulations to perform, an index of future dates, and a dictionary of
    autoregressive parameters for interest rates, this function generates a
    dictionary of data frames containing paths for all correlated interest
    rates in the historical time series, plus a dictionary of dfs with the
    paths for specific rates, for further analysis.
    """
    assets = df_train.columns.tolist()
    all_sims = {}
    for i in tqdm(range(sims)):
        all_sims[i] = simulate_single_set_interest_rates(df_train, date_index,
                                                         ar_params_dict)
    master_sim = pd.DataFrame(pd.concat(all_sims, axis=1))
    master_sim.columns = master_sim.columns.get_level_values(1)
    asset_sim = {}
    for asset in assets:
        asset_sim[asset] = master_sim.filter(like=asset, axis=1)
        asset_sim[asset].columns = list(np.arange(sims))
    return all_sims, asset_sim
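
# A hedged end-to-end sketch, added for illustration: simulate one weekly
# rate path from a synthetic two-series history. The column names and the
# hand-picked AR parameters below are assumptions, not fitted values.
def _demo_rate_paths():
    np.random.seed(0)
    dates = pd.date_range('2009-01-02', periods=104, freq='W')
    hist = pd.DataFrame(
        {'libor': 100 + np.random.normal(0, 5, 104).cumsum(),
         '3_yr_auto_AAA': 150 + np.random.normal(0, 5, 104).cumsum()},
        index=dates)
    fake_params = {0: {'AR1': 0.98, 'vol': 4.0},
                   1: {'AR1': 0.97, 'vol': 5.0}}
    future = pd.date_range(dates[-1], periods=52, freq='W')
    print(simulate_single_set_interest_rates(hist, future, fake_params).head())
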

def estimate_1yr_transition(initial_rating='AAA'):
    """Simulate a one-period rating transition.

    This function estimates the transition from any given rating
    'AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC' to the same set of ratings plus
    the 'D' (default) state, based on observed transitions for ABS over one
    year, excluding mortgages. These are approximations, and each asset class
    should have its own transition matrix.
    """
    data = np.array([[9.081e-01, 8.330e-02, 6.500e-03, 9.000e-04, 6.000e-04, 3.000e-04, 2.000e-04, 1.000e-04],
                     [7.000e-03, 9.065e-01, 7.790e-02, 6.400e-03, 6.000e-04, 1.300e-03, 2.000e-04, 1.000e-04],
                     [9.000e-04, 2.270e-02, 9.105e-01, 5.520e-02, 7.400e-03, 2.600e-03, 1.000e-04, 6.000e-04],
                     [2.000e-04, 3.300e-03, 5.950e-02, 8.693e-01, 5.300e-02, 1.170e-02, 1.200e-03, 1.800e-03],
                     [3.000e-04, 1.400e-03, 6.700e-03, 7.730e-02, 8.053e-01, 8.840e-02, 1.000e-02, 1.060e-02],
                     [0.000e+00, 1.100e-03, 2.400e-03, 4.300e-03, 6.480e-02, 8.346e-01, 4.070e-02, 5.210e-02],
                     [2.200e-03, 2.200e-03, 2.200e-03, 1.300e-02, 2.380e-02, 1.124e-01, 6.486e-01, 1.956e-01]])
    initial_ratings = ['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC']
    transition_to = initial_ratings + ['D']
    p_transition = pd.DataFrame(data, index=initial_ratings,
                                columns=transition_to).transpose().to_dict()
    final = np.random.choice(list(p_transition[initial_rating].keys()), 1,
                             p=list(p_transition[initial_rating].values()))[0]
    return final


def estimate_transition_vector(initial_rating, years):
    """Simulate rating upgrades or downgrades over n years.

    This function simulates the movement of the initial rating over time by
    applying estimate_1yr_transition recursively: the initial rating feeds
    the function, and the output of the function feeds it again until n
    periods have been completed. This is a first-order Markov process.
    """
    input_list = [initial_rating]
    new_rating = {}
    for i in range(years - 1):
        new_rating[i] = estimate_1yr_transition(initial_rating=input_list[-1])
        input_list.append(new_rating[i])
        # default is an absorbing state: stop simulating once 'D' is reached
        if new_rating[i] == 'D':
            return input_list
    return input_list
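
# An illustrative Monte Carlo sketch, added for this write-up: estimate the
# probability that a 'AAA' asset migrates to default within three years. The
# trial count is an arbitrary assumption.
def _demo_default_probability(trials=2000):
    defaults = sum(estimate_transition_vector('AAA', 3)[-1] == 'D'
                   for _ in range(trials))
    print('P(default within 3y | AAA) ~', defaults / trials)
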

def simulate_purchase_per_sim_rate_scenario(purchase_weeks, sims, rates_sim_dict,
                                            spreads_dict, date_index,
                                            to_invest=[30, 20, 20, 20, 10]):
    """Simulate purchases per scenario.

    This function simulates the purchase of assets under the different
    interest rate scenarios. It relies on the module-level f_risk_capital,
    p_term and p_issuance tables, and expects purchase_weeks to match the
    length of to_invest.
    """
    purchase_dict = {}
    for i in tqdm(range(sims)):
        capital = []
        assets = np.random.choice(list(p_issuance.keys()), purchase_weeks,
                                  p=list(p_issuance.values()))
        terms = np.random.choice(list(p_term.keys()), purchase_weeks,
                                 p=list(p_term.values()))
        for asset, term in list(zip(assets, terms)):
            capital.append(f_risk_capital[asset][term])
        data_dict = {'rate_label': list(assets),
                     'term': list(terms),
                     'f_risk_capital': capital}
        purchase_dict[i] = pd.DataFrame(data_dict)
        purchase_dict[i]['purchase_week'] = purchase_dict[i].index + 1
        purchase_dict[i]['purchase_date'] = date_index[purchase_dict[i]['purchase_week']]
        purchase_dict[i]['maturity_date'] = date_index[purchase_dict[i]['purchase_week'] +
                                                       (purchase_dict[i]['term'] * 52)]
        purchase_dict[i]['asset'] = (purchase_dict[i]['term'].astype(str) +
                                     '_yr_' + purchase_dict[i]['rate_label'])
        purchase_dict[i]['benchmark_asset'] = '3_yr_' + purchase_dict[i]['rate_label']

    for i in range(sims):
        for ix, col in purchase_dict[i].iterrows():
            purchase_dict[i].at[ix, 'benchmark_ABS_spread'] = \
                rates_sim_dict[i].iloc[col['purchase_week']][col['benchmark_asset']]
            purchase_dict[i].at[ix, 'libor'] = \
                rates_sim_dict[i].iloc[col['purchase_week']]['libor']
            purchase_dict[i].at[ix, 'risk_capital'] = to_invest[ix]
            purchase_dict[i].at[ix, 'fed_loan'] = \
                (to_invest[ix] / col['f_risk_capital']) - to_invest[ix]
            purchase_dict[i].at[ix, 'final_rating'] = \
                estimate_transition_vector('AAA', col['term'])[-1]
        purchase_dict[i]['total_purchase'] = (purchase_dict[i]['fed_loan'] +
                                              purchase_dict[i]['risk_capital'])
        purchase_dict[i]['spread_over_libor'] = (purchase_dict[i]['benchmark_ABS_spread'] -
                                                 purchase_dict[i]['asset'].map(spreads_dict))
        # return on the risk-capital fraction: libor plus spread, in bps
        r1 = ((purchase_dict[i]['libor'] + purchase_dict[i]['spread_over_libor']) *
              purchase_dict[i]['f_risk_capital'])
        # return on the levered fraction: spread less 100 bps
        r2 = ((purchase_dict[i]['spread_over_libor'] - 100) *
              (1 - purchase_dict[i]['f_risk_capital']))
        # scale by risk capital and convert from bps to a decimal return
        purchase_dict[i]['exp_annual_r'] = ((r1 + r2) /
                                            purchase_dict[i]['f_risk_capital']) / 10000
        purchase_dict[i] = purchase_dict[i][['asset',
                                             'purchase_week',
                                             'purchase_date',
                                             'maturity_date',
                                             'term',
                                             'f_risk_capital',
                                             'risk_capital',
                                             'fed_loan',
                                             'total_purchase',
                                             'libor',
                                             'spread_over_libor',
                                             'exp_annual_r',
                                             'final_rating']]
    return purchase_dict


# risk-capital fraction (haircut) by asset class and term, in years
f_risk_capital = {'auto_AAA': {1: 0.10, 2: 0.11, 3: 0.12},
                  'student_loan_AAA': {1: 0.08, 2: 0.09, 3: 0.10},
                  'helc_AAA': {1: 0.12, 2: 0.13, 3: 0.14},
                  'credit_card_AAA': {1: 0.05, 2: 0.05, 3: 0.06}}

# probability of a purchased asset having each term, in years
p_term = {1: 0.20, 2: 0.30, 3: 0.50}

# probability of issuance by asset class
p_issuance = {'auto_AAA': 0.20,
              'student_loan_AAA': 0.20,
              'helc_AAA': 0.30,
              'credit_card_AAA': 0.30}
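
# A hedged arithmetic sketch, added for illustration: the leverage implied by
# the haircut table above, using the same fed_loan formula as the simulator.
def _demo_talf_leverage():
    to_invest = 30.0
    f = f_risk_capital['auto_AAA'][1]        # 0.10 risk-capital fraction
    fed_loan = (to_invest / f) - to_invest   # 270.0 of non-recourse funding
    print('fed loan:', fed_loan, '| total purchase:', to_invest + fed_loan)
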
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### PyABS ###

Code to simulate the purchase of a portfolio of Asset-Backed Securities under simulated interest rate scenarios. This code is intended to illustrate opportunities presented by a real-life scenario, the Term Asset-Backed Securities Loan Facility (TALF), explained in this article: https://towardsdatascience.com/alpha-generation-using-data-science-quantitative-analysis-abs-talf-part-1-eade08b075c

![Image of concept](https://cdn-images-1.medium.com/max/800/1*6DcNyLMw3rwAw1JOvsgUMg.png)

### Steps ###

* Generate correlated random numbers to feed a multivariate process applied to interest rates
* Simulate movements of spreads over a benchmark for given asset classes
* Simulate purchases of assets under given assumptions
* Simulate probabilities of assets transitioning from initial rating to other ratings and defaults, using a Markov process
* Calculate distribution of returns/risks under several scenarios (see the sketch below)
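A minimal, hypothetical usage sketch (the file name, column layout and parameter values below are assumptions, not the project's actual inputs):

```python
import pandas as pd
import PyABS

# weekly history of LIBOR and benchmark ABS spreads (hypothetical file)
train = pd.read_csv('rates_train.csv', index_col=0, parse_dates=True)

# fit AR(1) models per series, then simulate 100 correlated 4-year paths
ar_params = PyABS.ar_param_dictionary(train, order=1)
future_dates = pd.date_range(train.index[-1], periods=209, freq='W')
all_sims, asset_sim = PyABS.simulate_several_sets_correlated_rates(
    train, sims=100, date_index=future_dates, ar_params_dict=ar_params)
```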
### Contact: ###
* Luis M Sanchez: 

--------------------------------------------------------------------------------
/TRAIN.csv:
--------------------------------------------------------------------------------
name,average_price,market,month,commodity
AAA,450,X,3,wheat
AAA,343,Y,3,wheat
AAA,605,Z,3,wheat
AAA,548,X,3,wheat
AAA,343,Y,3,wheat
AAA,518,Z,2,wheat
AAA,588,X,2,wheat
AAA,588,Y,2,wheat
AAA,294,Z,2,wheat
BBB,338,X,1,wheat
BBB,343,Y,1,wheat
BBB,1800,Z,1,wheat
BBB,931,X,1,wheat
BBB,2000,Y,3,wheat
BBB,525,Z,3,wheat
BBB,525,X,3,wheat
BBB,399,Y,3,wheat
CCC,399,Z,3,wheat
CCC,500,X,2,wheat
CCC,500,Y,2,wheat
CCC,600,Z,2,wheat
CCC,600,X,2,wheat
CCC,600,Y,1,wheat
CCC,600,Z,1,wheat
DDD,400,X,1,wheat
DDD,925,Y,1,wheat
DDD,500,Z,3,wheat
DDD,1830,X,3,wheat
DDD,1875,Y,3,wheat
DDD,1060,Z,3,wheat
DDD,1500,X,3,wheat
DDD,1500,Y,2,wheat
DDD,1300,Z,2,wheat

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmsanch/pyABS/9bd765b7deb76a1329a8c750ee7008be3dfd150e/__init__.py

--------------------------------------------------------------------------------
/scores.csv:
--------------------------------------------------------------------------------
name,market,commodity,month,probability
AAA,X,wheat,1,0.029817978
BBB,X,wheat,2,0.125098345
CCC,X,wheat,3,0.033820919
DDD,Y,wheat,4,0.164019417
AAA,Y,wheat,1,0.186440554
BBB,Y,wheat,2,0.116901239
CCC,Z,wheat,3,0.167238637
DDD,Z,wheat,4,0.375220602
BBB,X,wheat,1,0.062204079
CCC,Y,wheat,2,0.048344306
DDD,Z,wheat,3,0.089629271
AAA,X,wheat,4,0.084991896
AAA,X,wheat,1,0.317607153
AAA,X,wheat,2,0.261206347
AAA,Y,wheat,3,0.295565891
AAA,Y,wheat,4,0.076192024
AAA,Y,wheat,1,0.022560915
DDD,Z,wheat,2,0.076835612
CCC,Z,wheat,3,0.272481814
DDD,X,wheat,4,0.205807318
AAA,Y,wheat,1,0.266290415
BBB,Z,wheat,2,0.14826552
CCC,X,wheat,3,0.151685827
DDD,X,wheat,4,0.114539865
BBB,X,wheat,1,0.093286169
AAA,Y,wheat,2,0.093730445
BBB,Y,wheat,3,0.07537145
CCC,Y,wheat,4,0.239036917
DDD,Z,wheat,1,0.113287981
AAA,Z,wheat,2,0.05552707
BBB,X,wheat,3,0.290214136
CCC,Y,wheat,4,0.124862745
DDD,Z,wheat,1,0.198851781
BBB,X,wheat,2,0.246712921
CCC,X,wheat,3,0.076254396
DDD,X,wheat,4,0.140496122
AAA,Y,wheat,1,0.604620728
AAA,Y,wheat,2,0.132080841
AAA,Y,wheat,3,0.457020219
AAA,Z,wheat,4,0.317894417
AAA,Z,wheat,1,0.085713615
AAA,X,wheat,2,0.062841487
DDD,Y,wheat,3,0.100599552
CCC,Z,wheat,4,0.12705455
DDD,X,wheat,1,0.047764071
AAA,X,wheat,2,0.023521924
BBB,X,wheat,3,0.330975253
CCC,Y,wheat,4,0.063890525
DDD,Y,wheat,1,0.107600092
BBB,Y,wheat,2,0.102889007
CCC,Z,wheat,3,0.198324327
CCC,Z,wheat,4,0.01704665
CCC,X,wheat,1,0.077477311
CCC,Y,wheat,2,0.030768188
CCC,Z,wheat,3,0.144079809
CCC,Z,wheat,4,0.237769058
--------------------------------------------------------------------------------