├── .gitignore
├── LICENSE
├── README.md
└── probability_of_default_tools.py


/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Bradford Lynch

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# p-def-aws-lambda

Distributed computation of the probability of default on AWS Lambda.

`probability_of_default_tools.py` loads a per-company CSV of equity market
value, face value of debt, and the risk-free rate from S3, solves the Merton
structural model for the implied firm asset value over one or more time
horizons, and writes the augmented CSV back to S3. Because each company is an
independent problem keyed by its own S3 object, the workload parallelizes
naturally across Lambda invocations.
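A minimal local invocation might look like the sketch below. It assumes AWS
credentials with read/write access to the default `p-def` bucket, and an input
key whose path contains `merged-corp-data` (the results key is derived by
substituting `merton-results`); the key shown here is hypothetical.

```python
from probability_of_default_tools import run_model

# Hypothetical key; point this at wherever the merged company CSVs live.
start, end, response = run_model('merged-corp-data/example-company.csv')
print('Run took {:.1f} s'.format(end - start))
```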
--------------------------------------------------------------------------------
/probability_of_default_tools.py:
--------------------------------------------------------------------------------
import io, time, boto3
import numpy as np
import scipy.optimize as sco
from scipy.stats import norm

def rolling_window(a, window):
    '''
    Use Numpy's stride tricks to create a rolling window over array a

    Args:
        a (ndarray): Numpy array of values to calculate rolling window over
        window (int): Width of window

    Returns:
        ndarray: Array of rolling values
    '''
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

def get_pd_dataframe(key, bucket='p-def'):
    '''
    Loads the CSV file of data for a single company from the key specified.

    Args:
        key (str): Key pointing to file in S3
        bucket (str): Name of the S3 bucket to read from

    Returns:
        DataFrame: Pandas DataFrame of the company data, indexed by date
    '''
    # Imported here so the rest of the module works without pandas installed
    import pandas as pd

    # Create S3 client and retrieve data based on key
    client = boto3.client('s3')
    obj = client.get_object(Bucket=bucket, Key=key)

    # Read in data
    raw_data = io.StringIO(obj['Body'].read().decode('utf-8'))

    return pd.read_csv(raw_data, index_col=0, parse_dates=True)

def get_data(key, bucket='p-def'):
    '''
    Loads the CSV file of data for a single company from the key specified.

    Args:
        key (str): Key pointing to file in S3
        bucket (str): Name of the S3 bucket to read from

    Returns:
        dict: Map of column name to numpy array column index
        list: Date of each observation in the dataset
        ndarray: Numpy array of data
    '''
    # Create S3 client and retrieve data based on key
    client = boto3.client('s3')
    obj = client.get_object(Bucket=bucket, Key=key)

    # Read in data
    raw_data = obj['Body'].read().decode('utf-8').splitlines()

    # Create map of column name to index, accounting for the first (date)
    # column being split off below
    header_map = {name: i - 1 for i, name in enumerate(raw_data[0].split(','))}

    # Discard header row and split off first column (Date column)
    dates = [row.split(',')[0] for row in raw_data[1:]]
    data = [",".join(row.split(',')[1:]) for row in raw_data[1:]]

    # Create numpy array of data
    data = np.genfromtxt(data, delimiter=',')

    return header_map, dates, data

def save_data(data, key, bucket='p-def'):
    '''
    Writes data for a single company to a file at the specified key.

    Args:
        data (file buffer): File buffer containing data to write to file
        key (str): Key pointing to file in S3
        bucket (str): Name of the S3 bucket to write to

    Returns:
        dict: Response from the S3 put operation
    '''
    # Upload the buffer contents as UTF-8 encoded bytes
    s3 = boto3.resource('s3')
    res = s3.Object(bucket, key).put(Body=data.read().encode('utf-8'))

    return res

def merge_data_to_csv(header_map, dates, corp_data, results):
    '''
    Combines arguments back into a single CSV file.

    Args:
        header_map (dict): Map of column name to numpy array column index
        dates (list): Date of each observation in the dataset
        corp_data (ndarray): Numpy array of company data
        results (ndarray): Numpy array of same length as company data containing asset value results

    Returns:
        StringIO: File buffer containing data in CSV format
    '''
    csv_file = io.StringIO()

    # Convert header map to map of index to name
    i_to_name = {value + 1: key for key, value in header_map.items()}

    # Add column names for new asset value columns
    i_start = max(i_to_name.keys()) + 1
    for i in range(results.shape[1]):
        i_to_name[i_start + i] = 'Va_{:d}'.format(i + 1)

    # Create header row
    csv_file.write(','.join([i_to_name[i] for i in range(len(i_to_name))]) + '\n')

    # Combine company data and results and write to CSV
    np.savetxt(csv_file, np.hstack((np.array(dates).reshape((-1, 1)), corp_data, results)), fmt='%s', delimiter=',')

    # Reset file position
    csv_file.seek(0)

    return csv_file

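# The solver below rests on the Merton view of the firm: equity is a European
# call option on the firm's assets, struck at the face value of debt. Under
# Black-Scholes this gives, for asset value V_a, equity value V_e, debt face
# value F, risk-free rate r_f, asset volatility sigma_a, horizon T, and
# standard normal CDF N:
#
#     V_e = V_a * N(d1) - exp(-r_f * T) * F * N(d2)
#     d1  = (ln(V_a / F) + (r_f + 0.5 * sigma_a**2) * T) / (sigma_a * sqrt(T))
#     d2  = d1 - sigma_a * sqrt(T)
#
# The inner `equations` function returns the residual between the observed V_e
# and the model's right-hand side, and `sco.root` drives that residual to zero
# to recover V_a. Note the code works in daily units: T is measured in trading
# days (252 per year) and r_f is converted from an annual to a daily rate.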
def solve_for_asset_value(corp_data, header_map, time_horizon, min_hist_vals=252):
    '''
    Solves for a firm's asset value based on a time history of the firm's equity
    value, debt level, and risk-free rate.

    Args:
        corp_data (ndarray): Numpy array of company data (Equity value, face value of debt, and risk-free rate)
        header_map (dict): Map of column name to the data column index in corp_data
        time_horizon (list): List of time horizons (in years) to calculate asset value for
        min_hist_vals (int): Minimum number of days of history required before solving

    Returns:
        ndarray: Numpy array of time-series of asset values
    '''
    def equations(v_a, debug=False):
        # Standard Black-Scholes d1/d2 terms for a call on the firm's assets
        d1 = (np.log(v_a/face_val_debt) + (r_f + 0.5*sigma_a**2)*T)/(sigma_a*np.sqrt(T))
        d2 = d1 - sigma_a*np.sqrt(T)

        # Residual between observed equity value and model equity value
        y1 = v_e - (v_a*norm.cdf(d1) - np.exp(-r_f*T)*face_val_debt*norm.cdf(d2))

        if debug:
            print("d1 = {:.6f}".format(d1))
            print("d2 = {:.6f}".format(d2))
            print("Error = {:.6f}".format(y1))

        return y1

    # Set window width (in trading days) for calculating historical volatility
    win = 252

    # Solve only once at least min_hist_vals days of history are available
    timesteps = range(min_hist_vals, len(corp_data))

    # Calculate historical equity volatility (currently informational only;
    # the iteration below estimates asset volatility directly)
    ret_col = header_map['RET']
    sigma_e = np.zeros(corp_data.shape[0])
    sigma_e[:win-1] = np.nan
    sigma_e[win-1:] = np.std(rolling_window(np.log(corp_data[:,ret_col] + 1), win), axis=-1)

    assert type(time_horizon) in [list, tuple], "time_horizon must be a list or tuple"

    # Create array for storing results
    results = np.empty((corp_data.shape[0], len(time_horizon)))

    for i, years in enumerate(time_horizon):
        # Time to maturity in trading days
        T = 252*years
        # Set initial guess for firm value equal to the equity value
        results[:,i] = corp_data[:,header_map['mkt_val']]

        # Run through all days
        for i_t, t in enumerate(timesteps):
            # Check if the company is levered
            if corp_data[t,header_map['face_value_debt']] > 1e-10:
                # Company is levered; form an initial guess at sigma_a from
                # the trailing year of asset value estimates
                v_a_per = results[t-252:t,i]
                v_a_ret = np.log(v_a_per/np.roll(v_a_per,1))
                v_a_ret[0] = np.nan
                sigma_a = np.nanstd(v_a_ret)

                if i_t == 0:
                    # First solvable day: backfill the trailing year as well
                    subset_timesteps = range(t-252, t+1)
                else:
                    subset_timesteps = [t]

                # Iterate on previous values of V_a until sigma_a converges
                n_its = 0
                while n_its < 10:
                    n_its += 1
                    # Loop over timesteps, calculating Va using the current guess for sigma_a
                    for t_sub in subset_timesteps:
                        # Convert the annual risk-free rate to a daily rate
                        r_f = (1 + corp_data[t_sub,header_map['DGS1']])**(1.0/365) - 1
                        v_e = corp_data[t_sub,header_map['mkt_val']]
                        face_val_debt = corp_data[t_sub,header_map['face_value_debt']]
                        sol = sco.root(equations, results[t_sub,i])
                        results[t_sub,i] = sol['x'][0]

                    # Update sigma_a based on the new values of Va
                    last_sigma_a = sigma_a
                    v_a_per = results[t-252:t,i]
                    v_a_ret = np.log(v_a_per/np.roll(v_a_per,1))
                    v_a_ret[0] = np.nan
                    sigma_a = np.nanstd(v_a_ret)

                    if abs(last_sigma_a - sigma_a) < 1e-3:
                        break
            else:
                # Company is unlevered, so Va = Ve; results[:,i] was already
                # initialized to the equity value, so nothing to do
                pass

    return results

def run_model(key, time_horizon=[1,2,3,4,5]):
    '''
    Apply the B-S option pricing model to calculate inferred firm asset values
    as a function of time.

    Args:
        key (str): Key pointing to data in S3
        time_horizon (list): List of time horizons (in years) to calculate the model over

    Returns:
        float: Time run was started (in unix time)
        float: Time run finished (in unix time)
        dict: Response from S3 write, or False if there was too little data

    '''
    start = time.time()

    # Get data from S3
    h_map, dates, data = get_data(key)

    if len(dates) > 252:
        # Run the simulation
        results = solve_for_asset_value(data, h_map, time_horizon=time_horizon)

        # Merge data back into CSV
        csv_file = merge_data_to_csv(h_map, dates, data, results)

        # Save results to S3
        result_key = key.replace('merged-corp-data', 'merton-results')
        response = save_data(csv_file, result_key)
    else:
        # Not enough history to calibrate the model
        response = False

    end = time.time()

    return start, end, response
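
# A minimal sketch of what a Lambda entry point for this module might look
# like. This is an assumption, not part of the original repo: the handler name
# and the event field carrying the S3 key are hypothetical and must match
# however the function is actually configured and invoked.
def lambda_handler(event, context):
    start, end, response = run_model(event['key'])
    return {'start': start, 'end': end, 'elapsed': end - start,
            'saved': bool(response)}
--------------------------------------------------------------------------------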