├── .gitignore
├── LICENSE
├── README.md
└── probability_of_default_tools.py


/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Bradford Lynch

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# p-def-aws-lambda

Distributed computation of the probability of default on AWS Lambda.

`probability_of_default_tools.py` loads a per-company CSV of equity market
value, face value of debt, and the risk-free rate from S3, solves the Merton
structural model for the implied firm asset value over one or more time
horizons, and writes the augmented CSV back to S3. Because each company is an
independent problem keyed by its own S3 object, the workload parallelizes
naturally across Lambda invocations.
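A minimal local invocation might look like the sketch below. It assumes AWS
credentials with read/write access to the default `p-def` bucket, and an input
key whose path contains `merged-corp-data` (the results key is derived by
substituting `merton-results`); the key shown here is hypothetical.

```python
from probability_of_default_tools import run_model

# Hypothetical key; point this at wherever the merged company CSVs live.
start, end, response = run_model('merged-corp-data/example-company.csv')
print('Run took {:.1f} s'.format(end - start))
```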
--------------------------------------------------------------------------------
/probability_of_default_tools.py:
--------------------------------------------------------------------------------
import io, time, boto3
import numpy as np
import scipy.optimize as sco
from scipy.stats import norm

def rolling_window(a, window):
    '''
    Use Numpy's stride tricks to create a rolling window over array a

    Args:
        a (ndarray): Numpy array of values to calculate rolling window over
        window (int): Width of window

    Returns:
        ndarray: Array of rolling values
    '''
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

def get_pd_dataframe(key, bucket='p-def'):
    '''
    Loads the CSV file of data for a single company from the key specified.

    Args:
        key (str): Key pointing to file in S3
        bucket (str): Name of the S3 bucket to read from

    Returns:
        DataFrame: Pandas DataFrame of the company data, indexed by date
    '''
    # Imported here so the rest of the module works without pandas installed
    import pandas as pd

    # Create S3 client and retrieve data based on key
    client = boto3.client('s3')
    obj = client.get_object(Bucket=bucket, Key=key)

    # Read in data
    raw_data = io.StringIO(obj['Body'].read().decode('utf-8'))

    return pd.read_csv(raw_data, index_col=0, parse_dates=True)

def get_data(key, bucket='p-def'):
    '''
    Loads the CSV file of data for a single company from the key specified.

    Args:
        key (str): Key pointing to file in S3
        bucket (str): Name of the S3 bucket to read from

    Returns:
        dict: Map of column name to numpy array column index
        list: Date of each observation in the dataset
        ndarray: Numpy array of data
    '''
    # Create S3 client and retrieve data based on key
    client = boto3.client('s3')
    obj = client.get_object(Bucket=bucket, Key=key)

    # Read in data
    raw_data = obj['Body'].read().decode('utf-8').splitlines()

    # Create map of column name to index, accounting for the first (date)
    # column being split off below
    header_map = {name: i - 1 for i, name in enumerate(raw_data[0].split(','))}

    # Discard header row and split off first column (Date column)
    dates = [row.split(',')[0] for row in raw_data[1:]]
    data = [",".join(row.split(',')[1:]) for row in raw_data[1:]]

    # Create numpy array of data
    data = np.genfromtxt(data, delimiter=',')

    return header_map, dates, data

def save_data(data, key, bucket='p-def'):
    '''
    Writes data for a single company to a file at the specified key.

    Args:
        data (file buffer): File buffer containing data to write to file
        key (str): Key pointing to file in S3
        bucket (str): Name of the S3 bucket to write to

    Returns:
        dict: Response from the S3 put operation
    '''
    # Upload the buffer contents as UTF-8 encoded bytes
    s3 = boto3.resource('s3')
    res = s3.Object(bucket, key).put(Body=data.read().encode('utf-8'))

    return res

def merge_data_to_csv(header_map, dates, corp_data, results):
    '''
    Combines arguments back into a single CSV file.

    Args:
        header_map (dict): Map of column name to numpy array column index
        dates (list): Date of each observation in the dataset
        corp_data (ndarray): Numpy array of company data
        results (ndarray): Numpy array of same length as company data containing asset value results

    Returns:
        StringIO: File buffer containing data in CSV format
    '''
    csv_file = io.StringIO()

    # Convert header map to map of index to name
    i_to_name = {value + 1: key for key, value in header_map.items()}

    # Add column names for new asset value columns
    i_start = max(i_to_name.keys()) + 1
    for i in range(results.shape[1]):
        i_to_name[i_start + i] = 'Va_{:d}'.format(i + 1)

    # Create header row
    csv_file.write(','.join([i_to_name[i] for i in range(len(i_to_name))]) + '\n')

    # Combine company data and results and write to CSV
    np.savetxt(csv_file, np.hstack((np.array(dates).reshape((-1, 1)), corp_data, results)), fmt='%s', delimiter=',')

    # Reset file position
    csv_file.seek(0)

    return csv_file

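# The solver below rests on the Merton view of the firm: equity is a European
# call option on the firm's assets, struck at the face value of debt. Under
# Black-Scholes this gives, for asset value V_a, equity value V_e, debt face
# value F, risk-free rate r_f, asset volatility sigma_a, horizon T, and
# standard normal CDF N:
#
#     V_e = V_a * N(d1) - exp(-r_f * T) * F * N(d2)
#     d1  = (ln(V_a / F) + (r_f + 0.5 * sigma_a**2) * T) / (sigma_a * sqrt(T))
#     d2  = d1 - sigma_a * sqrt(T)
#
# The inner `equations` function returns the residual between the observed V_e
# and the model's right-hand side, and `sco.root` drives that residual to zero
# to recover V_a. Note the code works in daily units: T is measured in trading
# days (252 per year) and r_f is converted from an annual to a daily rate.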
def solve_for_asset_value(corp_data, header_map, time_horizon, min_hist_vals=252):
    '''
    Solves for a firm's asset value based on a time history of the firm's equity
    value, debt level, and risk-free rate.

    Args:
        corp_data (ndarray): Numpy array of company data (Equity value, face value of debt, and risk-free rate)
        header_map (dict): Map of column name to the data column index in corp_data
        time_horizon (list): List of time horizons (in years) to calculate asset value for
        min_hist_vals (int): Minimum number of days of history required before solving

    Returns:
        ndarray: Numpy array of time-series of asset values
    '''
    def equations(v_a, debug=False):
        # Standard Black-Scholes d1/d2 terms for a call on the firm's assets
        d1 = (np.log(v_a/face_val_debt) + (r_f + 0.5*sigma_a**2)*T)/(sigma_a*np.sqrt(T))
        d2 = d1 - sigma_a*np.sqrt(T)

        # Residual between observed equity value and model equity value
        y1 = v_e - (v_a*norm.cdf(d1) - np.exp(-r_f*T)*face_val_debt*norm.cdf(d2))

        if debug:
            print("d1 = {:.6f}".format(d1))
            print("d2 = {:.6f}".format(d2))
            print("Error = {:.6f}".format(y1))

        return y1

    # Set window width (in trading days) for calculating historical volatility
    win = 252

    # Solve only once at least min_hist_vals days of history are available
    timesteps = range(min_hist_vals, len(corp_data))

    # Calculate historical equity volatility (currently informational only;
    # the iteration below estimates asset volatility directly)
    ret_col = header_map['RET']
    sigma_e = np.zeros(corp_data.shape[0])
    sigma_e[:win-1] = np.nan
    sigma_e[win-1:] = np.std(rolling_window(np.log(corp_data[:,ret_col] + 1), win), axis=-1)

    assert type(time_horizon) in [list, tuple], "time_horizon must be a list or tuple"

    # Create array for storing results
    results = np.empty((corp_data.shape[0], len(time_horizon)))

    for i, years in enumerate(time_horizon):
        # Time to maturity in trading days
        T = 252*years
        # Set initial guess for firm value equal to the equity value
        results[:,i] = corp_data[:,header_map['mkt_val']]

        # Run through all days
        for i_t, t in enumerate(timesteps):
            # Check if the company is levered
            if corp_data[t,header_map['face_value_debt']] > 1e-10:
                # Company is levered; form an initial guess at sigma_a from
                # the trailing year of asset value estimates
                v_a_per = results[t-252:t,i]
                v_a_ret = np.log(v_a_per/np.roll(v_a_per,1))
                v_a_ret[0] = np.nan
                sigma_a = np.nanstd(v_a_ret)

                if i_t == 0:
                    # First solvable day: backfill the trailing year as well
                    subset_timesteps = range(t-252, t+1)
                else:
                    subset_timesteps = [t]

                # Iterate on previous values of V_a until sigma_a converges
                n_its = 0
                while n_its < 10:
                    n_its += 1
                    # Loop over timesteps, calculating Va using the current guess for sigma_a
                    for t_sub in subset_timesteps:
                        # Convert the annual risk-free rate to a daily rate
                        r_f = (1 + corp_data[t_sub,header_map['DGS1']])**(1.0/365) - 1
                        v_e = corp_data[t_sub,header_map['mkt_val']]
                        face_val_debt = corp_data[t_sub,header_map['face_value_debt']]
                        sol = sco.root(equations, results[t_sub,i])
                        results[t_sub,i] = sol['x'][0]

                    # Update sigma_a based on the new values of Va
                    last_sigma_a = sigma_a
                    v_a_per = results[t-252:t,i]
                    v_a_ret = np.log(v_a_per/np.roll(v_a_per,1))
                    v_a_ret[0] = np.nan
                    sigma_a = np.nanstd(v_a_ret)

                    if abs(last_sigma_a - sigma_a) < 1e-3:
                        break
            else:
                # Company is unlevered, so Va = Ve; results[:,i] was already
                # initialized to the equity value, so nothing to do
                pass

    return results

def run_model(key, time_horizon=[1,2,3,4,5]):
    '''
    Apply the B-S option pricing model to calculate inferred firm asset values
    as a function of time.

    Args:
        key (str): Key pointing to data in S3
        time_horizon (list): List of time horizons (in years) to calculate the model over

    Returns:
        float: Time run was started (in unix time)
        float: Time run finished (in unix time)
        dict: Response from S3 write, or False if there was too little data

    '''
    start = time.time()

    # Get data from S3
    h_map, dates, data = get_data(key)

    if len(dates) > 252:
        # Run the simulation
        results = solve_for_asset_value(data, h_map, time_horizon=time_horizon)

        # Merge data back into CSV
        csv_file = merge_data_to_csv(h_map, dates, data, results)

        # Save results to S3
        result_key = key.replace('merged-corp-data', 'merton-results')
        response = save_data(csv_file, result_key)
    else:
        # Not enough history to calibrate the model
        response = False

    end = time.time()

    return start, end, response
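
# A minimal sketch of what a Lambda entry point for this module might look
# like. This is an assumption, not part of the original repo: the handler name
# and the event field carrying the S3 key are hypothetical and must match
# however the function is actually configured and invoked.
def lambda_handler(event, context):
    start, end, response = run_model(event['key'])
    return {'start': start, 'end': end, 'elapsed': end - start,
            'saved': bool(response)}
--------------------------------------------------------------------------------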