├── loan_timing_report.pdf ├── README.md └── loan_timing.R /loan_timing_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bot13956/Monte_Carlo_Simulation_Loan_Status/HEAD/loan_timing_report.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Monte_Carlo_Simulation_Loan_Status 2 | 3 | Author: Benjamin O. Tayo 4 | 5 | Date: 11/22/2018 6 | 7 | Introduction: Predicting the status of a loan is an important problem in risk assessment. A bank or financial organization has to be able to estimate the risk involved before granting a loan to a customer. Data Science and predictive analytics play an important role in building models that can be used to predict the probability of loan default. In this project, we are provided with a data set loan_timing.csv containing 50000 data points. Each data point represents a loan, and two features are provided as follows: 8 | 9 | a) The column with header “days since origination” indicates the number of days that elapsed between origination and the date when the data was collected. 10 | 11 | b) For loans that charged off before the data was collected, the column with header “days from origination to charge-off” indicates the number of days that elapsed between origination and charge-off. For all other loans, this column is blank. 12 | 13 | Project Objective: The goal of this project is to use techniques of data science to estimate what fraction of these loans will have charged off by the time all of their 3-year terms are finished. 
# ---- README.md (remaining lines from the repository dump) ----
# loan_timing.csv: the dataset
# loan_timing.R: the R code
# loan_timing_report.pdf: project report and summary
#
# ---- /loan_timing.R ----

# R code for predicting loan status using Monte Carlo simulation
#
# Author: Benjamin O. Tayo
# Date: 11/22/2018
#
# Purpose: read loan_timing.csv, explore the charged-off ("default") and
# still-active ("current") loans, then use a uniform Monte Carlo model to
# estimate the percentage of loans that will have charged off by the end of
# their 3-year terms. The script prints the mean estimate, draws the
# diagnostic plots, and prints a mean +/- 2 sd interval.

# Import necessary libraries ----
library(readr)
library(tidyverse)
library(broom)
library(caret)

# Theme shared by every figure (bold, black, size-12 titles; centred plot
# title; no legend title). Defined once instead of being repeated per plot.
plot_theme <- theme(
  plot.title = element_text(
    color = "black", size = 12, hjust = 0.5, face = "bold"
  ),
  axis.title.x = element_text(color = "black", size = 12, face = "bold"),
  axis.title.y = element_text(color = "black", size = 12, face = "bold"),
  legend.title = element_blank()
)

# Importation of dataset ----

# Both blank cells and the literal string "NA" are read as missing, so loans
# that have not charged off get a real NA in the charge-off column.
df <- read_csv("loan_timing.csv", na = c("", "NA"))
stopifnot(ncol(df) == 2L) # exactly two columns are renamed below
names(df) <- c("origination", "chargeoff")

# Partition data set into two: default (charged off) and current.
# Missingness is tested with is.na(); comparing against the string "NA"
# yields NA rather than TRUE/FALSE. filter() is used for the split because a
# negative slice() on an empty index would drop every row.
index <- which(!is.na(df$chargeoff))
default <- df %>% filter(!is.na(chargeoff))
current <- df %>% filter(is.na(chargeoff))

# Exploratory data analysis ----

# Figure 1: Histogram of days to charge-off for defaulted loans
default %>%
  ggplot(aes(chargeoff)) +
  geom_histogram(color = "white", fill = "skyblue") +
  xlab('days to charge-off') +
  ylab('count') +
  ggtitle("Histogram of days to charge-off for defaulted loans") +
  plot_theme

# Figure 2: Histogram of days since origination for defaulted loans
default %>%
  ggplot(aes(origination)) +
  geom_histogram(color = "white", fill = "skyblue") +
  xlab('days since origination') +
  ylab('count') +
  ggtitle("Histogram of days since origination for defaulted loans") +
  plot_theme

# Figure 3: Plot of days to charge-off vs. days since origination for
# defaulted loans
default %>%
  ggplot(aes(origination, chargeoff)) +
  geom_point() +
  xlab('days since origination') +
  ylab('days to charge-off') +
  ggtitle("days to charge-off vs. days since origination") +
  plot_theme

# Figure 4: Histogram of days since origination for active loans
current %>%
  ggplot(aes(origination)) +
  geom_histogram(color = "white", fill = "skyblue") +
  xlab('days since origination') +
  ylab('count') +
  ggtitle("Histogram of days since origination for current loans") +
  plot_theme

# Monte Carlo simulation of defaulted loans ----

set.seed(2)
N <- 3 * 365 # loan duration in days
observed_days <- 2 * 365 # "first 2 years" cut-off used below (730 days)

# u plays the role of "days since origination" and v of "days to charge-off"
# (see the axis labels of the plot below); both are drawn uniformly on [0, N]
# and rounded to whole days. u is drawn before v to keep the random stream
# reproducible.
df_MC <- data.frame(
  u = round(runif(15500, 0, N)),
  v = round(runif(15500, 0, N))
)
df_MC <- df_MC %>%
  filter(v <= u) %>% # a loan cannot charge off after the observation date
  filter(u <= observed_days & v <= observed_days) # first 2 years only

# Show as many simulated points as there are observed defaults. head() is
# safe when nrow(default) is 0 or exceeds the number of simulated rows,
# unlike indexing with 1:nrow(default).
head(df_MC, nrow(default)) %>%
  ggplot(aes(u, v)) +
  geom_point() +
  xlab('days since origination') +
  ylab('days to charge-off') +
  ggtitle("MC simulation of days to charge-off vs. days since origination") +
  plot_theme

# Predicting the fraction of loans charged off after the 3-year term ----

set.seed(2)
B <- 1000 # number of Monte Carlo replicates
n_sim <- 50000 # simulated (u, v) pairs per replicate
n_loans <- nrow(df) # size of the loan portfolio being projected
n_default <- nrow(default) # charge-offs observed so far

# Each replicate scales the observed number of charge-offs by the simulated
# ratio (charge-offs over the whole term) / (charge-offs visible within the
# first 2 years), then expresses the projected total as a percentage of all
# loans.
fraction <- replicate(B, {
  u <- round(runif(n_sim, 0, N))
  v <- round(runif(n_sim, 0, N))
  charged_off <- v <= u
  observed <- charged_off & u <= observed_days & v <= observed_days
  total <- (sum(charged_off) / sum(observed)) * n_default
  100 * (total / n_loans)
})

mean(fraction)

fdf <- data.frame(fraction = fraction)

fdf %>%
  ggplot(aes(fraction)) +
  geom_histogram(color = "white", fill = "skyblue") +
  xlab('percent of charged off loans after 3-year term ') +
  ylab('count') +
  ggtitle("Histogram of percent of charged off loans") +
  plot_theme

# Confidence interval of percentage of defaulted loans after 3-year term ----

# Interval is mean +/- 2 standard deviations of the simulated percentages.
# The results are stored under their own names so that the base functions
# mean() and sd() are not masked by same-named variables.
fraction_mean <- mean(fraction)
fraction_sd <- sd(fraction)
confidence_interval <- c(
  fraction_mean - 2 * fraction_sd,
  fraction_mean + 2 * fraction_sd
)
confidence_interval