├── ab.png ├── model.p ├── loan.png ├── README.md └── streamlit.py /ab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mariiagusarova/Streamlit-Loan-Prediction-Web-App/HEAD/ab.png -------------------------------------------------------------------------------- /model.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mariiagusarova/Streamlit-Loan-Prediction-Web-App/HEAD/model.p -------------------------------------------------------------------------------- /loan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mariiagusarova/Streamlit-Loan-Prediction-Web-App/HEAD/loan.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Streamlit-Loan-Prediction-Web-App 2 | 3 | Build A Beautiful Machine Learning Web App With Streamlit | Python Tutorial (Python code included) 4 | 5 | This is a User Interface web app: 6 | 7 | 8 | ![Screenshot 2023-02-01 at 14 57 55](https://user-images.githubusercontent.com/68427979/216192467-2fdb5b25-d1da-484d-af50-f0b2777ae869.png) 9 | 10 | 11 | You can find a live demo preview of the UI here: https://www.youtube.com/watch?v=Ei_wy0dAAe0 12 | -------------------------------------------------------------------------------- /streamlit.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import base64 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.preprocessing import MinMaxScaler 7 | import numpy as np 8 | import pickle as pkl 9 | import shap 10 | import streamlit.components.v1 as components 11 | 12 | #Load the saved model 13 | 
from pathlib import Path

# Bundled assets (model.p, loan.png, ab.png) live next to this script in the
# repository, so resolve them from the script's own directory instead of an
# absolute path that only exists on one developer's machine.
APP_DIR = Path(__file__).resolve().parent
MODEL_PATH = APP_DIR / "model.p"
LOAN_IMAGE = str(APP_DIR / "loan.png")
AVATAR_IMAGE = str(APP_DIR / "ab.png")

# Cut-off applied to the second predict_proba column: scores below it are
# reported as high risk, scores at or above it as likely to repay.
REPAY_THRESHOLD = 0.78

# Encoding tables. Each one drives BOTH the sidebar options and the numeric
# encoding, so the choices offered to the user can never drift away from the
# values preprocess() knows how to encode.
TERM_CODES = {"36months": 1.0, "60months": 2.0}
# "A1".."A5" -> 1..5, "B1".."B5" -> 11..15, ..., "G1".."G5" -> 61..65
SUB_GRADE_CODES = {
    f"{letter}{step}": float(10 * rank + step)
    for rank, letter in enumerate("ABCDEFG")
    for step in range(1, 6)
}
EMP_LENGTH_CODES = {
    "< 1 year": 0.0,
    "1 year": 1.0,
    **{f"{years} years": float(years) for years in range(2, 10)},
    "10+ years": 10.0,
}

# One-hot columns present in the CSV that are dropped before the SHAP
# background/test sets are built.
_HOME_OWNERSHIP = ("ANY", "MORTGAGE", "NONE", "OTHER", "OWN", "RENT")
_STATES = (
    "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL",
    "GA", "HI", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD",
    "ME", "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH",
    "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY",
)
DROPPED_COLUMNS = (
    ["loan_status"]
    + [f"home_ownership__{value}" for value in _HOME_OWNERSHIP]
    + [f"addr_state__{state}" for state in _STATES]
)

# Load the saved model.
# SECURITY: unpickling runs arbitrary code from the file; only ever load a
# model.p that comes from a trusted source.
with open(MODEL_PATH, "rb") as model_file:
    model = pkl.load(model_file)

st.set_page_config(
    page_title="Loan Prediction App",
    page_icon=LOAN_IMAGE,
)

# NOTE(review): this option only exists in some Streamlit releases - confirm
# against the pinned Streamlit version before upgrading.
st.set_option('deprecation.showPyplotGlobalUse', False)

######################
# main page layout
######################

st.title("Loan Default Prediction")
st.subheader("Are you sure your loan applicant is surely going to pay the loan back?💸 "
             "This machine learning app will help you to make a prediction to help you with your decision!")

col1, col2 = st.columns([1, 1])

with col1:
    st.image(LOAN_IMAGE)

with col2:
    st.write("""To borrow money, credit analysis is performed. Credit analysis involves the measure to investigate
the probability of the applicant to pay back the loan on time and predict its default/ failure to pay back.

These challenges get more complicated as the count of applications increases that are reviewed by loan officers.
Human approval requires extensive hour effort to review each application, however, the company will always seek
cost optimization and improve human productivity. This sometimes causes human error and bias, as it’s not practical
to digest a large number of applicants considering all the factors involved.""")

st.subheader("To predict default/ failure to pay back status, you need to follow the steps below:")
st.markdown("""
1. Enter/choose the parameters that best descibe your applicant on the left side bar;
2. Press the "Predict" button and wait for the result.

""")

st.subheader("Below you could find prediction result: ")

######################
# sidebar layout
######################

st.sidebar.title("Loan Applicant Info")
st.sidebar.image(AVATAR_IMAGE, width=100)
st.sidebar.write("Please choose parameters that descibe the applicant")

# input features
term = st.sidebar.radio("Select Loan term: ", tuple(TERM_CODES))
loan_amnt = st.sidebar.slider(
    "Please choose Loan amount you would like to apply:",
    min_value=1000, max_value=40000, step=500,
)
emp_length = st.sidebar.selectbox(
    'Please choose your employment length', tuple(EMP_LENGTH_CODES)
)
annual_inc = st.sidebar.slider(
    "Please choose your annual income:",
    min_value=10000, max_value=200000, step=1000,
)
sub_grade = st.sidebar.selectbox('Please choose grade', tuple(SUB_GRADE_CODES))
dti = st.sidebar.slider("Please choose DTI:", min_value=0.1, max_value=100.1, step=0.1)
mths_since_recent_inq = st.sidebar.slider(
    "Please choose your mths_since_recent_inq:", min_value=1, max_value=25, step=1
)
revol_util = st.sidebar.slider(
    "Please choose revol_util:", min_value=0.1, max_value=150.1, step=0.1
)
num_op_rev_tl = st.sidebar.slider(
    "Please choose num_op_rev_tl:", min_value=1, max_value=50, step=1
)


def preprocess(
    loan_amnt: int,
    term: str,
    sub_grade: str,
    emp_length: str,
    annual_inc: int,
    dti: float,
    mths_since_recent_inq: int,
    revol_util: float,
    num_op_rev_tl: int,
) -> pd.DataFrame:
    """Build the single-row feature frame passed to the model.

    The three categorical inputs (``term``, ``sub_grade``, ``emp_length``)
    are replaced by their numeric codes; every other value is passed through
    unchanged. Values that are missing from the code tables are left as-is
    by ``DataFrame.replace``.

    Returns:
        A one-row DataFrame whose columns are, in order: loan_amnt, term,
        sub_grade, emp_length, annual_inc, dti, mths_since_recent_inq,
        revol_util, num_op_rev_tl.
    """
    # Column order is kept exactly as the original script built it.
    # NOTE(review): presumably this matches the order the model was trained
    # with - confirm against the training code.
    row = pd.DataFrame(
        data={
            'loan_amnt': [loan_amnt],
            'term': [term],
            'sub_grade': [sub_grade],
            'emp_length': [emp_length],
            'annual_inc': [annual_inc],
            'dti': [dti],
            'mths_since_recent_inq': [mths_since_recent_inq],
            'revol_util': [revol_util],
            'num_op_rev_tl': [num_op_rev_tl],
        }
    )
    # Nested mapping form: {column: {old_value: new_value}}.
    return row.replace(
        {
            "term": TERM_CODES,
            "sub_grade": SUB_GRADE_CODES,
            "emp_length": EMP_LENGTH_CODES,
        }
    )


user_input = preprocess(
    loan_amnt, term, sub_grade, emp_length, annual_inc, dti,
    mths_since_recent_inq, revol_util, num_op_rev_tl,
)

# predict button
btn_predict = st.sidebar.button("Predict")

# NOTE(review): the explainability section is placed inside the button branch
# and the author footer outside it - confirm this nesting against the
# original script.
if btn_predict:
    pred = model.predict_proba(user_input)[:, 1]

    if pred[0] < REPAY_THRESHOLD:
        st.error('Warning! The applicant has a high risk to not pay the loan back!')
    else:
        st.success('It is green! The aplicant has a high probability to pay the loan back!')

    # prepare test set for shap explainability
    # NOTE(review): mycsvfile.csv is read relative to the working directory
    # and is not listed among the repository files - confirm where it comes
    # from. st.cache is deprecated in newer Streamlit releases in favour of
    # st.cache_data - confirm against the pinned version.
    loans = st.cache(pd.read_csv)("mycsvfile.csv")
    X = loans.drop(columns=DROPPED_COLUMNS)
    y = loans[['loan_status']]
    y_ravel = y.values.ravel()

    # Only the feature splits are needed: the train part is handed to the
    # SHAP explainer, the test part feeds the overall plot.
    X_train, X_test, _, _ = train_test_split(
        X, y_ravel, test_size=0.25, random_state=42, stratify=y
    )

    st.subheader('Result Interpretability - Applicant Level')
    shap.initjs()
    explainer = shap.Explainer(model, X_train)
    shap_values = explainer(user_input)
    fig = shap.plots.bar(shap_values[0])
    st.pyplot(fig)

    st.subheader('Model Interpretability - Overall')
    shap_values_ttl = explainer(X_test)
    fig_ttl = shap.plots.beeswarm(shap_values_ttl)
    st.pyplot(fig_ttl)
    st.write(""" In this chart blue and red mean the feature value, e.g. annual income blue is a smaller value e.g. 40K USD,
and red is a higher value e.g. 100K USD. The width of the bars represents the number of observations on a certain feature value,
for example with the annual_inc feature we can see that most of the applicants are within the lower-income or blue area. And on axis x negative SHAP
values represent applicants that are likely to churn and the positive values on the right side represent applicants that are likely to pay the loan back.
What we are learning from this chart is that features such as annual_inc and sub_grade are the most impactful features driving the outcome prediction.
The higher the salary is, or the lower the subgrade is, the more likely the applicant to pay the loan back and vice versa, which makes total sense in our case.
""")

st.write("""

**Author:Mariia Gusarova**

You could find more about this project on Medium [here].
""")