├── .gitignore
├── app.yaml
├── Procfile
├── columns.pkl
├── std_scaler.pkl
├── images
    ├── 5prediction.PNG
    ├── 2server_running.PNG
    ├── 3welcome_screen.PNG
    ├── 4filled_index.png
    └── 1accuracy_console.PNG
├── prediction_classifier.pkl
├── __pycache__
    └── keras_model.cpython-37.pyc
├── requirements.txt
├── .gcloudignore
├── static
    └── styles
    │   └── mainpage.css
├── LICENSE
├── app.py
├── README.md
├── templates
    └── index.html
└── model.py


/.gitignore:
--------------------------------------------------------------------------------
1 | testing notebooks/


--------------------------------------------------------------------------------
/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: python37


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: gunicorn app:app
2 | 


--------------------------------------------------------------------------------
/columns.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/columns.pkl


--------------------------------------------------------------------------------
/std_scaler.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/std_scaler.pkl


--------------------------------------------------------------------------------
/images/5prediction.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/5prediction.PNG


--------------------------------------------------------------------------------
/images/2server_running.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/2server_running.PNG


--------------------------------------------------------------------------------
/images/3welcome_screen.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/3welcome_screen.PNG


--------------------------------------------------------------------------------
/images/4filled_index.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/4filled_index.png


--------------------------------------------------------------------------------
/prediction_classifier.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/prediction_classifier.pkl


--------------------------------------------------------------------------------
/images/1accuracy_console.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/1accuracy_console.PNG


--------------------------------------------------------------------------------
/__pycache__/keras_model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/__pycache__/keras_model.cpython-37.pyc


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Flask==1.1.1
 2 | gunicorn==19.9.0
 3 | itsdangerous==1.1.0
 4 | Jinja2==2.10.1
 5 | MarkupSafe==1.1.1
 6 | Werkzeug==0.15.5
 7 | numpy>=1.9.2
 8 | scipy>=0.15.1
 9 | scikit-learn>=0.18
10 | matplotlib>=1.4.3
11 | pandas>=0.19
12 | keras
13 | tensorflow==1.15.2


--------------------------------------------------------------------------------
/.gcloudignore:
--------------------------------------------------------------------------------
 1 | # This file specifies files that are *not* uploaded to Google Cloud Platform
 2 | # using gcloud. It follows the same syntax as .gitignore, with the addition of
 3 | # "#!include" directives (which insert the entries of the given .gitignore-style
 4 | # file at that point).
 5 | #
 6 | # For more information, run:
 7 | #   $ gcloud topic gcloudignore
 8 | #
 9 | .gcloudignore
10 | # If you would like to upload your .git directory, .gitignore file or files
11 | # from your .gitignore file, remove the corresponding line
12 | # below:
13 | .git
14 | .gitignore
15 | 
16 | # Python pycache:
17 | __pycache__/
18 | # Ignored by the build system
19 | /setup.cfg


--------------------------------------------------------------------------------
/static/styles/mainpage.css:
--------------------------------------------------------------------------------
 1 | /*.button {
 2 |   border-radius: 4px;
 3 |   background-color: #f4511e;
 4 |   border: none;
 5 |   color: #FFFFFF;
 6 |   text-align: center;
 7 |   font-size: 28px;
 8 |   padding: 10px;
 9 |   width: 150px;
10 |   transition: all 0.5s;
11 |   cursor: pointer;
12 |   margin: 5px;
13 | }
14 | 
15 | .button span {
16 |   cursor: pointer;
17 |   display: inline-block;
18 |   position: relative;
19 |   transition: 0.5s;
20 | }
21 | 
22 | .button span:after {
23 |   content: '\00bb';
24 |   position: absolute;
25 |   opacity: 0;
26 |   top: 0;
27 |   right: -20px;
28 |   transition: 0.5s;
29 | }
30 | 
31 | .button:hover span {
32 |   padding-right: 25px;
33 | }
34 | 
35 | .button:hover span:after {
36 |   opacity: 1;
37 |   right: 0;
38 | }*/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Paras Varshney
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Wed Apr 22 06:43:26 2020
 4 | 
 5 | @author: paras
 6 | """
 7 | 
 8 | from flask import Flask, jsonify, request
 9 | import pandas as pd
10 | import numpy as np
11 | import json
12 | from sklearn.externals import joblib 
13 | 
14 | 
15 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
16 | from sklearn.compose import ColumnTransformer
17 | from sklearn.preprocessing import StandardScaler
18 |    
19 | import flask
20 | 
# Flask application object; the Procfile's ``gunicorn app:app`` refers to it.
app = Flask(__name__)

# Column labels loaded once at import time from ``columns.pkl``; predict()
# uses them as the column layout for the frame passed to the scaler.
main_cols = joblib.load("columns.pkl")
24 |     
25 | 
def clean_data(df_x):
    """Encode the categorical form fields of ``df_x`` for the model.

    ``Gender`` is converted to an integer code and ``Geography`` is one-hot
    encoded into ``Geography_<value>`` columns. The input frame is left
    untouched; a new frame is returned.

    A fixed mapping (Female -> 0, Male -> 1) is used instead of fitting a
    ``LabelEncoder`` here: an encoder fitted on a single submitted row always
    yields code 0, whatever the gender. The mapping matches what a
    ``LabelEncoder`` produces when fitted on data containing exactly the two
    values "Female" and "Male" (classes are sorted) -- NOTE(review): confirm
    the training data has only those two values.

    Parameters
    ----------
    df_x : pandas.DataFrame
        Frame with at least the columns ``Gender`` and ``Geography``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_x`` with ``Gender`` as 0/1 and ``Geography`` replaced by
        dummy columns.

    Raises
    ------
    KeyError
        If ``Gender`` or ``Geography`` is missing.
    ValueError
        If a ``Gender`` value is neither "Male" nor "Female"
        (case-insensitive, surrounding whitespace ignored).
    """
    gender_codes = {"female": 0, "male": 1}

    df_x = df_x.copy()

    gender = df_x["Gender"].astype(str).str.strip().str.lower().map(gender_codes)
    if gender.isna().any():
        raise ValueError("Gender must be 'Male' or 'Female'")
    df_x["Gender"] = gender.astype(int)

    # Normalise case/whitespace so e.g. " germany" yields Geography_Germany.
    # Assumes the training values are capitalised country names, as the form
    # placeholder suggests -- TODO confirm against the dataset.
    df_x["Geography"] = df_x["Geography"].astype(str).str.strip().str.title()

    return pd.get_dummies(data=df_x, columns=["Geography"], drop_first=False)
31 | 
32 | 
def standardize_data(dta):
    """Scale ``dta`` with the scaler stored in ``std_scaler.pkl``.

    The scaler is read from disk on every call and only its ``transform`` is
    applied; nothing is re-fitted here.

    Parameters
    ----------
    dta : array-like
        Feature rows to scale.

    Returns
    -------
    The result of the loaded scaler's ``transform(dta)``.
    """
    fitted_scaler = joblib.load("std_scaler.pkl")
    return fitted_scaler.transform(dta)
37 | 
38 | 
@app.route('/')
def index():
    """Serve the input form page (``index.html``) without a prediction value."""
    page = flask.render_template('index.html')
    return page
42 | 
43 | 
@app.route('/predict', methods=['POST'])
def predict():
    """Handle the form POST and render the page with the predicted churn rate.

    Steps: read the form fields, encode them with ``clean_data``, lay them out
    in the ``main_cols`` column order (columns absent from the input are filled
    with 0), convert everything to floats, scale with ``standardize_data`` and
    feed the row to the classifier stored in ``prediction_classifier.pkl``.

    Returns
    -------
    The rendered ``index.html`` with ``predicted_value`` set to the churn
    percentage, or -- when the submitted fields cannot be turned into a valid
    feature row -- the same page with an error message and HTTP status 400.
    """
    form_data = request.form.to_dict()
    # The submit button posts its own field; it is not a model feature.
    # pop() with a default tolerates clients that do not send it.
    form_data.pop('submitBtn', None)

    try:
        clean_df = clean_data(pd.DataFrame.from_records([form_data]))

        # Anything outside the known columns (e.g. a dummy column for an
        # unknown Geography value) cannot be represented in the model input.
        unexpected = sorted(set(clean_df.columns) - set(main_cols))
        if unexpected:
            raise ValueError("unsupported field(s): {}".format(", ".join(unexpected)))

        # reindex replaces the removed DataFrame.append + fillna(0) idiom:
        # it fixes the column order and fills absent columns with 0.
        main_df = clean_df.reindex(columns=main_cols, fill_value=0)

        # Form values arrive as strings; convert explicitly so bad input is
        # reported here rather than deep inside the scaler.
        main_df = main_df.apply(pd.to_numeric).astype(float)
        if main_df.isna().values.any():
            raise ValueError("all numeric fields must be filled in")
    except (AttributeError, KeyError, TypeError, ValueError) as err:
        app.logger.debug("rejected prediction input: %s", err)
        return flask.render_template(
            'index.html',
            predicted_value="Invalid input: {}".format(err)), 400

    std_df = standardize_data(main_df)

    # NOTE(review): the classifier is loaded on every request, as before;
    # consider caching it once its behaviour under the serving stack's
    # threading model has been confirmed.
    clf = joblib.load('prediction_classifier.pkl')
    pred = clf.predict(std_df)
    app.logger.debug("raw prediction: %s", pred)

    # predict() is indexed as a 2-D result: first row, first output.
    x = round(pred[0][0]*100, 2)

    return flask.render_template('index.html', predicted_value="Customer Churn rate: {}%".format(str(x)))
72 | 
# Start Flask's built-in server only when this file is executed directly
# (binds to all interfaces on port 8080). When the module is imported instead,
# e.g. by the Procfile's ``gunicorn app:app``, this branch is skipped.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ANN-based-Banking-Churn-Prediction
 2 | - This repository contains a complete machine learning and deep learning based banking churn prediction ANN model that estimates the probability of a customer leaving the bank.
 3 | - The project was deployed on Google Cloud Platform as well as completely tested on Localhost.
 4 | </br>
 5 | 
 6 | ## Project Description
 7 | ### Welcome Screen
 8 | - The main welcome screen is made in **HTML5** and **CSS3** with a basic and simple design.
 9 | - Here is the main screen for the **Bank Churn Prediction** Interface for the bank admin.</br>
10 | 
11 | ![Welcome Screen](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/3welcome_screen.PNG)
12 | 
13 | ### Bank Admin Input
14 | - The bank employee has to enter the details of the customer whose churn they want to analyze.
15 | - Below is the screenshot of the input being filled by bank employee.</br>
16 | 
17 | ![Filled Screen](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/4filled_index.png)
18 | 
19 | ## Analysis and Accuracy
20 | - The Prediction engine is built over a deep **Artificial Neural Network** backed with **[Keras](https://www.tensorflow.org/guide/keras)**.
21 | - I have achieved an accuracy of around **~85%** on both training and testing data.</br>
22 | - The ANN is evaluated with 10-fold cross-validation to check whether it underfits or overfits the data, based on the variance between the accuracies of the 10 folds.
23 | - The model's hyperparameters are tuned to find the best combination of **batch_size**, **epochs** and **optimizer** for the best-fit model.
24 | 
25 | ![Accuracy Python Console](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/1accuracy_console.PNG)
26 | 
27 | ## Deployment and Production
28 | -  The **API interfacing** for the deployment on [Localhost](http://localhost:8080/) is done using [Flask](https://flask.palletsprojects.com/en/1.1.x/).
29 | - The server is run on Local system during the staging of the project.
30 | - Older deployment was done on [Google Cloud Platform](https://cloud.google.com/)
31 | - Recently, the final deployment was done on the **Heroku** platform and can be accessed from the link below.
32 | - LINK: [https://banking-churn-pediction](https://banking-churn-pediction.herokuapp.com)
33 | 
34 | ## Predictions
35 | - The final prediction of the model is the percentage of churn for that customer.
36 | -  The prediction signifies the chances of the customer leaving the services of the bank, which lets the bank _focus more on such customers_ and try to retain them using **[Sales and Marketing strategies](https://github.com/paras009/Sales-and-Marketing-Analytics)**, which I have worked on in this [GitHub](https://github.com/paras009/Sales-and-Marketing-Analytics) module.</br>
37 | 
38 | ![Prediction Screenshot](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/5prediction.PNG)
39 | 
40 | ## Contribution
41 | - The project is built completely by Paras Varshney.</br>
42 | Connect on [LinkedIn](https://www.linkedin.com/in/pv009)</br>
43 | Follow on [Medium](https://medium.com/@pv009)</br>
44 | Follow on [Github](https://github.com/paras009)</br>
45 | 
46 | #### Thank You!
47 | 


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 | 
 6 |     <meta http-equiv="x-ua-compatible" content="ie=edge">
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 8 | 
 9 | 	<!-- <link rel= "stylesheet" type= "text/css" href= "{{ url_for('static',filename='styles/mainpage.css') }}"> -->
10 |     <!-- <link rel= "stylesheet" type= "text/css" href= "../static/styles/mainpage.css"> -->
11 | 
12 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
13 |     <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
14 |     <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
15 |     <title>Banking Churn Prediction</title>
16 |     <style type="text/css">
17 |         .button {
18 |   border-radius: 4px;
19 |   background-color: #f4511e;
20 |   border: none;
21 |   color: #FFFFFF;
22 |   text-align: center;
23 |   font-size: 28px;
24 |   padding: 10px;
25 |   width: 150px;
26 |   transition: all 0.5s;
27 |   cursor: pointer;
28 |   margin: 5px;
29 | }
30 | 
31 | .button span {
32 |   cursor: pointer;
33 |   display: inline-block;
34 |   position: relative;
35 |   transition: 0.5s;
36 | }
37 | 
38 | .button span:after {
39 |   content: '\00bb';
40 |   position: absolute;
41 |   opacity: 0;
42 |   top: 0;
43 |   right: -20px;
44 |   transition: 0.5s;
45 | }
46 | 
47 | .button:hover span {
48 |   padding-right: 25px;
49 | }
50 | 
51 | .button:hover span:after {
52 |   opacity: 1;
53 |   right: 0;
54 | }
55 |     </style>
56 | </head>
57 | <body style="background-color: #6A359C; text-align: center">
58 |     <div style="  text-align: center;">
59 |       <h1 style="color: #FFF">Banking Churn Prediction</h1>
60 |       <div class="container">
61 |     <div class="row">
62 |       <div class="col-sm-12">
63 |   <form action="/predict" method="POST">
64 | 
65 |     <div style="margin: 25px;">
66 |         <input type="text" name="CreditScore" class="form-control" style="margin: 3px" placeholder="Credit Score(eg: 632)">
67 |         <input type="text" name="Geography" class="form-control" style="margin: 3px" placeholder="Geography (Germany, France, Spain)">
68 |         <input type="text" name="Gender" class="form-control" style="margin: 3px" placeholder="Gender(Male, Female)">
69 |         <input type="text" name="Age" class="form-control" style="margin: 3px" placeholder="Age(eg: 25)">
70 |         <input type="text" name="Tenure" class="form-control" style="margin: 3px" placeholder="Tenure(Time since member)">
71 | 
72 |         <input type="text" name="Balance" class="form-control" style="margin: 3px" placeholder="Balance(eg: 500000)">
73 |         <input type="text" name="NumOfProducts" class="form-control" style="margin: 3px" placeholder="Num Of Products(eg: 2)">
74 |         <input type="text" name="HasCrCard" class="form-control" style="margin: 3px" placeholder="Has Credit Card('1'=Yes, '0'=No)">
75 |         <input type="text" name="IsActiveMember" class="form-control" style="margin: 3px" placeholder="Is Active Member?('1'=Yes, '0'=No)">
76 |         <input type="text" name="EstimatedSalary" class="form-control" style="margin: 3px" placeholder="Estimated Salary(eg: 25000)">
77 |     </div>
78 |         <button class="button" type="submit" name="submitBtn"><span>Submit </span></button>
79 | 
80 | 
81 | 
82 |   </form>
83 | 	<h3 style="color: white;">
84 | 	{{predicted_value}}
85 | 	</h3>
86 | </div>
87 | </div>
88 | </div>
89 | 
90 | </div>
91 | 
92 | 
93 | </body>
94 | </html>
95 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # -*- coding: utf-8 -*-
  5 | """
  6 | Created on Wed Apr 22 05:42:28 2020
  7 | 
  8 | @author: paras
  9 | """
 10 | 
 11 | import pandas as pd
 12 | import numpy as np
 13 | from sklearn.externals import joblib 
 14 | 
 15 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 16 | from sklearn.compose import ColumnTransformer
 17 | 
 18 | import keras
 19 | from keras.models import Sequential
 20 | from keras.layers import Dense, Dropout
 21 | 
 22 | from keras.wrappers.scikit_learn import KerasClassifier
 23 | from sklearn.model_selection import cross_val_score
 24 | from sklearn.model_selection import GridSearchCV
 25 | 
 26 | 
 27 | 
 28 |     
# Load the raw dataset from the current working directory.
df = pd.read_csv("Churn_Modelling.csv")

# Features are the columns at positions 3..12, the target is the column at
# position 13.
# NOTE(review): presumably the leading columns are identifiers and column 13
# is the churn label -- confirm against the CSV header.
df_x = df.iloc[:, 3:13]
df_y = df.iloc[:, 13]
 33 | 
 34 | # df.isna().sum()
 35 | 
 36 | def clean_data(df):
 37 | 
 38 |     le = LabelEncoder()
 39 |     df.Gender = le.fit_transform(df.Gender)
 40 |     df = pd.get_dummies(data = df, columns=["Geography"], drop_first = False)
 41 |     df = df.sort_index(axis=1)
 42 |     return df
 43 | 
 44 | df_x = clean_data(df_x)
 45 | # df_x.head()
 46 | 
 47 | # columnTransformer = ColumnTransformer([('encoder', OneHotEncoder(), [1])], remainder='passthrough')
 48 | # col_tnf = columnTransformer.fit_transform(df_x)
 49 | # df_x = np.array(col_tnf, dtype = np.str)
 50 | # df_x = df_x[:, 1:]
 51 | 
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size = 0.2, random_state = 0)
# Persist the column labels now, while X_train is still a DataFrame; after
# scaling below it is rebound to the scaler's output. app.py loads this file.
joblib.dump(X_train.columns, "columns.pkl")

# Fit the scaler on the training rows only and reuse it for the test rows.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Saved so app.py can apply the same scaling to submitted form data.
joblib.dump(scaler, "std_scaler.pkl")
print(X_test)
# Number of input features; also used as input_dim by build_model().
print(X_train.shape[1])
 63 | 
 64 | 
 65 | 
 66 | 
 67 | 
 68 | 
 69 | def generate_report():
 70 |     # predicting values
 71 |     y_pred = classifier.predict(X_test)
 72 |     print("\nPredicted values: "+str(y_pred)+"\n")
 73 |     y_pred = (y_pred > 0.5)
 74 |     
 75 |     # Making the Confusion Matrix
 76 |     from sklearn.metrics import confusion_matrix
 77 |     cm = confusion_matrix(y_test, y_pred)
 78 |     accuracy = (cm[0,0]+cm[1, 1])/(cm[0,0]+cm[1, 1]+cm[1,0]+cm[0, 1])
 79 |     print("\nTest Accuracy: "+str(accuracy)+"\n")
 80 |     
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | # Build basic ANN model 
 88 | def build_model():
 89 |     classifier = Sequential()
 90 |     classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = X_train.shape[1]))
 91 |     classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
 92 |     classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
 93 |     classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
 94 |     return classifier
 95 | classifier = build_model()
 96 | classifier.fit(X_train, y_train, batch_size = 10, epochs = 30)
 97 | generate_report()
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
# 10-fold cross-validation of the same architecture, wrapped for scikit-learn.

def build_model():
    """Return the compiled 6-6-1 network used for cross-validation.

    Same layout as the baseline: two 6-unit ReLU layers and a sigmoid output,
    compiled with adam, binary cross-entropy and the accuracy metric. The
    input width comes from the module-level ``X_train``.
    """
    network = Sequential([
        Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=X_train.shape[1]),
        Dense(units=6, kernel_initializer='uniform', activation='relu'),
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
    ])
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network


classifier = KerasClassifier(build_fn=build_model, batch_size=10, epochs=40)
accuracy_list = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)
# Mean and spread of the per-fold accuracies.
mean_acc, std_acc = accuracy_list.mean(), accuracy_list.std()
print(mean_acc, std_acc)

# Fit once more on the whole training split before reporting on the test set.
classifier.fit(X_train, y_train)
generate_report()
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
# Hyperparameter tuning with grid search over batch size, epochs and optimizer.
def build_model(optimizer):
    """Return the compiled 6-6-1 network using the given optimizer.

    Parameters
    ----------
    optimizer : str
        Optimizer name passed to ``compile``; supplied by the grid search
        from the ``"optimizer"`` entry of the parameter grid.

    Returns
    -------
    A compiled ``Sequential`` model with two 6-unit ReLU layers and a sigmoid
    output, using binary cross-entropy loss and the accuracy metric. The
    input width comes from the module-level ``X_train``.
    """
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = X_train.shape[1]))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_model)
parameters = {"batch_size": [5, 15, 30],
              "epochs": [30, 100],
              "optimizer": ["adam", "rmsprop"]}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = "accuracy",
                           cv=10)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
print(best_parameters, best_accuracy)

# GridSearchCV fits clones of the estimator, so the KerasClassifier bound to
# ``classifier`` above is still unfitted here. Rebind the name to the network
# of the refit best estimator so that generate_report() and the dump below
# use the tuned, trained model.
# The underlying network is taken (rather than the wrapper) because app.py
# multiplies the predict() output by 100 to display a percentage, which needs
# the sigmoid output and not a class label.
# NOTE(review): confirm the installed Keras version can pickle this model.
classifier = grid_search.best_estimator_.model
generate_report()

# Save the tuned model to a pickle file; app.py loads it for predictions.
joblib.dump(classifier, 'prediction_classifier.pkl')
157 | 
158 | 
159 | 


--------------------------------------------------------------------------------