├── .gitignore ├── app.yaml ├── Procfile ├── columns.pkl ├── std_scaler.pkl ├── images ├── 5prediction.PNG ├── 2server_running.PNG ├── 3welcome_screen.PNG ├── 4filled_index.png └── 1accuracy_console.PNG ├── prediction_classifier.pkl ├── __pycache__ └── keras_model.cpython-37.pyc ├── requirements.txt ├── .gcloudignore ├── static └── styles │ └── mainpage.css ├── LICENSE ├── app.py ├── README.md ├── templates └── index.html └── model.py /.gitignore: -------------------------------------------------------------------------------- 1 | testing notebooks/ -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | runtime: python37 -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app 2 | -------------------------------------------------------------------------------- /columns.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/columns.pkl -------------------------------------------------------------------------------- /std_scaler.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/std_scaler.pkl -------------------------------------------------------------------------------- /images/5prediction.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/5prediction.PNG -------------------------------------------------------------------------------- /images/2server_running.PNG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/2server_running.PNG -------------------------------------------------------------------------------- /images/3welcome_screen.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/3welcome_screen.PNG -------------------------------------------------------------------------------- /images/4filled_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/4filled_index.png -------------------------------------------------------------------------------- /prediction_classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/prediction_classifier.pkl -------------------------------------------------------------------------------- /images/1accuracy_console.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/images/1accuracy_console.PNG -------------------------------------------------------------------------------- /__pycache__/keras_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blurred-machine/ANN-based-Banking-Churn-Prediction/HEAD/__pycache__/keras_model.cpython-37.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==1.1.1 2 | gunicorn==19.9.0 3 | 
itsdangerous==1.1.0 4 | Jinja2==2.10.1 5 | MarkupSafe==1.1.1 6 | Werkzeug==0.15.5 7 | numpy>=1.9.2 8 | scipy>=0.15.1 9 | scikit-learn>=0.18 10 | matplotlib>=1.4.3 11 | pandas>=0.19 12 | keras 13 | tensorflow==1.15.2 -------------------------------------------------------------------------------- /.gcloudignore: -------------------------------------------------------------------------------- 1 | # This file specifies files that are *not* uploaded to Google Cloud Platform 2 | # using gcloud. It follows the same syntax as .gitignore, with the addition of 3 | # "#!include" directives (which insert the entries of the given .gitignore-style 4 | # file at that point). 5 | # 6 | # For more information, run: 7 | # $ gcloud topic gcloudignore 8 | # 9 | .gcloudignore 10 | # If you would like to upload your .git directory, .gitignore file or files 11 | # from your .gitignore file, remove the corresponding line 12 | # below: 13 | .git 14 | .gitignore 15 | 16 | # Python pycache: 17 | __pycache__/ 18 | # Ignored by the build system 19 | /setup.cfg -------------------------------------------------------------------------------- /static/styles/mainpage.css: -------------------------------------------------------------------------------- 1 | /*.button { 2 | border-radius: 4px; 3 | background-color: #f4511e; 4 | border: none; 5 | color: #FFFFFF; 6 | text-align: center; 7 | font-size: 28px; 8 | padding: 10px; 9 | width: 150px; 10 | transition: all 0.5s; 11 | cursor: pointer; 12 | margin: 5px; 13 | } 14 | 15 | .button span { 16 | cursor: pointer; 17 | display: inline-block; 18 | position: relative; 19 | transition: 0.5s; 20 | } 21 | 22 | .button span:after { 23 | content: '\00bb'; 24 | position: absolute; 25 | opacity: 0; 26 | top: 0; 27 | right: -20px; 28 | transition: 0.5s; 29 | } 30 | 31 | .button:hover span { 32 | padding-right: 25px; 33 | } 34 | 35 | .button:hover span:after { 36 | opacity: 1; 37 | right: 0; 38 | }*/ 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Paras Varshney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# -*- coding: utf-8 -*-
"""
app.py -- Flask front end for the banking churn prediction model.

Serves the input form at ``/`` and, on ``POST /predict``, turns the submitted
form fields into the feature layout stored in ``columns.pkl``, standardizes
them with the scaler stored in ``std_scaler.pkl`` and renders the prediction
of the model stored in ``prediction_classifier.pkl``.

Created on Wed Apr 22 06:43:26 2020

@author: paras
"""

from flask import Flask, jsonify, request
import pandas as pd
import numpy as np
import json

try:
    # Original import; kept first so existing environments behave as before.
    from sklearn.externals import joblib
except ImportError:
    # ``sklearn.externals.joblib`` was removed in scikit-learn 0.23; the
    # standalone package is installed as a scikit-learn dependency.
    import joblib

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

import flask

app = Flask(__name__)

# Feature columns (names and order) written by the training script.
main_cols = joblib.load("columns.pkl")

# Training encodes Gender with LabelEncoder, which numbers the sorted class
# labels. NOTE(review): assumes the training data contains exactly the labels
# "Female" and "Male" -- confirm against Churn_Modelling.csv.
GENDER_CODES = {"Female": 0, "Male": 1}


def clean_data(df_x):
    """Encode the categorical form fields of a request.

    Gender is mapped through the fixed ``GENDER_CODES`` table. Fitting a new
    LabelEncoder on the request itself would encode every single-row request
    as 0, whatever the submitted value was. Geography is one-hot encoded into
    ``Geography_<value>`` columns.

    Parameters
    ----------
    df_x : pandas.DataFrame
        Raw form values; must contain ``Gender`` and ``Geography`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.

    Raises
    ------
    KeyError
        If ``Gender`` or ``Geography`` is missing.
    ValueError
        If ``Gender`` holds a value that is not in ``GENDER_CODES``.
    """
    df_x = df_x.copy()
    unknown = set(df_x["Gender"]) - set(GENDER_CODES)
    if unknown:
        raise ValueError("unknown Gender value(s): {}".format(sorted(map(str, unknown))))
    df_x["Gender"] = df_x["Gender"].map(GENDER_CODES)
    df_x = pd.get_dummies(data=df_x, columns=["Geography"], drop_first=False)
    return df_x


def standardize_data(dta):
    """Scale ``dta`` with the scaler saved in ``std_scaler.pkl`` and return the result."""
    scaler = joblib.load("std_scaler.pkl")
    X_transformed = scaler.transform(dta)
    return X_transformed


@app.route('/')
def index():
    """Render the empty input form."""
    return flask.render_template('index.html')


@app.route('/predict', methods=['POST'])
def predict():
    """Predict churn for the submitted form and render the result page.

    Invalid input (missing Gender/Geography, unknown Gender, non-numeric
    field) is answered with the form page, an error message and HTTP 400
    instead of an unhandled server error.
    """
    form_data = request.form.to_dict()
    # The submit button is part of the form but not a feature.
    form_data.pop('submitBtn', None)

    try:
        clean_df = clean_data(pd.DataFrame.from_records([form_data]))
        # Align with the training layout: fixed column order, absent columns
        # (e.g. the other Geography dummies) filled with 0, unexpected form
        # fields dropped. Form values arrive as strings, hence the cast.
        main_df = clean_df.reindex(columns=main_cols, fill_value=0).astype(float)
    except (KeyError, ValueError) as err:
        app.logger.warning("Rejected prediction request: %s", err)
        return flask.render_template(
            'index.html',
            predicted_value="Invalid input: {}".format(err)), 400
    app.logger.debug("model input:\n%s", main_df)

    std_df = standardize_data(main_df)
    app.logger.debug("standardized input: %s", std_df)

    # Loaded per request, as in the original code.
    # NOTE(review): only load pickles that this project produced itself.
    clf = joblib.load('prediction_classifier.pkl')
    pred = clf.predict(std_df)
    x = round(pred[0][0] * 100, 2)
    app.logger.debug("prediction: %s -> %s", pred, x)

    # NOTE(review): in the dump this literal is broken after "rate: ";
    # confirm no markup (e.g. a line-break tag) was lost at that position.
    return flask.render_template(
        'index.html',
        predicted_value="Customer Churn rate: {}%".format(str(x)))


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)

# --------------------------------------------------------------------------
# /README.md -- opening lines. They share a physical line with the end of
# app.py in this dump, so they are carried over here.
# --------------------------------------------------------------------------
# # ANN-based-Banking-Churn-Prediction
# - This repository will have complete machine learning and deep learning based banking churn prediction ANN model which will analyze the probability for a customer to leave.
# - The project was deployed on Google Cloud Platform as well as completely tested on Localhost.
5 | 6 | ## Project Description 7 | ### Welcome Screen 8 | - The main welcome screen is made in **HTML5** and **CSS3** with a basic and simple design. 9 | - Here is the main screen for the **Bank Churn Prediction** Interface for the bank admin.
10 | 11 | ![Welcome Screen](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/3welcome_screen.PNG) 12 | 13 | ### Bank Admin Input 14 | - The bank employee has to enter the details of the customer whose churn they want to analyze. 15 | - Below is the screenshot of the input being filled by bank employee.
16 | 17 | ![Filled Screen](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/4filled_index.png) 18 | 19 | ## Analysis and Accuracy 20 | - The Prediction engine is built over a deep **Artificial Neural Network** backed with **[Keras](https://www.tensorflow.org/guide/keras)**. 21 | - I have achieved an accuracy of around **~85%** on both training and testing data.
22 | - The ANN is evaluated with K-fold cross-validation over 10 rounds to check whether it underfits or overfits the data, based on the variance between the accuracies of the 10 folds. 23 | - The model is tuned over its hyperparameters to find the best combination of **batch_size**, **epochs** and **optimizer** for the best-fit model. 24 | 25 | ![Accuracy Python Console](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/1accuracy_console.PNG) 26 | 27 | ## Deployment and Production 28 | - The **API interfacing** for the deployment on [Localhost](http://localhost:8080/) is done using [Flask](https://flask.palletsprojects.com/en/1.1.x/). 29 | - The server is run on the local system during the staging of the project. 30 | - An older deployment was done on [Google Cloud Platform](https://cloud.google.com/). 31 | - Recently, the final deployment was done on the **Heroku** platform and can be accessed from the link below. 32 | - LINK: [https://banking-churn-pediction](https://banking-churn-pediction.herokuapp.com) 33 | 34 | ## Predictions 35 | - The final prediction of the model is the percentage of churn for that customer. 36 | - The prediction signifies the chances of the customer leaving the services of the bank, which lets the bank _focus more on such customers_ and try to retain them using **[Sales and Marketing strategies](https://github.com/paras009/Sales-and-Marketing-Analytics)**, on which I have worked in this [GitHub](https://github.com/paras009/Sales-and-Marketing-Analytics) module.
37 | 38 | ![Prediction Screenshot](https://github.com/paras009/ANN-based-Banking-Churn-Prediction/blob/master/images/5prediction.PNG) 39 | 40 | ## Contribution 41 | - The project is built completely by Paras Varshney.
42 | Connect on [LinkedIn](https://www.linkedin.com/in/pv009)
43 | Follow on [Medium](https://medium.com/@pv009)
44 | Follow on [Github](https://github.com/paras009)
45 | 46 | #### Thank You! 47 | -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | Banking Churn Prediction 16 | 56 | 57 | 58 |
59 |

Banking Churn Prediction

60 |
61 |
62 |
63 |
64 | 65 |
66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 |
78 | 79 | 80 | 81 | 82 |
83 |

84 | {{predicted_value}} 85 |

86 |
87 |
88 |
89 | 90 |
#!/usr/bin/env python
# coding: utf-8
"""
model.py -- train, evaluate and tune the churn-prediction ANN, then save it.

The script runs three stages on ``Churn_Modelling.csv``:

1. fit a basic ANN and report its test accuracy,
2. estimate accuracy mean/spread with 10-fold cross-validation,
3. grid-search batch size, epochs and optimizer.

It writes ``columns.pkl`` (feature layout), ``std_scaler.pkl`` (fitted
scaler) and ``prediction_classifier.pkl`` (the tuned, fitted model).

Created on Wed Apr 22 05:42:28 2020

@author: paras
"""

import pandas as pd
import numpy as np

try:
    # Original import; kept first so existing environments behave as before.
    from sklearn.externals import joblib
except ImportError:
    # ``sklearn.externals.joblib`` was removed in scikit-learn 0.23.
    import joblib

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

from keras.wrappers.scikit_learn import KerasClassifier


df = pd.read_csv("Churn_Modelling.csv")

# Columns 3..12 are used as features, column 13 as the target.
df_x = df.iloc[:, 3:13]
df_y = df.iloc[:, 13]


def clean_data(df):
    """Encode the categorical columns and return the frame with sorted columns.

    Gender is label-encoded, Geography is one-hot encoded, and the columns
    are sorted by name. Works on a copy, so the caller's frame (here a slice
    of the raw data) is not modified.
    """
    df = df.copy()
    le = LabelEncoder()
    df["Gender"] = le.fit_transform(df["Gender"])
    df = pd.get_dummies(data=df, columns=["Geography"], drop_first=False)
    df = df.sort_index(axis=1)
    return df


df_x = clean_data(df_x)

X_train, X_test, y_train, y_test = train_test_split(
    df_x, df_y, test_size=0.2, random_state=0)
# Saved so the web app can rebuild the same column layout.
joblib.dump(X_train.columns, "columns.pkl")

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler, "std_scaler.pkl")
print(X_test)
print(X_train.shape[1])


def generate_report(model=None):
    """Print the test-set predictions and accuracy of ``model``.

    Parameters
    ----------
    model : object with ``predict``, optional
        Model to evaluate on ``X_test``/``y_test``. Defaults to the
        module-level ``classifier``, which keeps ``generate_report()``
        working as before.
    """
    if model is None:
        model = classifier

    # predicting values
    y_pred = model.predict(X_test)
    print("\nPredicted values: "+str(y_pred)+"\n")
    y_pred = (y_pred > 0.5)

    # Accuracy from the confusion matrix: correct predictions / all predictions.
    cm = confusion_matrix(y_test, y_pred)
    accuracy = (cm[0, 0] + cm[1, 1]) / cm.sum()
    print("\nTest Accuracy: "+str(accuracy)+"\n")


def build_model(optimizer='adam'):
    """Build and compile the ANN: two 6-unit ReLU layers and a sigmoid output.

    One definition serves all three stages; only the grid search overrides
    ``optimizer``.
    """
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=X_train.shape[1]))
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier


# Stage 1: basic ANN model
classifier = build_model()
classifier.fit(X_train, y_train, batch_size=10, epochs=30)
generate_report()


# Stage 2: K-fold cross validation
classifier = KerasClassifier(build_fn=build_model, batch_size=10, epochs=40)
accuracy_list = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)
mean_acc = accuracy_list.mean()
std_acc = accuracy_list.std()
print(mean_acc, std_acc)

classifier.fit(X_train, y_train)
generate_report()


# Stage 3: hyperparameter tuning with grid search
classifier = KerasClassifier(build_fn=build_model)
parameters = {"batch_size": [5, 15, 30],
              "epochs": [30, 100],
              "optimizer": ["adam", "rmsprop"]}
grid_search = GridSearchCV(estimator=classifier,
                           param_grid=parameters,
                           scoring="accuracy",
                           cv=10)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
print(best_parameters, best_accuracy)

# GridSearchCV fits clones, so the wrapper passed in above is still unfitted.
# The model refitted with the best parameters is ``best_estimator_``.
classifier = grid_search.best_estimator_
generate_report()


# Save the tuned model. The underlying fitted Keras model is stored rather
# than the scikit-learn wrapper: app.py reads ``predict(...)[0][0]`` as a
# churn probability, while the wrapper's ``predict`` returns class labels and
# its pickle would need ``build_model`` to be importable when loaded.
# NOTE(review): confirm the installed Keras version can pickle Sequential models.
joblib.dump(classifier.model, 'prediction_classifier.pkl')