├── Analysis of Bank Churn.docx ├── Bank_Churn.csv ├── EDA-analysis.py ├── README.md ├── main.py └── objective.py /Analysis of Bank Churn.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KethaapaAjaykumar/Python-Dashboard/b8345d11b50a29604f3c09109dd6b317addf7c12/Analysis of Bank Churn.docx -------------------------------------------------------------------------------- /EDA-analysis.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | # Load data file (I have taken the data in csv formt) 6 | df = pd.read_csv("C:\\Users\\ajayk\\Desktop\\Bank_Churn.csv") 7 | 8 | # MODULE 1: EDA ANALYSIS (Visualizing + Cleaning of data ) 9 | def eda_module(df): 10 | print("\nEDA ANALYSIS0") 11 | print("\nStep 1: Checking for Null Values") 12 | print(df.isnull().sum()) 13 | print("\nStep 2: Removing Duplicate Records") 14 | df.drop_duplicates(inplace=True) 15 | print("Duplicates removed successfully.") 16 | print("\nStep 3: Correlation Matrix (All Numeric Features present in the file)") 17 | plt.figure(figsize=(10, 8)) 18 | sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm') 19 | plt.title("Correlation Heatmap") 20 | plt.tight_layout() 21 | plt.show() 22 | print("\nStep 4: Covariance Matrix for CreditScore', 'Age', 'Balance', 'EstimatedSalary") 23 | cov_columns = ['CreditScore', 'Age', 'Balance', 'EstimatedSalary'] 24 | print("Selected columns for covariance matrix:", cov_columns) 25 | print(df[cov_columns].cov()) 26 | print("\nStep 5: Outlier Detection using Boxplot") 27 | num_cols = ['CreditScore', 'Age', 'Balance'] 28 | plt.figure(figsize=(10, 6)) 29 | sns.boxplot(data=df[num_cols], orient='h') 30 | plt.title("Outlier Detection: Credit Score, Age, Balance") 31 | plt.xlabel("Values") 32 | plt.tight_layout() 33 | plt.show() 34 | print("\nStep 6: Pairplot for Key Numerical Features") 35 | pairplot_columns = ['CreditScore', 'Age', 'Balance', 'EstimatedSalary', 'Exited'] 36 | print("Pairplot columns:", pairplot_columns) 37 | sampled_df = df.sample(500, random_state=1) 38 | sns.pairplot(sampled_df[pairplot_columns], hue='Exited') 39 | plt.suptitle("Pairplot of Selected Features (Sample of 500 Records)", y=1.02) 40 | plt.show() 41 | #PRESS 1 for eda analysis 42 | 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-Dashboard 2 | For detailed explanation refer Analysis-of-Bank-Churn word file 3 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # MAIN EXECUTION 2 | import objective 3 | import EDA-analysis 4 | def main(): 5 | print("========== Bank Customer Churn Analysis ==========") 6 | print("Choose a Module to Run:\n1 - EDA Analysis\n2 - Churn Insights & Objectives") 7 | choice = input("Enter 1 or 2: ") 8 | if choice == '1': 9 | eda_module(df) 10 | elif choice == '2': 11 | objective_insights_module(df) 12 | else: 13 | print("Invalid input. Please enter 1 or 2.") 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /objective.py: -------------------------------------------------------------------------------- 1 | import EDA-analysis 2 | # MODULE 2: OBJECTIVE-BASED INSIGHTS 3 | def objective_insights_module(df): 4 | print("\nOBJECTIVE THAT I HAVE SPECIFICALLY CHOOSEN TO PERFORM IN THIS PROJECT") 5 | # OBJ-1 6 | print("\nObjective 1: Why Customers Leave Based on Age, Balance, and Credit Score") 7 | for col in ['Age', 'Balance', 'CreditScore']: 8 | plt.figure() 9 | sns.histplot(data=df, x=col, hue='Exited', kde=True, multiple="stack") 10 | plt.title(f"{col} Distribution by Churn") 11 | plt.tight_layout() 12 | plt.show() 13 | #OBJ-2 14 | print("\nObjective 2: Churn Rate Comparison by Gender") 15 | plt.figure() 16 | sns.countplot(data=df, x='Gender', hue='Exited') 17 | plt.title("Churn Count by Gender") 18 | plt.tight_layout() 19 | plt.show() 20 | #OBJ-3 21 | print("\nObjective 3: Churn Rate Comparison by Country") 22 | plt.figure() 23 | sns.countplot(data=df, x='Geography', hue='Exited') 24 | plt.title("Churn Count by Geography") 25 | plt.tight_layout() 26 | plt.show() 27 | #OBJ-4 28 | print("\nObjective 4: Churn Rate by Age Group") 29 | df['AgeGroup'] = pd.cut(df['Age'], bins=[18, 30, 40, 50, 60, 100], 30 | labels=["18-30", "31-40", "41-50", "51-60", "60+"]) 31 | plt.figure() 32 | sns.countplot(data=df, x='AgeGroup', hue='Exited') 33 | plt.title("Churn Count by Age Group") 34 | plt.tight_layout() 35 | plt.show() 36 | #OBJ-5 customer trends and risks 37 | print("\nObjective 5.1: Spending Habits of Churned vs Loyal Customers") 38 | plt.figure() 39 | sns.boxplot(data=df, x='Exited', y='EstimatedSalary') 40 | plt.title("Estimated Salary by Churn") 41 | plt.tight_layout() 42 | plt.show() 43 | print("\nObjective 5.2: Identify Key Factors Linked to Churn") 44 | plt.figure(figsize=(10, 8)) 45 | sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm') 46 | plt.title("Correlation Heatmap with Churn") 47 | plt.tight_layout() 48 | plt.show() 49 | print("\n Objective 5.3: Summary of Customer Trends and Risk Factors") 50 | print( 51 | """These are insights that I have concluded from this project 52 | Key Insights: 53 | Older customers (especially those above 50) tend to churn more. 54 | Customers with high balances but low credit scores are likely to churn more. 55 | Churn is slightly higher among female customers and those in France. 56 | Salaries are also a factor and lower earners tends to churn more. 57 | """) 58 | #press 2 to to view the objectives 59 | --------------------------------------------------------------------------------