# Repository snapshot this script was extracted from:
#   ├── County_Level_FFS_Data_for_Shared_Savings_Program_Benchmark_PUF_2023_Offest_Assignable_2024_Starters (1).csv
#   ├── README.md
#   └── main.py
# README.md title:
#   Python-Powered-Healthcare-Data-Analysis-Uncovering-Insights-from-County-Level-FFS-Data
"""Clean, impute, summarise and plot a county-level FFS expenditure CSV.

Pipeline (run as a script):

1. Read the raw CSV, turn the "*" and "." placeholder cells into missing
   values, and save the result as ``Cleaned_Project_File.csv``.
2. Re-read the cleaned file and fill missing values in the per-group
   expenditure / risk / demographic / person-year columns with the column
   mean.
3. Derive ``Average_Expenditure``, ``Total_Beneficiaries`` and
   ``Performance_Category`` and print several summaries.
4. Show a series of seaborn / matplotlib charts.
5. Save the enriched frame as ``Modified_Project_File.csv``.

Usage: ``python main.py [path/to/raw.csv]``.  Without an argument the
original hard-coded input path is used.
"""

import sys

import numpy as np
import pandas as pd

# Default input location from the original script; override it by passing a
# path as the first command-line argument.
DEFAULT_INPUT_CSV = "C:\\Users\\MANAB\\Downloads\\County_Level_FFS_Data_for_Shared_Savings_Program_Benchmark_PUF_2023_Offest_Assignable_2024_Starters.csv"
CLEANED_CSV = "Cleaned_Project_File.csv"
MODIFIED_CSV = "Modified_Project_File.csv"

# Columns whose missing values are replaced by the column mean, in the order
# their means are printed.
# NOTE(review): 'AVG_DEMOG_SCORE_ AGDU' (with an embedded space) and
# 'AVG_DEMOG_SCORE_AGED/NON-DUAL' are spelled as in the original script;
# confirm them against the CSV header.
IMPUTED_COLUMNS = [
    'PER_CAPITA_EXP_ESRD',
    'AVG_RISK_SCORE_ESRD',
    'AVG_DEMOG_SCORE_ESRD',
    'PERSON_YEARS_ESRD',
    'PER_CAPITA_EXP_DIS',
    'AVG_RISK_SCORE_DIS',
    'AVG_DEMOG_SCORE_DIS',
    'PERSON_YEARS_DIS',
    'PER_CAPITA_EXP_AGDU',
    'AVG_RISK_SCORE_AGDU',
    'AVG_DEMOG_SCORE_ AGDU',
    'PERSON_YEARS_AGDU',
    'PER_CAPITA_EXP_AGND',
    'AVG_RISK_SCORE_AGND',
    'AVG_DEMOG_SCORE_AGED/NON-DUAL',
    'PERSON_YEARS_AGND',
]

# Columns shown in the violin plot, pair plot and box plot.
SELECTED_COLUMNS = [
    'PER_CAPITA_EXP_ESRD',
    'PER_CAPITA_EXP_DIS',
    'PER_CAPITA_EXP_AGDU',
    'PER_CAPITA_EXP_AGND',
    'AVG_RISK_SCORE_ESRD',
    'AVG_RISK_SCORE_DIS',
    'AVG_RISK_SCORE_AGDU',
    'AVG_RISK_SCORE_AGND',
]


def load_and_clean(raw_path, cleaned_path=CLEANED_CSV):
    """Read the raw CSV, blank out placeholder cells and return the re-read frame.

    Cells equal to "*" or "." are replaced with ``pd.NA`` and the result is
    written to *cleaned_path* (without the index).  The cleaned file is then
    read back, which lets ``read_csv`` re-infer column dtypes now that the
    placeholder strings are gone.

    Args:
        raw_path: Path of the raw input CSV.
        cleaned_path: Where the cleaned CSV is written and re-read from.

    Returns:
        The DataFrame read back from *cleaned_path*.
    """
    data = pd.read_csv(raw_path)
    data = data.replace("*", pd.NA).replace(".", pd.NA)
    data.to_csv(cleaned_path, index=False)

    data.info()
    # The original script called describe() without printing it, which shows
    # nothing outside an interactive session.
    print(data.describe())
    print(data.isnull().sum())

    df = pd.read_csv(cleaned_path)
    print(df)
    print(df.isnull().sum())
    return df


def impute_missing_with_mean(df, columns=IMPUTED_COLUMNS):
    """Fill missing values in *columns* with each column's own mean.

    The fill is done with a single assignment back into *df* rather than
    ``df[col].fillna(..., inplace=True)``: the chained in-place form acts on
    a temporary object and does not update *df* under pandas Copy-on-Write.

    Args:
        df: Frame to modify in place.
        columns: Names of the numeric columns to fill.

    Returns:
        *df* itself, for convenience.
    """
    cols = list(columns)
    means = df[cols].mean()
    df[cols] = df[cols].fillna(means)
    return df


def add_derived_columns(df):
    """Add the expenditure, beneficiary and performance columns to *df*.

    * ``Average_Expenditure``: mean of the four ``PER_CAPITA_EXP_*`` columns.
    * ``Total_Beneficiaries``: sum of the four ``PERSON_YEARS_*`` columns.
    * ``Performance_Category``: ``'High'`` where ``Average_Expenditure`` is
      above its overall mean, otherwise ``'Low'``.

    Returns:
        *df* itself, for convenience.
    """
    df['Average_Expenditure'] = (
        df['PER_CAPITA_EXP_ESRD']
        + df['PER_CAPITA_EXP_DIS']
        + df['PER_CAPITA_EXP_AGDU']
        + df['PER_CAPITA_EXP_AGND']
    ) / 4
    df['Total_Beneficiaries'] = (
        df['PERSON_YEARS_ESRD']
        + df['PERSON_YEARS_DIS']
        + df['PERSON_YEARS_AGDU']
        + df['PERSON_YEARS_AGND']
    )
    average = df['Average_Expenditure']
    df['Performance_Category'] = np.where(average > average.mean(), 'High', 'Low')
    return df


def print_column_means(df, columns=IMPUTED_COLUMNS):
    """Print the mean of every column in *columns*, one per line."""
    print("Mean of Columns:\n")
    for col in columns:
        print(f"{col} : {df[col].mean()}")
    print("\n")


def plot_all(df, top10, statewise, correlations):
    """Show every chart of the analysis, one blocking window at a time.

    Args:
        df: The imputed frame including the derived columns.
        top10: Rows of *df* with the ten largest ``Average_Expenditure``.
        statewise: ``Total_Beneficiaries`` summed per state, sorted descending.
        correlations: Numeric correlation matrix of *df*.
    """
    # Imported here so the data helpers above stay importable (and testable)
    # on machines without a plotting stack installed.
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=(10, 6))
    sns.barplot(x='Average_Expenditure', y='COUNTY_NAME', data=top10)
    plt.title("Top 10 Counties with Highest Expenditure")
    plt.show()

    plt.figure(figsize=(8, 8))
    statewise.head(10).plot.pie(autopct='%1.1f%%')
    plt.title("Top 10 States by Total Beneficiaries")
    plt.ylabel('')
    plt.show()

    plt.figure(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm')
    plt.title("Correlation Heatmap")
    plt.show()

    plt.figure(figsize=(10, 6))
    sns.scatterplot(x='Total_Beneficiaries', y='PER_CAPITA_EXP_AGND', hue='STATE_NAME', data=df)
    plt.title('Total Beneficiaries vs Per Capita Expenditure (Aged Non-Dual)')
    plt.xlabel('Total Beneficiaries')
    plt.ylabel('Per Capita Expenditure')
    plt.show()

    plt.figure(figsize=(8, 5))
    sns.kdeplot(df['PER_CAPITA_EXP_ESRD'], fill=True, color='purple')
    plt.title('Density Plot of Per Capita Expenditure ESRD')
    plt.show()

    plt.figure(figsize=(12, 6))
    sns.countplot(
        x='STATE_NAME',
        data=df,
        order=df['STATE_NAME'].value_counts().index,
        palette='coolwarm',
    )
    plt.xticks(rotation=90)
    plt.title("Data Count per State")
    plt.show()

    plt.figure(figsize=(10, 6))
    sns.violinplot(data=df[SELECTED_COLUMNS])
    plt.xticks(rotation=45)
    plt.title("Violin Plot of Key Features")
    plt.show()

    grid = sns.pairplot(df[SELECTED_COLUMNS + ['STATE_NAME']], hue='STATE_NAME')
    # Remove the hue legend that pairplot attaches to the grid.
    grid._legend.remove()
    plt.suptitle('Pair Plot of Key Expenditure and Risk Score Features', y=1.01)
    plt.tight_layout()
    plt.show()

    plt.figure(figsize=(15, 7))
    sns.boxplot(data=df[SELECTED_COLUMNS], palette='Set2')
    plt.title('Boxplot of Per Capita Expenditure & Risk Scores', fontsize=16, fontweight='bold')
    plt.xlabel('Features', fontsize=12)
    plt.ylabel('Values', fontsize=12)
    plt.xticks(rotation=45)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()


def main(argv=None):
    """Run the full analysis.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first argument, when present, is the path
            of the raw CSV; otherwise ``DEFAULT_INPUT_CSV`` is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    raw_path = args[0] if args else DEFAULT_INPUT_CSV

    df = load_and_clean(raw_path)
    impute_missing_with_mean(df)
    print_column_means(df)

    add_derived_columns(df)

    top10 = df.nlargest(10, 'Average_Expenditure')
    print("Top 10 Counties with Highest Expenditure:\n")
    print(top10[['COUNTY_NAME', 'Average_Expenditure']])
    print("\n")

    statewise = (
        df.groupby('STATE_NAME')['Total_Beneficiaries']
        .sum()
        .sort_values(ascending=False)
    )
    print("Statewise Total Beneficiaries:\n")
    print(statewise)
    print("\n")

    performance = df.groupby('Performance_Category')['Total_Beneficiaries'].sum()
    print(performance)
    print("\n")

    df.info()
    print("\n")
    print(df.describe())

    # Computed once and shared by the heatmap and the final printout; the
    # frame is not modified in between.
    correlations = df.corr(numeric_only=True)
    plot_all(df, top10, statewise, correlations)

    print(df.isnull().sum())
    print("\n")
    summary = df.groupby('STATE_NAME').agg({
        'Total_Beneficiaries': 'sum',
        'Average_Expenditure': 'mean',
    }).reset_index()
    print(summary)

    print("\n")
    print(correlations)
    # The row index is written as well, as in the original script.
    df.to_csv(MODIFIED_CSV)


if __name__ == "__main__":
    main()