├── Clean_File.csv ├── Clean_File1.csv ├── Figure_1.png ├── Figure_10.png ├── Figure_2.png ├── Figure_3.png ├── Figure_4.png ├── Figure_5.png ├── Figure_6.png ├── Figure_7.png ├── Figure_8.png ├── Figure_9.png ├── Modified_File.csv ├── README.md └── project.py /Figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_1.png -------------------------------------------------------------------------------- /Figure_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_10.png -------------------------------------------------------------------------------- /Figure_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_2.png -------------------------------------------------------------------------------- /Figure_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_3.png -------------------------------------------------------------------------------- /Figure_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_4.png -------------------------------------------------------------------------------- /Figure_5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_5.png -------------------------------------------------------------------------------- /Figure_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_6.png -------------------------------------------------------------------------------- /Figure_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_7.png -------------------------------------------------------------------------------- /Figure_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_8.png -------------------------------------------------------------------------------- /Figure_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashutoshraj25/Data-analysis-using-python-/03dc9838242f861bd3421223b0c324f1b130af0e/Figure_9.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-analysis-using-python- 2 | Project Title: Indicators of Anxiety or Depression Based on Reported Frequency of Symptoms During Last 7 Days 3 | -------------------------------------------------------------------------------- /project.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | # Load and clean data 7 
# Read the input CSV, print the frame, and print the per-column null counts.
data = pd.read_csv("D:\\Downloads\\Clean_Python_Project 1.csv")
print(data)
print(data.isnull().sum())

# Write the frame back out without the index, then reload it as the
# working DataFrame used by every plot below.
data.to_csv("Clean_File1.csv", index=False)
df = pd.read_csv("Clean_File1.csv")

# 2. Boxplot by Age Group
# One box per Subgroup, split by Indicator, for rows whose Group is 'By Age'.
is_by_age = df['Group'] == 'By Age'
age_group = df[is_by_age]
plt.figure(figsize=(12,6))
sns.boxplot(
    data=age_group,
    x='Subgroup',
    y='Clean_Value',
    hue='Indicator',
)
plt.title("Distribution of Symptoms by Age Group")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# 3. Barplot by State (Latest Phase)
# "Latest" is the maximum non-null value in the Phase column.
latest_phase = df['Phase'].dropna().max()
in_latest_phase = df['Phase'] == latest_phase
is_anxiety = df['Indicator'] == 'Symptoms of Anxiety Disorder'
is_by_state = df['Group'] == 'By State'
latest_data = df[in_latest_phase & is_anxiety & is_by_state]
plt.figure(figsize=(14,7))
sns.barplot(data=latest_data, x='State', y='Clean_Value')
plt.xticks(rotation=90)
plt.title(f"Anxiety Symptoms by State (Latest Phase: {latest_phase})")
plt.tight_layout()
plt.show()

# 4. KDE Plot
# Filled density curve of Clean_Value, one curve per Indicator.
plt.figure(figsize=(10,5))
sns.kdeplot(
    data=df,
    x='Clean_Value',
    hue='Indicator',
    fill=True,
)
plt.title("Density Plot of Symptom Percentages")
plt.tight_layout()
plt.show()

# 5. Violin Plot by Gender
# Drawn only when at least one row has Group == 'By Sex'.
is_by_sex = df['Group'] == 'By Sex'
if is_by_sex.any():
    gender_group = df[is_by_sex]
    plt.figure(figsize=(10,6))
    sns.violinplot(
        data=gender_group,
        x='Subgroup',
        y='Clean_Value',
        hue='Indicator',
    )
    plt.title("Symptoms by Gender")
    plt.tight_layout()
    plt.show()

# 6.
# Lineplot of Anxiety by Age Group Over Time
#
# 'Time Period Start Date' comes back from read_csv as plain text because no
# date parsing was requested. Ordering or taking max() of that text compares
# characters, not calendar dates, so the dates are parsed once here.
# Unparseable entries become NaT (errors='coerce') instead of raising.
# df itself is left untouched so later sections and the final CSV export
# see the original column.
start_dates = pd.to_datetime(df['Time Period Start Date'], errors='coerce')

is_age_anxiety = (df['Group'] == 'By Age') & (df['Indicator'] == 'Symptoms of Anxiety Disorder')
# Plot from a copy whose date column holds real datetimes so the x-axis is
# chronological.
age_trend = df[is_age_anxiety].assign(
    **{'Time Period Start Date': start_dates[is_age_anxiety]}
)
plt.figure(figsize=(12,6))
sns.lineplot(data=age_trend, x='Time Period Start Date', y='Clean_Value', hue='Subgroup', marker='o')
plt.title("Anxiety Symptoms Over Time by Age Group")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# 7. Heatmap using coolwarm
# Mean Clean_Value per (State, Indicator) for 'By State' rows, pivoted so
# that states are rows and indicators are columns.
state_rows = df[df['Group'] == 'By State']
heatmap_df = state_rows.groupby(['State', 'Indicator'])['Clean_Value'].mean().unstack()

# Keep the 15 states with the highest mean across indicators.
top_states = heatmap_df.mean(axis=1).sort_values(ascending=False).head(15).index
heatmap_top = heatmap_df.loc[top_states]

plt.figure(figsize=(16,9))
sns.heatmap(
    heatmap_top.round(1),
    annot=True,
    fmt=".1f",
    cmap='coolwarm',
    linewidths=0.5,
    linecolor='gray',
    cbar_kws={"label": "Average Symptom (%)"},
)
plt.title("Top 15 States: Average Reported Symptoms by Indicator", fontsize=14)
plt.xlabel("Indicator")
plt.ylabel("State")
plt.xticks(rotation=30, ha='right')
plt.tight_layout()
plt.show()

# 8. Additional Barplot: National Comparison of Symptoms
# The latest date is the chronological maximum of the parsed dates. If no
# date parses, latest_date is NaT, the comparison below matches nothing, and
# the else branch runs.
latest_date = start_dates.max()
latest_national = df[(df['Group'] == 'National Estimate') & (start_dates == latest_date)]

if not latest_national.empty:
    # Show the date exactly as it is written in the file.
    latest_label = latest_national['Time Period Start Date'].iloc[0]
    plt.figure(figsize=(8,5))
    sns.barplot(data=latest_national, x='Indicator', y='Clean_Value', palette='Set2')
    plt.title(f"National Comparison of Symptoms (as of {latest_label})")
    plt.ylabel("Reported Symptoms (%)")
    plt.tight_layout()
    plt.show()
else:
    print("No national estimate data available for the latest date.")

# 10.
# Facet Grid: Depression in Top 6 States
# Rank states by mean Clean_Value over all 'By State' rows and keep the top 6.
state_rows = df[df['Group'] == 'By State']
top_states = (
    state_rows.groupby('State')['Clean_Value']
    .mean()
    .sort_values(ascending=False)
    .head(6)
    .index
)
# NOTE(review): the indicator label below must match the CSV text exactly;
# if the file spells it differently this filter is always empty - confirm.
facet_data = state_rows[
    state_rows['State'].isin(top_states)
    & (state_rows['Indicator'] == 'Symptoms of Depression Disorder')
]
if not facet_data.empty:
    # read_csv leaves the date column as text; parse it on a copy so each
    # panel's x-axis is chronological. Unparseable entries become NaT.
    facet_data = facet_data.assign(
        **{
            'Time Period Start Date': pd.to_datetime(
                facet_data['Time Period Start Date'], errors='coerce'
            )
        }
    )
    g = sns.FacetGrid(facet_data, col='State', col_wrap=3, height=4, aspect=1.2)
    g.map_dataframe(sns.lineplot, x='Time Period Start Date', y='Clean_Value')
    g.set_titles("{col_name}")
    g.set_axis_labels("Date", "Depression Symptom (%)")
    plt.suptitle("Depression Trend Over Time for Top 6 States", y=1.05)
    plt.tight_layout()
    plt.show()
else:
    print("No data available for depression symptoms in top states.")

# 11. Pairplot using coolwarm
# Use only float64/int64 columns and drop rows with any missing value.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
complete_rows = df[numeric_cols].dropna()

if complete_rows.empty:
    print("No numeric data available for the pairplot.")
else:
    # Sample at most 500 rows. Asking sample() for more rows than exist
    # raises ValueError, so cap n at the number of complete rows.
    pair_data = complete_rows.sample(n=min(500, len(complete_rows)), random_state=1)

    sns.set(style="whitegrid")
    # NOTE(review): no hue is passed, so `palette` probably has no visible
    # effect here - confirm against the seaborn version in use.
    pair_plot = sns.pairplot(
        pair_data,
        corner=True,
        palette='coolwarm',
        height=2.8,
        aspect=1.2,
        plot_kws={'alpha': 0.5, 's': 30, 'edgecolor': 'k'},
        diag_kws={'fill': True, 'color': 'lightcoral'}
    )
    pair_plot.fig.suptitle("Pairplot of All Numerical Features", y=1.02, fontsize=14)
    plt.tight_layout()
    plt.show()

# Save final modified file
# df is never modified above, so this writes the data as loaded.
df.to_csv("Modified_File.csv", index=False)