# Repository listing this script was extracted from:
#   README.md                       ("# Python-project_EDA")
#   Student_Depression_Report.docx
#     https://raw.githubusercontent.com/vamsi89489/Python-project_EDA/14c9db03dc239f85d2f0347f442fc6e828e84fbd/Student_Depression_Report.docx
#   project two of python.py        (this script)
#   student_depression_dataset.csv
"""Exploratory analysis and a random-forest baseline for student depression data.

Running the script loads the survey CSV, encodes its categorical columns as
numbers, trains a ``RandomForestClassifier`` to predict the ``Depression``
column, and then prints/plots six numbered report sections (feature
importances, correlations, model evaluation, distribution plots, a grouped
depression rate table and a fixed list of recommendations).
"""

import pandas as pd

# Default input file.  NOTE(review): the repository listing contains
# "student_depression_dataset.csv" rather than this name; the default is left
# unchanged because that file's column layout cannot be confirmed from here.
# Pass a different path to ``main`` / ``load_dataset`` to analyse another file.
DATA_PATH = "Depression Student Dataset.csv"

# Column the classifier predicts and every plot is broken down by.
TARGET = "Depression"

# Long survey-question headers (after clean-up) -> short names used below.
COLUMN_RENAMES = {
    "Have_you_ever_had_suicidal_thoughts_": "Suicidal_Thoughts",
    "Family_History_of_Mental_Illness": "Family_History",
}

_YES_NO = {"Yes": 1, "No": 0}

# Numeric encoding applied to each categorical column.
CATEGORY_ENCODINGS = {
    "Gender": {"Male": 0, "Female": 1},
    "Suicidal_Thoughts": _YES_NO,
    "Family_History": _YES_NO,
    "Depression": _YES_NO,
    "Dietary_Habits": {"Unhealthy": 0, "Moderate": 1, "Healthy": 2},
    "Sleep_Duration": {
        "Less than 5 hours": 4,
        "5-6 hours": 5.5,
        "7-8 hours": 7.5,
        "More than 8 hours": 9,
    },
}

RECOMMENDATIONS = [
    "1. Increase awareness and access to counseling services.",
    "2. Promote healthy sleep and dietary routines among students.",
    "3. Offer financial aid and stress management workshops.",
    "4. Create peer support networks and mentoring systems.",
    "5. Tailor academic curriculums to improve study satisfaction.",
]


def _clean_column_name(name):
    """Return *name* with outer whitespace and "?" removed and spaces as "_"."""
    return name.strip().replace("?", "").replace(" ", "_")


def _strip_if_text(value):
    """Strip surrounding whitespace from strings; pass other values through."""
    return value.strip() if isinstance(value, str) else value


def _encode_column(series, mapping, column):
    """Encode the categories in *series* with *mapping*.

    ``Series.map`` silently turns every value that is missing from the mapping
    into NaN, so unexpected categories are reported here instead of surfacing
    later as a confusing model or plotting failure.

    Raises:
        ValueError: if a non-missing value has no entry in *mapping*.
    """
    cleaned = series.map(_strip_if_text)
    encoded = cleaned.map(mapping)
    # Values that were present in the input but came out of the mapping as NaN.
    unmapped = cleaned[encoded.isna() & cleaned.notna()]
    if not unmapped.empty:
        unexpected = sorted({repr(value) for value in unmapped})
        raise ValueError(
            f"Column {column!r} contains values with no numeric encoding: "
            f"{', '.join(unexpected)}; expected one of {sorted(mapping)}"
        )
    return encoded


def preprocess(raw):
    """Return a cleaned, numerically encoded copy of the raw survey frame.

    Column headers are normalised and shortened, and every column listed in
    ``CATEGORY_ENCODINGS`` is replaced by its numeric encoding.  *raw* itself
    is not modified.

    Raises:
        KeyError: if a column that must be encoded is absent.
        ValueError: if a column holds a category with no encoding.
    """
    df = raw.copy()
    df.columns = [_clean_column_name(name) for name in df.columns]
    df = df.rename(columns=COLUMN_RENAMES)

    absent = [name for name in CATEGORY_ENCODINGS if name not in df.columns]
    if absent:
        raise KeyError(f"Dataset is missing expected column(s): {absent}")

    for column, mapping in CATEGORY_ENCODINGS.items():
        df[column] = _encode_column(df[column], mapping, column)
    return df


def load_dataset(path=DATA_PATH):
    """Read the CSV at *path* and return the preprocessed frame."""
    return preprocess(pd.read_csv(path))


def train_model(df):
    """Fit the random forest on an 80/20 split; return (model, X_test, y_test)."""
    # Imported here so the pandas-only helpers above can be used without the
    # machine-learning stack being installed.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    features = df.drop(columns=TARGET)
    labels = df[TARGET]
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    return model, X_test, y_test


def report_feature_importances(model, feature_names):
    """Section 1: print the model's feature importances, largest first."""
    importances = pd.Series(
        model.feature_importances_, index=feature_names
    ).sort_values(ascending=False)
    print("\n1. Feature Importances:")
    print(importances)


def report_correlations(df):
    """Section 2: print and plot correlations of study columns with the target."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    print("\n2. Correlation with Depression:")
    correlation_matrix = df[
        ["Academic_Pressure", "Study_Satisfaction", "Study_Hours", TARGET]
    ].corr()
    print(correlation_matrix)
    sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm")
    plt.title("Correlation with Depression")
    plt.show()


def report_model_evaluation(model, X_test, y_test):
    """Section 3: print classification metrics for the held-out rows."""
    from sklearn.metrics import (
        accuracy_score,
        classification_report,
        confusion_matrix,
    )

    y_pred = model.predict(X_test)
    print("\n3. Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Accuracy Score:", accuracy_score(y_test, y_pred))


def plot_distributions(df):
    """Section 4: show the five matplotlib/seaborn plots, one window each."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="Gender", hue=TARGET)
    plt.title("Depression by Gender")
    plt.show()

    plt.figure(figsize=(8, 4))
    sns.histplot(data=df, x="Age", hue=TARGET, bins=10, multiple="stack")
    plt.title("Depression by Age Group")
    plt.show()

    # The two box plots differ only in the measured column and the title.
    for column, title in (
        ("Study_Hours", "Study Hours vs Depression"),
        ("Sleep_Duration", "Sleep Duration vs Depression"),
    ):
        plt.figure(figsize=(6, 4))
        sns.boxplot(data=df, x=TARGET, y=column)
        plt.title(title)
        plt.show()

    plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="Family_History", hue=TARGET)
    plt.title("Family History vs Depression")
    plt.show()


def report_support_impact(df):
    """Section 5: print the mean target value per family-history/suicidal-thoughts group.

    NOTE(review): the grouping columns look like risk indicators rather than
    support mechanisms; the printed heading is kept as-is because it is
    user-facing output -- confirm the intended wording with the author.
    """
    print("\n5. Impact of Support Mechanisms:")
    impact = (
        df.groupby(["Family_History", "Suicidal_Thoughts"])[TARGET]
        .mean()
        .unstack()
    )
    print(impact)


def print_recommendations():
    """Section 6: print the fixed list of recommendations."""
    print("\n6. Data-Driven Recommendations:")
    for line in RECOMMENDATIONS:
        print(line)


def main(data_path=DATA_PATH):
    """Run the full analysis on the CSV at *data_path*, in report order."""
    df = load_dataset(data_path)
    model, X_test, y_test = train_model(df)

    report_feature_importances(model, X_test.columns)
    report_correlations(df)
    report_model_evaluation(model, X_test, y_test)
    plot_distributions(df)
    report_support_impact(df)
    print_recommendations()


if __name__ == "__main__":
    main()