├── README.md
├── Student_Depression_Report.docx
├── project two of python.py
└── student_depression_dataset.csv


/README.md:
--------------------------------------------------------------------------------
1 | # Python-project_EDA


--------------------------------------------------------------------------------
/Student_Depression_Report.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vamsi89489/Python-project_EDA/14c9db03dc239f85d2f0347f442fc6e828e84fbd/Student_Depression_Report.docx


--------------------------------------------------------------------------------
/project two of python.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import pandas as pd
 4 | import seaborn as sns
 5 | import matplotlib.pyplot as plt
 6 | from sklearn.ensemble import RandomForestClassifier
 7 | from sklearn.model_selection import train_test_split
 8 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
 9 | 
# Load and preprocess the dataset
# NOTE(review): the path is resolved against the current working directory, and
# the repository listing shows `student_depression_dataset.csv` rather than this
# name -- confirm which file (and column layout) this script is meant to read.
df = pd.read_csv("Depression Student Dataset.csv")

# Normalise the headers so they are easy to reference: trim surrounding
# whitespace, drop "?" and turn spaces into underscores, then shorten the two
# longest names.
df.columns = [col.strip().replace("?", "").replace(" ", "_") for col in df.columns]
df.rename(columns={
    "Have_you_ever_had_suicidal_thoughts_": "Suicidal_Thoughts",
    "Family_History_of_Mental_Illness": "Family_History",
}, inplace=True)

# Numeric code for every label each categorical column is expected to hold.
yes_no_codes = {'Yes': 1, 'No': 0}
category_codes = {
    'Gender': {'Male': 0, 'Female': 1},
    'Suicidal_Thoughts': yes_no_codes,
    'Family_History': yes_no_codes,
    'Depression': yes_no_codes,
    'Dietary_Habits': {'Unhealthy': 0, 'Moderate': 1, 'Healthy': 2},
    # Sleep buckets become a single number (presumably representative hours).
    'Sleep_Duration': {
        'Less than 5 hours': 4, '5-6 hours': 5.5, '7-8 hours': 7.5, 'More than 8 hours': 9
    },
}

for column_name, codes in category_codes.items():
    encoded = df[column_name].map(codes)
    # Series.map() converts every label that is missing from `codes` into NaN
    # without any warning. Fail loudly instead, so an unexpected category or
    # spelling cannot silently become a missing value in the features or the
    # target. Cells that were already missing stay NaN, exactly as before.
    unmapped = df.loc[encoded.isna() & df[column_name].notna(), column_name].unique()
    if len(unmapped):
        raise ValueError(
            f"Column {column_name!r} contains labels without a numeric code: "
            f"{sorted(map(str, unmapped))}"
        )
    df[column_name] = encoded
26 | 
# Split the data
# Features are every column except the target column.
X = df.drop(columns='Depression')
y = df.loc[:, 'Depression']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train model
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
35 | 
# 1. Identify key factors contributing to depression
# Label each importance score of the fitted forest with its feature name,
# then rank the features from most to least important.
unsorted_importances = pd.Series(data=model.feature_importances_, index=X.columns)
importances = unsorted_importances.sort_values(ascending=False)
print("\n1. Feature Importances:", importances, sep="\n")
40 | 
# 2. Correlation between academic performance and mental health
print("\n2. Correlation with Depression:")

# Pairwise correlations between the study-related columns and the target.
academic_columns = ['Academic_Pressure', 'Study_Satisfaction', 'Study_Hours', 'Depression']
correlation_matrix = df[academic_columns].corr()
print(correlation_matrix)

# Same matrix as an annotated heatmap.
sns.heatmap(data=correlation_matrix, annot=True, cmap="coolwarm")
plt.title("Correlation with Depression")
plt.show()
48 | 
# 3. Predictive model evaluation
# Score the trained model on the held-out rows only.
y_pred = model.predict(X_test)

print("\n3. Classification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)

confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)
55 | 
# 4. Visualizations using matplotlib/seaborn
# One entry per chart: figure size, seaborn plotting function, the keyword
# arguments selecting the columns, and the chart title.
chart_specs = [
    ((6, 4), sns.countplot,
     {'x': 'Gender', 'hue': 'Depression'},
     "Depression by Gender"),
    ((8, 4), sns.histplot,
     {'x': 'Age', 'hue': 'Depression', 'bins': 10, 'multiple': "stack"},
     "Depression by Age Group"),
    ((6, 4), sns.boxplot,
     {'x': 'Depression', 'y': 'Study_Hours'},
     "Study Hours vs Depression"),
    ((6, 4), sns.boxplot,
     {'x': 'Depression', 'y': 'Sleep_Duration'},
     "Sleep Duration vs Depression"),
    ((6, 4), sns.countplot,
     {'x': 'Family_History', 'hue': 'Depression'},
     "Family History vs Depression"),
]

# Every chart gets its own figure and is shown before the next one is built.
for figure_size, draw_chart, chart_kwargs, chart_title in chart_specs:
    plt.figure(figsize=figure_size)
    draw_chart(data=df, **chart_kwargs)
    plt.title(chart_title)
    plt.show()
81 | 
# 5. Evaluate impact of support mechanisms
print("\n5. Impact of Support Mechanisms:")

# Mean of the Depression column for every (Family_History, Suicidal_Thoughts)
# pair, reshaped so Family_History labels the rows and Suicidal_Thoughts the
# columns.
mean_depression = df.groupby(['Family_History', 'Suicidal_Thoughts'])['Depression'].mean()
impact = mean_depression.unstack()
print(impact)
86 | 
# 6. Recommendations
print("\n6. Data-Driven Recommendations:")

# Fixed list of recommendations, already numbered in the text itself.
recommendations = [
    "1. Increase awareness and access to counseling services.",
    "2. Promote healthy sleep and dietary routines among students.",
    "3. Offer financial aid and stress management workshops.",
    "4. Create peer support networks and mentoring systems.",
    "5. Tailor academic curriculums to improve study satisfaction.",
]

# Emit one recommendation per output line.
print(*recommendations, sep="\n")
98 | 


--------------------------------------------------------------------------------