# Repository layout:
#   ├── ReportPythonDatatset.docx
#   ├── personalizedLearningDataset.py
#   └── personalized_learning_dataset.csv
#
# /ReportPythonDatatset.docx:
#   https://raw.githubusercontent.com/Prachisharma16/personalisedLearningDataset/ee559680fce0ceb4fdddf5cd6dfc3c2c0a873c20/ReportPythonDatatset.docx
#
# /personalizedLearningDataset.py:
"""Personalized Learning Dataset analysis.

Runs top to bottom as a script:

1. Exploratory data analysis (summaries, missing values, correlation heatmap).
2. Builds a weighted ``Engagement_Score`` column and plots it.
3. Compares final exam scores across learning styles.
4. Hypothesis tests: Welch t-test (engagement vs. exam score) and
   chi-square test (engagement level vs. dropout likelihood).
5. Box plots / count plots of key numerical and categorical features.
6. A small Dash dashboard, served only when the file is run directly.
"""

#Personalized Learning Dataset

from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, chi2_contingency
import scipy.stats as stats
import dash
from dash import dcc, html
import plotly.express as px

# Significance level shared by both hypothesis tests.
ALPHA = 0.05

#Loading the dataset

# Prefer the CSV that ships next to this script so the analysis runs on any
# machine; keep the original author's location as a fallback.
DATA_FILENAME = "personalized_learning_dataset.csv"
_CANDIDATE_PATHS = (
    Path(__file__).resolve().parent / DATA_FILENAME,
    Path("C:/Users/Prachi/OneDrive/Desktop") / DATA_FILENAME,
)
# If neither exists, fall through to the first candidate so the resulting
# FileNotFoundError names the location the file is expected at.
data_path = next((p for p in _CANDIDATE_PATHS if p.is_file()), _CANDIDATE_PATHS[0])

df = pd.read_csv(data_path)


#OBJECTIVE -1: Analyze and Preprocess data by performing Exploratory Data Analysis

#Displaying basic info
print(df.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None").
df.info()

missing_values = df.isnull().sum()
print("Missing Values:\n", missing_values)

duplicate_rows = df.duplicated().sum()
print("Duplicate Rows:", duplicate_rows)

#Summary statistics for numerical values
numerical_summary = df.describe()
print("Numerical Summary: \n", numerical_summary)

# Summary for categorical columns
categorical_summary = df.describe(include=["object"])
print("Categorical Summary:\n", categorical_summary)

#Plot heatmap of correlations
plt.figure(figsize=(10, 6))

correlation_matrix = df.select_dtypes(include=['float64', 'int64']).corr()
sns.heatmap(correlation_matrix, annot=True)
plt.title("Correlation Matrix of Numerical Features")
plt.show()


#OBJECTIVE -2: Develop a Personalized Engagement Score
# NOTE(review): the three inputs are combined on their raw scales; if they
# are measured in different units the largest-valued column will dominate the
# score. Confirm whether they should be normalised first.
df["Engagement_Score"] = (0.4 * df["Time_Spent_on_Videos"] +
                          0.3 * df["Forum_Participation"] +
                          0.3 * df["Assignment_Completion_Rate"])
print("Engagement Score: \n", df[["Student_ID", "Engagement_Score"]].head())
#Scatter plot
sns.scatterplot(x="Engagement_Score", y="Final_Exam_Score", data=df,
                hue="Dropout_Likelihood", palette="coolwarm")
plt.title("Engagement Score vs Final Exam Score")
plt.xlabel("Engagement Score")
plt.ylabel("Final Exam Score")
plt.grid(True)
plt.tight_layout()
plt.show()

# Plot histograms of numerical features
sns.set(style="whitegrid")

numerical_columns = [
    'Age', 'Time_Spent_on_Videos', 'Quiz_Attempts', 'Quiz_Scores',
    'Forum_Participation', 'Assignment_Completion_Rate',
    'Final_Exam_Score', 'Feedback_Score'
]

df[numerical_columns].hist(figsize=(16, 10), bins=20, edgecolor='black')
plt.suptitle("Histograms of Numerical Features", fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

####################################################################################

#OBJECTIVE -3: Analyze the Impact of Learning Styles on Academic Success
#Box plot
plt.figure(figsize=(10, 6))
sns.set_style("whitegrid")
sns.boxplot(x="Learning_Style", y="Final_Exam_Score", data=df,
            hue="Learning_Style", palette="Set2", legend=False)
plt.title("Box Plot - Final Exam Scores by Learning Style", fontsize=16)
plt.xlabel("Learning Style", fontsize=12)
plt.ylabel("Final Exam Score", fontsize=12)

plt.tight_layout()
plt.show()

#Bar plot
learning_style_avg = df.groupby("Learning_Style")["Final_Exam_Score"].mean().reset_index()

plt.figure(figsize=(8, 5))
sns.barplot(x="Learning_Style", y="Final_Exam_Score", data=learning_style_avg,
            hue="Learning_Style", palette="Blues_d", legend=False)

plt.title("Bar Plot - Average Final Exam Score per Learning Style", fontsize=15)
plt.xlabel("Learning Style")
plt.ylabel("Average Final Exam Score")

plt.tight_layout()
plt.show()

####################################################################################

#OBJECTIVE -4: Hypothesis Testing - Chi Square Test and T-Test

#T-Test -> Do Highly Engaged Students Perform Better?

# Convert Engagement Score into categories (Low, Medium, High) by terciles.
# Both tests below share this column, so it is computed once.
df["Engagement_Category"] = pd.qcut(df["Engagement_Score"], q=3, labels=["Low", "Medium", "High"])

low_engagement = df[df["Engagement_Category"] == "Low"]["Final_Exam_Score"]
high_engagement = df[df["Engagement_Category"] == "High"]["Final_Exam_Score"]

# Welch's t-test (unequal variances), two-sided.
t_stat, p_value = ttest_ind(low_engagement, high_engagement, equal_var=False)

print(f"T-Statistic: {t_stat:.2f}")
print(f"P-Value: {p_value:.5f}\n")

if p_value < ALPHA:
    # The test is two-sided, so significance alone does not say which group
    # scored higher. t_stat has the sign of mean(low) - mean(high).
    if t_stat < 0:
        print("Reject H0: High engagement students perform significantly better.")
    else:
        print("Reject H0: Low engagement students perform significantly better.")
else:
    print("Fail to Reject H0: No significant difference in performance.")


#Chi-Square Test -> Does Engagement Level Affect Dropout?

# Contingency table
contingency_table = pd.crosstab(df["Engagement_Category"], df["Dropout_Likelihood"])

# Perform Chi-Square Test
chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)

print(f"Chi-Square Statistic: {chi2_stat:.2f}")
print(f"P-Value: {p_value:.5f}\n")

if p_value < ALPHA:
    print("Reject H0: Engagement Level significantly affects Dropout Likelihood.")
else:
    print("Fail to Reject H0: No significant relationship between Engagement Level and Dropout.")

#Grouped bar chart
contingency_table.plot(kind='bar', figsize=(9, 6), colormap='viridis')
plt.title("Dropout Likelihood by Engagement Level", fontsize=16)
plt.xlabel("Engagement Level", fontsize=12)
plt.ylabel("Number of Students", fontsize=12)
plt.xticks(rotation=0)
plt.legend(title="Dropout Likelihood")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

#OBJECTIVE -5: Identify key factors influencing Dropout Likelihood using charts and plots

#Distribution Analysis (Box plot)
sns.set(style="whitegrid")

important_features = [
    'Time_Spent_on_Videos',
    'Assignment_Completion_Rate',
    'Forum_Participation',
    'Final_Exam_Score'
]
# One fill colour per feature, in the same order as important_features.
box_colors = ['skyblue', 'lightgreen', 'salmon', 'plum']

fig, axes = plt.subplots(1, 4, figsize=(16, 6))

for ax, feature, color in zip(axes, important_features, box_colors):
    sns.boxplot(y=df[feature], ax=ax, color=color)
    # Column names become titles by swapping underscores for spaces,
    # e.g. 'Time_Spent_on_Videos' -> "Time Spent on Videos".
    ax.set_title(feature.replace("_", " "))
    ax.set_xticks([])

plt.suptitle("Boxplots of Key Features", fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
plt.show()

# Visualization of categorical features
sns.set(style="whitegrid")
categorical_features = ['Gender', 'Education_Level', 'Learning_Style', 'Dropout_Likelihood']
# One palette per feature, in the same order as categorical_features.
count_palettes = ['pastel', 'Set2', 'Set3', 'coolwarm']
# Features whose category labels are tilted on the x-axis.
rotated_label_features = {'Education_Level', 'Learning_Style'}

plt.figure(figsize=(16, 10))
for position, (feature, palette) in enumerate(zip(categorical_features, count_palettes), start=1):
    plt.subplot(2, 2, position)
    sns.countplot(x=feature, data=df, hue=feature, palette=palette, legend=False)
    plt.title(f"{feature.replace('_', ' ')} Distribution")
    if feature in rotated_label_features:
        plt.xticks(rotation=30)

plt.suptitle("Distribution of Categorical Features", fontsize=16, fontweight='bold')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

#Develop an Interactive Dashboard for Educators

# PIE CHART – Learning Style Distribution
pie_chart = px.pie(
    df, names="Learning_Style", title="Distribution of Learning Styles",
    color_discrete_sequence=px.colors.sequential.RdBu
)

# DONUT CHART – Dropout Distribution
donut_chart = px.pie(
    df, names="Dropout_Likelihood", title="Dropout Likelihood (Yes/No)",
    hole=0.5, color_discrete_sequence=px.colors.sequential.Mint
)

# Dash app layout
app = dash.Dash(__name__)

# Define a dark background style
dark_style = {
    'backgroundColor': '#1e1e2f',
    'color': '#f0f0f0',
    'fontFamily': 'Arial',
    'padding': '20px'
}

app.layout = html.Div(style={'backgroundColor': dark_style['backgroundColor']}, children=[
    html.H1("Personalized Learning Dashboard", style={
        'textAlign': 'center',
        'color': dark_style['color'],
        'paddingTop': '20px'
    }),

    html.Div([
        dcc.Graph(figure=pie_chart)
    ], style={'width': '48%', 'display': 'inline-block', 'padding': '10px'}),

    html.Div([
        dcc.Graph(figure=donut_chart)
    ], style={'width': '48%', 'display': 'inline-block', 'padding': '10px'}),

    html.Footer("Developed by Jyotsna Chaudhary", style={
        'textAlign': 'center',
        'padding': '20px',
        'color': dark_style['color'],
        'fontSize': '14px'
    })
])

if __name__ == '__main__':
    # Newer Dash releases expose `app.run` and deprecate `app.run_server`;
    # fall back to the old name only when `run` is not available.
    run_app = getattr(app, "run", None) or app.run_server
    run_app(debug=True)

# --------------------------------------------------------------------------------