├── ReportPythonDatatset.docx
├── personalizedLearningDataset.py
└── personalized_learning_dataset.csv


/ReportPythonDatatset.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Prachisharma16/personalisedLearningDataset/ee559680fce0ceb4fdddf5cd6dfc3c2c0a873c20/ReportPythonDatatset.docx


--------------------------------------------------------------------------------
/personalizedLearningDataset.py:
--------------------------------------------------------------------------------
  1 | #Personalized Learning Dataset
  2 | 
  3 | import pandas as pd
  4 | import matplotlib.pyplot as plt
  5 | import seaborn as sns
  6 | from scipy.stats import ttest_ind, chi2_contingency
  7 | import scipy.stats as stats
  8 | import dash
  9 | from dash import dcc, html
 10 | import plotly.express as px
 11 | 
 12 | #Loading the dataset
 13 | 
 14 | df = pd.read_csv("C:/Users/Prachi/OneDrive/Desktop/personalized_learning_dataset.csv")
 15 | 
 16 | 
 17 | #OBJECTIVE -1: Analyze and Preprocess data by performing Exploratory Data Analysis
 18 | 
 19 | #Displaying basic info
 20 | print(df.head())
 21 | print(df.info())
 22 | 
 23 | missing_values = df.isnull().sum()
 24 | print("Missing Values:\n", missing_values)
 25 | 
 26 | duplicate_rows = df.duplicated().sum()
 27 | print("Duplicate Rows:", duplicate_rows)
 28 | 
 29 | #Summary statistics for numerical values
 30 | numerical_summary = df.describe()
 31 | print("Numerical Summary: \n", numerical_summary)
 32 | 
 33 | # Summary for categorical columns
 34 | categorical_summary = df.describe(include=["object"])
 35 | print("Categorical Summary:\n", categorical_summary)
 36 | 
 37 | #Plot heatmap of correlations
 38 | plt.figure(figsize=(10, 6))
 39 | 
 40 | correlation_matrix = df.select_dtypes(include=['float64', 'int64']).corr()
 41 | sns.heatmap(correlation_matrix, annot=True)
 42 | plt.title("Correlation Matrix of Numerical Features")
 43 | plt.show()
 44 | 
 45 | 
 46 | #OBJECTIVE -2: Develop a Personalized Engagement Score
 47 | df["Engagement_Score"] = (0.4 * df["Time_Spent_on_Videos"] +
 48 |                           0.3 * df["Forum_Participation"] +
 49 |                           0.3 * df["Assignment_Completion_Rate"])
 50 | print("Engagement Score: \n", df[["Student_ID", "Engagement_Score"]].head())
 51 | #Scatter plot
 52 | sns.scatterplot(x="Engagement_Score", y="Final_Exam_Score", data=df, hue="Dropout_Likelihood", palette="coolwarm")
 53 | plt.title("Engagement Score vs Final Exam Score")
 54 | plt.xlabel("Engagement Score")
 55 | plt.ylabel("Final Exam Score")
 56 | plt.grid(True)
 57 | plt.tight_layout()
 58 | plt.show()
 59 | 
 60 | # Plot histograms of numerical features
 61 | sns.set(style="whitegrid")
 62 | 
 63 | numerical_columns = [
 64 |     'Age', 'Time_Spent_on_Videos', 'Quiz_Attempts', 'Quiz_Scores',
 65 |     'Forum_Participation', 'Assignment_Completion_Rate',
 66 |     'Final_Exam_Score', 'Feedback_Score'
 67 | ]
 68 | 
 69 | df[numerical_columns].hist(figsize=(16, 10), bins=20, edgecolor='black')
 70 | plt.suptitle("Histograms of Numerical Features", fontsize=16, fontweight='bold')
 71 | plt.tight_layout(rect=[0, 0.03, 1, 0.95])
 72 | plt.show()
 73 | 
 74 | ####################################################################################
 75 | 
 76 | #OBJECTIVE -3: Analyze the Impact of Learning Styles on Academic Success
 77 | #Box plot
 78 | plt.figure(figsize=(10, 6))
 79 | sns.set_style("whitegrid")
 80 | sns.boxplot(x="Learning_Style", y="Final_Exam_Score", data=df, hue="Learning_Style", palette="Set2", legend=False)
 81 | plt.title("Box Plot - Final Exam Scores by Learning Style", fontsize=16)
 82 | plt.xlabel("Learning Style", fontsize=12)
 83 | plt.ylabel("Final Exam Score", fontsize=12)
 84 | 
 85 | plt.tight_layout()
 86 | plt.show()
 87 | 
 88 | #Bar plot
 89 | learning_style_avg = df.groupby("Learning_Style")["Final_Exam_Score"].mean().reset_index()
 90 | 
 91 | plt.figure(figsize=(8, 5))
 92 | sns.barplot(x="Learning_Style", y="Final_Exam_Score", data=learning_style_avg, hue="Learning_Style", palette="Blues_d", legend=False)
 93 | 
 94 | plt.title("Bar Plot - Average Final Exam Score per Learning Style", fontsize=15)
 95 | plt.xlabel("Learning Style")
 96 | plt.ylabel("Average Final Exam Score")
 97 | 
 98 | plt.tight_layout()
 99 | plt.show()
100 | 
101 | ####################################################################################
102 | 
103 | #OBJECTIVE -4: Hypothesis Testing - Chi Square Test and T-Test
104 | 
105 | #T-Test -> Do Highly Engaged Students Perform Better?
106 | 
107 | df["Engagement_Category"] = pd.qcut(df["Engagement_Score"], q=3, labels=["Low", "Medium", "High"])
108 | 
109 | low_engagement = df[df["Engagement_Category"] == "Low"]["Final_Exam_Score"]
110 | high_engagement = df[df["Engagement_Category"] == "High"]["Final_Exam_Score"]
111 | 
112 | 
113 | t_stat, p_value = ttest_ind(low_engagement, high_engagement, equal_var=False)
114 | 
115 | print(f"T-Statistic: {t_stat:.2f}")
116 | print(f"P-Value: {p_value:.5f}\n")
117 | 
118 | if p_value < 0.05:
119 |     print("Reject H0: High engagement students perform significantly better.")
120 | else:
121 |     print("Fail to Reject H0: No significant difference in performance.")
122 | 
123 | 
124 | #Chi-Square Test -> Does Engagement Level Affect Dropout?
125 | 
126 | # Convert Engagement Score into categories (Low, Medium, High)
127 | df["Engagement_Category"] = pd.qcut(df["Engagement_Score"], q=3, labels=["Low", "Medium", "High"])
128 | 
129 | # Contingency table
130 | contingency_table = pd.crosstab(df["Engagement_Category"], df["Dropout_Likelihood"])
131 | 
132 | # Perform Chi-Square Test
133 | chi2_stat, p_value, dof, expected = stats.chi2_contingency(contingency_table)
134 | 
135 | print(f"Chi-Square Statistic: {chi2_stat:.2f}")
136 | print(f"P-Value: {p_value:.5f}\n")
137 | 
138 | if p_value < 0.05:
139 |     print("Reject H0: Engagement Level significantly affects Dropout Likelihood.")
140 | else:
141 |     print("Fail to Reject H0: No significant relationship between Engagement Level and Dropout.")
142 | 
143 | print(f"Chi-Square Statistic: {chi2_stat:.2f}")
144 | print(f"P-Value: {p_value:.5f}")
145 | 
146 | #Grouped bar chart
147 | contingency_table.plot(kind='bar', figsize=(9, 6), colormap='viridis')
148 | plt.title("Dropout Likelihood by Engagement Level", fontsize=16)
149 | plt.xlabel("Engagement Level", fontsize=12)
150 | plt.ylabel("Number of Students", fontsize=12)
151 | plt.xticks(rotation=0)
152 | plt.legend(title="Dropout Likelihood")
153 | plt.grid(axis='y', linestyle='--', alpha=0.7)
154 | plt.tight_layout()
155 | plt.show()
156 | 
157 | #OBJECTIVE -5: Identify key factors influencing Dropout Likelihood using charts and plots
158 | 
159 | #Distribution Analysis (Box plot)
160 | sns.set(style="whitegrid")
161 | 
162 | important_features = [
163 |     'Time_Spent_on_Videos',
164 |     'Assignment_Completion_Rate',
165 |     'Forum_Participation',
166 |     'Final_Exam_Score'
167 | ]
168 | 
169 | fig, axes = plt.subplots(1, 4, figsize=(16, 6))
170 | 
171 | sns.boxplot(y=df['Time_Spent_on_Videos'], ax=axes[0], color='skyblue')
172 | axes[0].set_title("Time Spent on Videos")
173 | axes[0].set_xticks([])
174 | 
175 | sns.boxplot(y=df['Assignment_Completion_Rate'], ax=axes[1], color='lightgreen')
176 | axes[1].set_title("Assignment Completion Rate")
177 | axes[1].set_xticks([])
178 | 
179 | sns.boxplot(y=df['Forum_Participation'], ax=axes[2], color='salmon')
180 | axes[2].set_title("Forum Participation")
181 | axes[2].set_xticks([])
182 | 
183 | sns.boxplot(y=df['Final_Exam_Score'], ax=axes[3], color='plum')
184 | axes[3].set_title("Final Exam Score")
185 | axes[3].set_xticks([])
186 | 
187 | plt.suptitle("Boxplots of Key Features", fontsize=16, fontweight='bold')
188 | plt.tight_layout(rect=[0, 0.03, 1, 0.90])
189 | plt.show()
190 | 
191 | # Visualization of categorical features
192 | sns.set(style="whitegrid")
193 | categorical_features = ['Gender', 'Education_Level', 'Learning_Style', 'Dropout_Likelihood']
194 | plt.figure(figsize=(16, 10))
195 | plt.subplot(2, 2, 1)
196 | sns.countplot(x='Gender', data=df, hue="Gender", palette='pastel', legend=False)
197 | plt.title('Gender Distribution')
198 | 
199 | plt.subplot(2, 2, 2)
200 | sns.countplot(x='Education_Level', data=df, hue="Education_Level", palette='Set2', legend=False)
201 | plt.title('Education Level Distribution')
202 | plt.xticks(rotation=30)
203 | 
204 | plt.subplot(2, 2, 3)
205 | sns.countplot(x='Learning_Style', data=df, hue="Learning_Style", palette='Set3', legend=False)
206 | plt.title('Learning Style Distribution')
207 | plt.xticks(rotation=30)
208 | 
209 | plt.subplot(2, 2, 4)
210 | sns.countplot(x='Dropout_Likelihood', data=df, hue="Dropout_Likelihood", palette='coolwarm', legend=False)
211 | plt.title('Dropout Likelihood Distribution')
212 | 
213 | plt.suptitle("Distribution of Categorical Features", fontsize=16, fontweight='bold')
214 | plt.tight_layout(rect=[0, 0.03, 1, 0.95])
215 | plt.show()
216 | 
217 | #Develop an Interactive Dashboard for Educators
218 | 
219 | # PIE CHART – Learning Style Distribution
220 | pie_chart = px.pie(
221 |     df, names="Learning_Style", title="Distribution of Learning Styles",
222 |     color_discrete_sequence=px.colors.sequential.RdBu
223 | )
224 | 
225 | # DONUT CHART – Dropout Distribution
226 | donut_chart = px.pie(
227 |     df, names="Dropout_Likelihood", title="Dropout Likelihood (Yes/No)",
228 |     hole=0.5, color_discrete_sequence=px.colors.sequential.Mint
229 | )
230 | 
231 | # Dash app layout
232 | app = dash.Dash(__name__)
233 | 
234 | # Define a dark background style
235 | dark_style = {
236 |     'backgroundColor': '#1e1e2f',
237 |     'color': '#f0f0f0',
238 |     'fontFamily': 'Arial',
239 |     'padding': '20px'
240 | }
241 | 
242 | app.layout = html.Div(style={'backgroundColor': dark_style['backgroundColor']}, children=[
243 |     html.H1("Personalized Learning Dashboard", style={
244 |         'textAlign': 'center',
245 |         'color': dark_style['color'],
246 |         'paddingTop': '20px'
247 |     }),
248 | 
249 |     html.Div([
250 |         dcc.Graph(figure=pie_chart)
251 |     ], style={'width': '48%', 'display': 'inline-block', 'padding': '10px'}),
252 | 
253 |     html.Div([
254 |         dcc.Graph(figure=donut_chart)
255 |     ], style={'width': '48%', 'display': 'inline-block', 'padding': '10px'}),
256 | 
257 |     html.Footer("Developed by Jyotsna Chaudhary", style={
258 |         'textAlign': 'center',
259 |         'padding': '20px',
260 |         'color': dark_style['color'],
261 |         'fontSize': '14px'
262 |     })
263 | ])
264 |     
265 | if __name__ == '__main__':
266 |     app.run_server(debug=True)
267 | 


--------------------------------------------------------------------------------