├── tanishkapythonreport.docx
└── main.ipynb


/tanishkapythonreport.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tanishka-Rana-1/Data-Analysis/HEAD/tanishkapythonreport.docx


--------------------------------------------------------------------------------
/main.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "df1c78b2",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd\n",
 12 |     "import seaborn as sns\n",
 13 |     "import matplotlib.pyplot as plt\n",
 14 |     "\n",
 15 |     "#Load the candidate-level results; the path is relative, so the CSV must be in the working directory\n",
 16 |     "df = pd.read_csv('election_2024.csv')\n",
 17 |     "print(df.head())  # preview only, so the exploration output below stays readable\n",
 18 |     "#Exploring the dataset: size, gaps per column, dtypes and summary statistics\n",
 19 |     "print(\"Shape of dataset:\", df.shape)\n",
 20 |     "missing = df.isnull().sum()\n",
 21 |     "print(\"Missing values in each column:\\n\", missing)\n",
 22 |     "print(\"Info:\")\n",
 23 |     "df.info()  # info() writes its report itself and returns None, so print(df.info()) would also print 'None'\n",
 24 |     "print(\"Description:\\n\",df.describe())\n",
 25 |     "\n",
 26 |     "#-----Cleaning the dataset------------------------------------------------------------------------------------\n",
 27 |     "\n",
 28 |     "#The demographic/text columns all get the same treatment, so the steps are driven by one table.\n",
 29 |     "#Each entry: (column, label printed before cleaning, placeholder for gaps, coerce to numeric first?)\n",
 30 |     "#Note that 'Age' gaps become 0, so an age of 0 means 'unknown' from here on.\n",
 31 |     "fill_plan = [\n",
 32 |     "    ('Gender', \"number of missing values in the 'Gender':\", 'N/A', False),\n",
 33 |     "    ('Age', \"number of missing values in 'Age':\", 0, True),\n",
 34 |     "    ('Category', \"number of missing values in 'Category':\", 'N/A', False),\n",
 35 |     "    ('Candidate Name', \"number of missing values in the 'Candidate Name':\", 'Unknown', False),\n",
 36 |     "    ('Party Name', \"number of missing values in the 'Party Name':\", 'Unknown', False),\n",
 37 |     "]\n",
 38 |     "\n",
 39 |     "#Columns are processed in the order listed, which fixes the order of the printed report\n",
 40 |     "for column, before_label, placeholder, as_number in fill_plan:\n",
 41 |     "    #gap count as loaded\n",
 42 |     "    print(before_label, df[column].isna().sum())\n",
 43 |     "\n",
 44 |     "    cleaned = df[column]\n",
 45 |     "    if as_number:\n",
 46 |     "        #entries that cannot be parsed as numbers turn into NaN and are filled just below\n",
 47 |     "        cleaned = pd.to_numeric(cleaned, errors='coerce')\n",
 48 |     "    df[column] = cleaned.fillna(placeholder)\n",
 49 |     "\n",
 50 |     "    #gap count after filling\n",
 51 |     "    print(column + \" Gaps After:\", df[column].isna().sum())\n",
 52 |     "\n",
 53 |     "#NOTA is not an actual contender, so its rows are left out of the candidate analysis\n",
 54 |     "print(\"Rows Before:\", df.shape[0])\n",
 55 |     "df = df.loc[df['Candidate Name'].ne('NOTA')]\n",
 56 |     "print(\"Rows After Dropping NOTA:\", df.shape[0])\n",
 57 |     "\n",
 58 |     "\n",
 59 |     "#Vote and elector counts: coerce to numbers; blanks and unparseable entries end up as 0\n",
 60 |     "numeric_cols = ['Total Votes Polled In The Constituency', 'Valid Votes', 'Votes Secured - General',\n",
 61 |     "                'Votes Secured - Postal', 'Votes Secured - Total', 'Total Electors']\n",
 62 |     "for count_column in numeric_cols:\n",
 63 |     "    print(\"Before -\", count_column, \"Gaps:\", df[count_column].isna().sum())\n",
 64 |     "    as_numbers = pd.to_numeric(df[count_column], errors='coerce')\n",
 65 |     "    df[count_column] = as_numbers.fillna(0)\n",
 66 |     "    print(\"After -\", count_column, \"Gaps:\", df[count_column].isna().sum())\n",
 67 |     "\n",
 68 |     "#Re-check after cleaning: frame size, gaps left per column, and the final dtypes\n",
 69 |     "post_clean_report = [(\"Final Size:\", df.shape), (\"Any Gaps Left?:\\n\", df.isna().sum()), (\"Final Types:\\n\", df.dtypes)]\n",
 70 |     "for report_label, report_value in post_clean_report:\n",
 71 |     "    print(report_label, report_value)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "id": "580706a0",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "# Objective 1"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "id": "38948bc2",
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "#Objective 1: Analyze the Demographic Composition of Candidates\n",
 90 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1']  # palette shared by the charts in this cell\n",
 91 |     "title_color = '#2C3E50'  # Dark blue\n",
 92 |     "#1.1 Distribution of candidates by gender (bar chart, one bar per gender label)\n",
 93 |     "#The palette is sized from the data: cleaning can add an 'N/A' label, so the number of bars is not fixed at 3\n",
 94 |     "plt.figure(figsize=(8, 5))\n",
 95 |     "sns.countplot(data=df, x='Gender', hue='Gender', palette=colors[:df['Gender'].nunique()], edgecolor='black', linewidth=1, legend=False)\n",
 96 |     "plt.title('1.1 Gender Split of Candidates', fontsize=14, color=title_color)\n",
 97 |     "plt.xlabel('Gender', fontsize=12)\n",
 98 |     "plt.ylabel('Candidate Count', fontsize=12)\n",
 99 |     "plt.show()\n",
100 |     "\n",
101 |     "#1.2 Age histogram with a KDE curve; only rows with Age > 0 are plotted\n",
102 |     "fig, ax = plt.subplots(figsize=(10, 5))\n",
103 |     "sns.histplot(data=df.loc[df['Age'].gt(0)], x='Age', bins=20, kde=True, color=colors[1], edgecolor='black', linewidth=1, ax=ax)\n",
104 |     "ax.set_title('1.2 Age Range of Candidates', fontsize=14, color=title_color)\n",
105 |     "ax.set_xlabel('Age', fontsize=12)\n",
106 |     "ax.set_ylabel('Candidate Count', fontsize=12)\n",
107 |     "plt.show()\n",
108 |     "\n",
109 |     "#1.3 Share of candidates in each social category (pie chart with percentage labels)\n",
110 |     "candidates_per_category = df['Category'].value_counts()\n",
111 |     "fig, ax = plt.subplots(figsize=(8, 8))\n",
112 |     "ax.pie(candidates_per_category, labels=candidates_per_category.index, autopct='%1.1f%%', colors=colors)\n",
113 |     "ax.set_title('1.3 Social Category Breakdown', fontsize=14, color=title_color)\n",
114 |     "plt.show()\n",
115 |     "\n",
116 |     "# 1.4 Gender mix in the ten constituencies with the most candidates (count plot split by gender)\n",
117 |     "busiest_pcs = df['PC Name'].value_counts().head(10).index\n",
118 |     "busiest_rows = df.loc[df['PC Name'].isin(busiest_pcs)]\n",
119 |     "gender_palette = colors[:busiest_rows['Gender'].nunique()]  # as many palette colours as gender labels present\n",
120 |     "fig, ax = plt.subplots(figsize=(12, 6))\n",
121 |     "sns.countplot(data=busiest_rows, x='PC Name', hue='Gender', palette=gender_palette, edgecolor='black', linewidth=1, ax=ax)\n",
122 |     "ax.set_title('1.4 Gender in Top 10 Constituencies', fontsize=14, color=title_color)\n",
123 |     "ax.set_xlabel('Constituency', fontsize=12)\n",
124 |     "ax.set_ylabel('Candidate Count', fontsize=12)\n",
125 |     "plt.xticks(rotation=45)\n",
126 |     "ax.legend(title='Gender')\n",
127 |     "plt.show()\n"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "id": "cb22fcc0",
133 |    "metadata": {},
134 |    "source": [
135 |     "# Objective 2"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "id": "436b4445",
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "#Objective 2: Evaluate Party Performance Across Constituencies\n",
146 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1',\n",
147 |     "          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']  # ten colours for the top-10 charts\n",
148 |     "title_color = '#2C3E50'\n",
149 |     "\n",
150 |     "#2.1 Candidates per constituency for the ten parties with the most candidates overall (box plot)\n",
151 |     "party_pc_counts = df.groupby(['Party Name', 'PC Name']).size().rename('Candidate Count').reset_index()\n",
152 |     "largest_parties = df['Party Name'].value_counts().head(10).index\n",
153 |     "df_top_10 = party_pc_counts.loc[party_pc_counts['Party Name'].isin(largest_parties)]\n",
154 |     "fig, ax = plt.subplots(figsize=(12, 6))\n",
155 |     "sns.boxplot(data=df_top_10, x='Party Name', y='Candidate Count', hue='Party Name', palette=colors[:10], legend=False, ax=ax)\n",
156 |     "ax.set_title('2.1 Candidate Spread by Top 10 Parties', fontsize=14, color=title_color)\n",
157 |     "ax.set_xlabel('Party Name', fontsize=12)\n",
158 |     "ax.set_ylabel('Candidates per Constituency', fontsize=12)\n",
159 |     "plt.xticks(rotation=45)\n",
160 |     "plt.show()\n",
161 |     "\n",
162 |     "#2.2 Percentage of all secured votes held by each of the ten parties with the most votes (horizontal bar chart)\n",
163 |     "all_votes_secured = df['Votes Secured - Total'].sum()\n",
164 |     "vote_share = df.groupby('Party Name')['Votes Secured - Total'].sum().sort_values(ascending=False).head(10) / all_votes_secured * 100\n",
165 |     "fig, ax = plt.subplots(figsize=(12, 6))\n",
166 |     "sns.barplot(x=vote_share.values, y=vote_share.index, hue=vote_share.index, palette=colors[:10], edgecolor='black', linewidth=1, legend=False, ax=ax)\n",
167 |     "ax.set_title('2.2 Top 10 Parties by Vote Share (%)', fontsize=14, color=title_color)\n",
168 |     "ax.set_xlabel('Vote Share (%)', fontsize=12)\n",
169 |     "ax.set_ylabel('Party Name', fontsize=12)\n",
170 |     "plt.show()\n",
171 |     "\n",
172 |     "#Numbers behind the Objective 2 charts: candidate counts, vote share and per-constituency spread\n",
173 |     "spread_stats = df_top_10.groupby('Party Name')['Candidate Count'].describe()\n",
174 |     "print(\"Top 10 Parties by Candidate Count:\\n\", df['Party Name'].value_counts().head(10))\n",
175 |     "print(\"\\nTop 10 Parties by Vote Share (%):\\n\", vote_share)\n",
176 |     "print(\"\\nHow Candidates Spread Out (Top 10):\\n\", spread_stats)\n"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "id": "c8af1045",
182 |    "metadata": {},
183 |    "source": [
184 |     "# Objective 3"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "id": "54daa1da",
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "#Objective 3: Examine the Influence of Social Categories on Voting Outcomes\n",
195 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1',\n",
196 |     "          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']\n",
197 |     "title_color = '#2C3E50'\n",
198 |     "\n",
199 |     "#3.1 Percentage of all secured votes that went to candidates of each social category (bar chart)\n",
200 |     "category_votes = df.groupby('Category')['Votes Secured - Total'].sum()\n",
201 |     "vote_share_by_category = category_votes.div(df['Votes Secured - Total'].sum()).mul(100)\n",
202 |     "fig, ax = plt.subplots(figsize=(10, 6))\n",
203 |     "sns.barplot(x=vote_share_by_category.index, y=vote_share_by_category.values, hue=vote_share_by_category.index, palette=colors[:len(category_votes)], edgecolor='black', linewidth=1, legend=False, ax=ax)\n",
204 |     "ax.set_title('3.1 Vote Share by Category (%)', fontsize=14, color=title_color)\n",
205 |     "ax.set_xlabel('Social Category', fontsize=12)\n",
206 |     "ax.set_ylabel('Vote Share (%)', fontsize=12)\n",
207 |     "plt.show()\n",
208 |     "\n",
209 |     "#3.2 Spread of votes per candidate row within each social category (box plot)\n",
210 |     "fig, ax = plt.subplots(figsize=(10, 6))\n",
211 |     "sns.boxplot(data=df, x='Category', y='Votes Secured - Total', hue='Category', palette=colors[:category_votes.size], legend=False, ax=ax)\n",
212 |     "ax.set_title('3.2 Vote Spread by Category', fontsize=14, color=title_color)\n",
213 |     "ax.set_xlabel('Social Category', fontsize=12)\n",
214 |     "ax.set_ylabel('Votes Secured', fontsize=12)\n",
215 |     "plt.show()\n",
216 |     "\n",
217 |     "# 3.3 Winners per social category; the winner of a constituency is its row with the most total votes (bar chart)\n",
218 |     "top_row_per_pc = df.groupby('PC Name')['Votes Secured - Total'].idxmax()\n",
219 |     "winners = df.loc[top_row_per_pc]\n",
220 |     "fig, ax = plt.subplots(figsize=(10, 6))\n",
221 |     "sns.countplot(data=winners, x='Category', hue='Category', palette=colors[:winners['Category'].nunique()], edgecolor='black', linewidth=1, legend=False, ax=ax)\n",
222 |     "ax.set_title('3.3 Winners by Category', fontsize=14, color=title_color)\n",
223 |     "ax.set_xlabel('Social Category', fontsize=12)\n",
224 |     "ax.set_ylabel('Number of Winners', fontsize=12)\n",
225 |     "plt.show()\n",
226 |     "\n",
227 |     "#Figures behind the Objective 3 charts, printed as label/table pairs\n",
228 |     "objective_3_summary = [(\"Vote Share by Category (%):\\n\", vote_share_by_category), (\"Average Votes by Category:\\n\", df.groupby('Category')['Votes Secured - Total'].mean()), (\"Winners by Category:\\n\", winners['Category'].value_counts())]\n",
229 |     "for summary_label, summary_table in objective_3_summary:\n",
230 |     "    print(summary_label, summary_table)"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "markdown",
235 |    "id": "16bae0f9",
236 |    "metadata": {},
237 |    "source": [
238 |     "# Objective 4"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "id": "1d156428",
245 |    "metadata": {},
246 |    "outputs": [],
247 |    "source": [
248 |     "#Objective 4:Voter Turnout Variations by State\n",
249 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1', \n",
250 |     "          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']\n",
251 |     "title_color = '#2C3E50'\n",
252 |     "print(\"Columns I’m Using:\", df.columns.tolist())\n",
253 |     "print(\"\\nFirst Few Rows:\\n\", df[['PC Name', 'Total Votes Polled In The Constituency', 'Total Electors']].head())\n",
254 |     "#Turnout %: cleaning stored blank elector counts as 0, so treat 0 electors as missing; dividing by 0 would give inf and distort the state means\n",
255 |     "df['Voter Turnout (%)'] = (df['Total Votes Polled In The Constituency'] / df['Total Electors'].replace(0, np.nan)) * 100\n",
256 |     "#State is taken as the text before the first ' - ' in 'PC Name' (assumes 'State - Constituency' naming - TODO confirm)\n",
257 |     "df['State'] = df['PC Name'].str.split(' - ').str[0].str.strip()\n",
258 |     "print(\"\\nStates I Pulled Out:\\n\", df['State'].unique())\n",
259 |     "\n",
260 |     "#Turnout comes from constituency-level columns (assumed identical on every candidate row of a constituency): keep one row per constituency so a seat is not weighted by its number of candidates\n",
261 |     "pc_turnout = df.drop_duplicates(subset=['State', 'PC Name'])\n",
262 |     "#4.1 average voter turnout for top 10 states (Bar Chart)\n",
263 |     "state_turnout = pc_turnout.groupby('State')['Voter Turnout (%)'].mean().sort_values(ascending=False).head(10)\n",
264 |     "plt.figure(figsize=(12, 6))\n",
265 |     "sns.barplot(x=state_turnout.index, y=state_turnout.values, hue=state_turnout.index, palette=colors[:10], edgecolor='black', linewidth=1, legend=False)\n",
266 |     "plt.title('4.1 Top 10 States by Average Turnout (%)', fontsize=14, color=title_color)\n",
267 |     "plt.xlabel('State', fontsize=12)\n",
268 |     "plt.ylabel('Average Turnout (%)', fontsize=12)\n",
269 |     "plt.xticks(rotation=45)\n",
270 |     "plt.show()\n",
271 |     "#4.2 Turnout Spread by State (Box Chart); reuses the ranking from 4.1 so both charts always show the same ten states\n",
272 |     "top_10_states = state_turnout.index\n",
273 |     "df_top_10 = pc_turnout[pc_turnout['State'].isin(top_10_states)]\n",
274 |     "plt.figure(figsize=(14, 6))\n",
275 |     "sns.boxplot(data=df_top_10, x='State', y='Voter Turnout (%)', hue='State', palette=colors[:10], legend=False)\n",
276 |     "plt.title('4.2 Turnout Spread in Top 10 States', fontsize=14, color=title_color)\n",
277 |     "plt.xlabel('State', fontsize=12)\n",
278 |     "plt.ylabel('Voter Turnout (%)', fontsize=12)\n",
279 |     "plt.xticks(rotation=45)\n",
280 |     "plt.show()\n",
281 |     "#Numerical summary for Objective 4 (one row per constituency, top 10 states)\n",
282 |     "print(\"Average Turnout by State (Top 10):\\n\", state_turnout)\n",
283 |     "print(\"\\nTurnout Stats by State (Top 10):\\n\", df_top_10.groupby('State')['Voter Turnout (%)'].describe())"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "markdown",
288 |    "id": "ed306121",
289 |    "metadata": {},
290 |    "source": [
291 |     "# Objective 5"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": null,
297 |    "id": "339134b9",
298 |    "metadata": {},
299 |    "outputs": [],
300 |    "source": [
301 |     "#Objective 5: Impact of Postal Votes on Candidate Performance\n",
302 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1',\n",
303 |     "          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']\n",
304 |     "title_color = '#2C3E50'\n",
305 |     "#5.1 General, postal and total votes of the ten rows with the most total votes (line chart)\n",
306 |     "top_10_candidates = df.nlargest(10, 'Votes Secured - Total')\n",
307 |     "positions = range(len(top_10_candidates))\n",
308 |     "line_specs = [('Votes Secured - General', 'General Votes', 'o'), ('Votes Secured - Postal', 'Postal Votes', 's'), ('Votes Secured - Total', 'Total Votes', '^')]\n",
309 |     "fig, ax = plt.subplots(figsize=(12, 6))\n",
310 |     "for (vote_column, legend_label, point_marker), line_color in zip(line_specs, colors):  # zip stops after the first three palette colours\n",
311 |     "    ax.plot(positions, top_10_candidates[vote_column], label=legend_label, color=line_color, marker=point_marker, linewidth=2)\n",
312 |     "ax.set_title('5.1 Votes Breakdown for Top 10 Candidates', fontsize=14, color=title_color)\n",
313 |     "ax.set_xlabel('Candidate Index', fontsize=12)\n",
314 |     "ax.set_ylabel('Vote Count', fontsize=12)\n",
315 |     "plt.xticks(positions, top_10_candidates['Candidate Name'], rotation=45)\n",
316 |     "ax.legend(title='Vote Type')\n",
317 |     "plt.show()\n",
318 |     "\n",
319 |     "#5.2 General vs postal votes per row on log-log axes; the marker size argument is total votes / 1000\n",
320 |     "marker_sizes = df['Votes Secured - Total'].div(1000)\n",
321 |     "fig, ax = plt.subplots(figsize=(10, 6))\n",
322 |     "ax.scatter(df['Votes Secured - General'], df['Votes Secured - Postal'], s=marker_sizes, color=colors[3], alpha=0.6, edgecolor='black', linewidth=0.5)\n",
323 |     "ax.set_title('5.2 General vs. Postal Votes (Size = Total)', fontsize=14, color=title_color)\n",
324 |     "ax.set_xlabel('General Votes', fontsize=12)\n",
325 |     "ax.set_ylabel('Postal Votes', fontsize=12)\n",
326 |     "ax.set_xscale('log')\n",
327 |     "ax.set_yscale('log')\n",
328 |     "plt.show()\n",
329 |     "\n",
330 |     "#Figures behind the Objective 5 charts: mean per vote type, their correlations, and the top-10 table\n",
331 |     "vote_columns = ['Votes Secured - General', 'Votes Secured - Postal', 'Votes Secured - Total']\n",
332 |     "for summary_label, summary_table in ((\"Average Votes by Type:\\n\", df[vote_columns].mean()), (\"\\nHow Votes Relate:\\n\", df[vote_columns].corr()), (\"\\nTop 10 Candidates by Votes:\\n\", top_10_candidates[['Candidate Name', 'PC Name'] + vote_columns])):\n",
333 |     "    print(summary_label, summary_table)\n"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "id": "77db5f2d",
339 |    "metadata": {},
340 |    "source": [
341 |     "# Objective 6"
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "code",
346 |    "execution_count": null,
347 |    "id": "af02840f",
348 |    "metadata": {},
349 |    "outputs": [],
350 |    "source": [
351 |     "#Objective 6: Correlations Between Electors and Vote Metrics\n",
352 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1',\n",
353 |     "          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']\n",
354 |     "title_color = '#2C3E50'\n",
355 |     "\n",
356 |     "#The three count columns compared in this objective, and their pairwise correlation table\n",
357 |     "metrics = ['Total Electors', 'Total Votes Polled In The Constituency', 'Valid Votes']\n",
358 |     "df_metrics = df.loc[:, metrics]\n",
359 |     "corr_matrix = df_metrics.corr()\n",
360 |     "\n",
361 |     "#6.1 Correlation between electors, votes polled and valid votes (annotated heatmap)\n",
362 |     "fig, ax = plt.subplots(figsize=(8, 6))\n",
363 |     "sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)\n",
364 |     "ax.set_title('6.1 How Electors, Votes Polled, and Valid Votes Connect', fontsize=14, color=title_color)\n",
365 |     "plt.show()\n",
366 |     "\n",
367 |     "#6.2 Pairwise relationships among electors, votes polled and valid votes (pair plot, histograms on the diagonal)\n",
368 |     "pair_plot = sns.pairplot(df_metrics, diag_kind='hist', plot_kws={'color': colors[0]}, diag_kws={'color': colors[1]})\n",
369 |     "pair_plot.figure.suptitle('6.2 Pairing Up Electors, Votes Polled, and Valid Votes', fontsize=14, color=title_color, y=1.02)  # .figure replaces the deprecated .fig; y=1.02 puts the title just above the grid\n",
370 |     "plt.show()\n",
371 |     "\n",
372 |     "# Figures behind the Objective 6 charts: the correlation table, then describe() of the three columns\n",
373 |     "for summary_label, summary_table in ((\"Correlation Breakdown:\\n\", corr_matrix), (\"\\nBasic Stats:\\n\", df_metrics.describe())):\n",
374 |     "    print(summary_label, summary_table)\n"
375 |    ]
376 |   },
377 |   {
378 |    "cell_type": "markdown",
379 |    "id": "77386b5d",
380 |    "metadata": {},
381 |    "source": [
382 |     "# Objective 7"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": null,
388 |    "id": "a12f944f",
389 |    "metadata": {},
390 |    "outputs": [],
391 |    "source": [
392 |     "#Objective 7: Analyze the Prevalence and Distribution of NOTA Votes\n",
393 |     "#Re-read the raw CSV: the cleaned df had its NOTA rows dropped, and those rows are the subject here\n",
394 |     "df_original = pd.read_csv('election_2024.csv')\n",
395 |     "colors = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1', \n",
396 |     "          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']\n",
397 |     "title_color = '#2C3E50'\n",
398 |     "\n",
399 |     "#State = text before the first ' - ' in 'PC Name'; then keep only the NOTA rows as an independent copy\n",
400 |     "df_original['State'] = df_original['PC Name'].str.split(' - ').str[0].str.strip()\n",
401 |     "nota_df = df_original[df_original['Candidate Name'] == 'NOTA'].copy()\n",
402 |     "#Replace the column outright: .loc[:, col] = ... writes into the existing column in place (pandas >= 2.0) and can keep its old dtype, e.g. object\n",
403 |     "nota_df['Votes Secured - Total'] = pd.to_numeric(nota_df['Votes Secured - Total'], errors='coerce').fillna(0)\n",
404 |     "\n",
405 |     "#7.1 Total NOTA votes in the ten states with the most NOTA votes (bar chart)\n",
406 |     "nota_by_state = nota_df.groupby('State')['Votes Secured - Total'].sum().sort_values(ascending=False).head(10)\n",
407 |     "fig, ax = plt.subplots(figsize=(12, 6))\n",
408 |     "sns.barplot(x=nota_by_state.index, y=nota_by_state.values, hue=nota_by_state.index, palette=colors[:10], edgecolor='black', linewidth=1, legend=False, ax=ax)\n",
409 |     "ax.set_title('7.1 Top 10 States for NOTA Votes', fontsize=14, color=title_color)\n",
410 |     "ax.set_xlabel('State', fontsize=12)\n",
411 |     "ax.set_ylabel('Total NOTA Votes', fontsize=12)\n",
412 |     "plt.xticks(rotation=45)\n",
413 |     "plt.show()\n",
414 |     "\n",
415 |     "#7.2 How the NOTA votes of those ten states split between them, in percent (pie chart)\n",
416 |     "nota_share = nota_by_state.div(nota_by_state.sum()).mul(100)\n",
417 |     "fig, ax = plt.subplots(figsize=(8, 8))\n",
418 |     "ax.pie(nota_share, labels=nota_share.index, autopct='%1.1f%%', colors=colors[:10])\n",
419 |     "ax.set_title('7.2 NOTA Share in Top 10 States', fontsize=14, color=title_color)\n",
420 |     "plt.show()\n",
421 |     "\n",
422 |     "# Figures behind the Objective 7 charts; the overall total is summed over all NOTA rows, not just the top ten states\n",
423 |     "total_nota_votes = nota_df['Votes Secured - Total'].sum()\n",
424 |     "for summary_label, summary_value in ((\"NOTA Votes by State (Top 10):\\n\", nota_by_state), (\"\\nNOTA Share by State (Top 10, %):\\n\", nota_share), (\"\\nTotal NOTA Votes Overall:\", total_nota_votes)):\n",
425 |     "    print(summary_label, summary_value)"
426 |    ]
427 |   }
428 |  ],
429 |  "metadata": {
430 |   "language_info": {
431 |    "name": "python"
432 |   }
433 |  },
434 |  "nbformat": 4,
435 |  "nbformat_minor": 5
436 | }
437 | 


--------------------------------------------------------------------------------