# main.ipynb -- exploratory analysis of election_2024.csv.
# Originally stored as a Jupyter notebook (nbformat 4.5, Python kernel); the
# repository also contains tanishkapythonreport.docx.
# Cell boundaries are marked with "# %%", markdown cells with "# %% [markdown]".

# %%
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# %%
# Shared configuration: defined once here instead of being re-declared in
# every objective cell.
DATA_PATH = 'election_2024.csv'
NOTA_LABEL = 'NOTA'
TITLE_COLOR = '#2C3E50'  # Dark blue
COLORS = ['#FF6F61', '#6B5B95', '#88B04B', '#F9A825', '#45B7D1',
          '#D4A5A5', '#9B59B6', '#3498DB', '#E67E22', '#2ECC71']
NUMERIC_COLS = ['Total Votes Polled In The Constituency', 'Valid Votes',
                'Votes Secured - General', 'Votes Secured - Postal',
                'Votes Secured - Total', 'Total Electors']
VOTE_TYPE_COLS = ['Votes Secured - General', 'Votes Secured - Postal',
                  'Votes Secured - Total']


# %%
def state_from_pc_name(pc_names):
    """Return the stripped text before the first ' - ' of each constituency name.

    NOTE(review): this treats 'PC Name' as 'State - Constituency'. A name
    without the separator is returned whole -- confirm against the CSV.
    """
    return pc_names.str.split(' - ').str[0].str.strip()


def constituency_level(frame, columns):
    """Collapse candidate rows to one row per 'PC Name'.

    Each requested column is reduced with max(), so a value that is repeated on
    every candidate row of a constituency is kept once. Row order follows the
    first appearance of each constituency in `frame`.

    NOTE(review): assumes `columns` hold per-constituency figures that are
    repeated on candidate rows -- confirm against the CSV.
    """
    return frame.groupby('PC Name', sort=False)[columns].max().reset_index()


def finish_plot(title, xlabel=None, ylabel=None, rotate_xticks=False):
    """Apply the notebook's common title/label styling to the current figure and show it."""
    plt.title(title, fontsize=14, color=TITLE_COLOR)
    if xlabel is not None:
        plt.xlabel(xlabel, fontsize=12)
    if ylabel is not None:
        plt.ylabel(ylabel, fontsize=12)
    if rotate_xticks:
        plt.xticks(rotation=45)
    plt.show()


# %%
# Reading the dataset. raw_df is never modified, so later cells (the NOTA
# analysis in Objective 7) can start from the untouched rows without
# re-reading the file.
raw_df = pd.read_csv(DATA_PATH)
print(raw_df.head())  # preview only; printing the full frame floods the output

# Exploring the dataset
print("Shape of dataset:", raw_df.shape)
missing = raw_df.isnull().sum()
print("Missing values in each column:\n", missing)
print("Info:")
raw_df.info()  # info() prints its own report and returns None, so it is not wrapped in print()
print("Description:\n", raw_df.describe())

# %%
# -----Cleaning the dataset-----------------------------------------------------
df = raw_df.copy()

# Filling missing values in 'Gender' column with 'N/A'
print("number of missing values in the 'Gender':", df['Gender'].isnull().sum())
df['Gender'] = df['Gender'].fillna('N/A')
print("Gender Gaps After:", df['Gender'].isnull().sum())

# Age: coerce to numeric and use 0 as the "unknown" marker
# (the age histogram in 1.2 filters these rows out with Age > 0).
print("number of missing values in 'Age':", df['Age'].isnull().sum())
df['Age'] = pd.to_numeric(df['Age'], errors='coerce').fillna(0)
print("Age Gaps After:", df['Age'].isnull().sum())

# Assigning 'N/A' to missing category
print("number of missing values in 'Category':", df['Category'].isnull().sum())
df['Category'] = df['Category'].fillna('N/A')
print("Category Gaps After:", df['Category'].isnull().sum())

# Handling missing candidate names
print("number of missing values in the 'Candidate Name':", df['Candidate Name'].isnull().sum())
df['Candidate Name'] = df['Candidate Name'].fillna('Unknown')
print("Candidate Name Gaps After:", df['Candidate Name'].isnull().sum())

# Party Name: 'Unknown' for missing values
print("number of missing values in the 'Party Name':", df['Party Name'].isnull().sum())
df['Party Name'] = df['Party Name'].fillna('Unknown')
print("Party Name Gaps After:", df['Party Name'].isnull().sum())

# Dropping NOTA (not a real contender). .copy() makes the filtered frame
# independent, so the column assignments below do not trigger
# SettingWithCopyWarning.
print("Rows Before:", len(df))
df = df[df['Candidate Name'] != NOTA_LABEL].copy()
print("Rows After Dropping NOTA:", len(df))

# Making columns numeric, 0 for blanks
for col in NUMERIC_COLS:
    print("Before -", col, "Gaps:", df[col].isnull().sum())
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    print("After -", col, "Gaps:", df[col].isnull().sum())

# Recheck after cleaning
print("Final Size:", df.shape)
print("Any Gaps Left?:\n", df.isnull().sum())
print("Final Types:\n", df.dtypes)

# %% [markdown]
# # Objective 1

# %%
# Objective 1: Analyze the Demographic Composition of Candidates

# 1.1 Distribution of candidates by gender (bar chart).
# The palette is sized to the number of gender labels actually present,
# matching what 1.4 does.
plt.figure(figsize=(8, 5))
sns.countplot(data=df, x='Gender', hue='Gender', palette=COLORS[:df['Gender'].nunique()],
              edgecolor='black', linewidth=1, legend=False)
finish_plot('1.1 Gender Split of Candidates', 'Gender', 'Candidate Count')

# 1.2 Ages (histogram with a KDE curve); Age == 0 marks "unknown" and is excluded.
plt.figure(figsize=(10, 5))
sns.histplot(data=df[df['Age'] > 0], x='Age', bins=20, kde=True, color=COLORS[1],
             edgecolor='black', linewidth=1)
finish_plot('1.2 Age Range of Candidates', 'Age', 'Candidate Count')

# 1.3 Proportional breakdown of candidates by social category (pie chart)
plt.figure(figsize=(8, 8))
category_counts = df['Category'].value_counts()
plt.pie(category_counts, labels=category_counts.index, autopct='%1.1f%%',
        colors=COLORS[:len(category_counts)])
finish_plot('1.3 Social Category Breakdown')

# 1.4 Gender representation across the ten constituencies with the most
# candidates (bar chart grouped by gender via hue).
plt.figure(figsize=(12, 6))
top_10_constituencies = df['PC Name'].value_counts().head(10).index
gender_top_pcs = df[df['PC Name'].isin(top_10_constituencies)]
sns.countplot(data=gender_top_pcs, x='PC Name', hue='Gender',
              palette=COLORS[:gender_top_pcs['Gender'].nunique()],
              edgecolor='black', linewidth=1)
plt.legend(title='Gender')
finish_plot('1.4 Gender in Top 10 Constituencies', 'Constituency', 'Candidate Count',
            rotate_xticks=True)

# %% [markdown]
# # Objective 2

# %%
# Objective 2: Evaluate Party Performance Across Constituencies

# Top 10 parties by number of candidates (used for the box plot and the summary).
top_10_parties_candidates = df['Party Name'].value_counts().head(10)
top_10_parties = top_10_parties_candidates.index

# 2.1 Candidates per constituency for each of the top 10 parties (box plot)
plt.figure(figsize=(12, 6))
candidates_per_constituency = (
    df.groupby(['Party Name', 'PC Name']).size().reset_index(name='Candidate Count')
)
party_spread = candidates_per_constituency[
    candidates_per_constituency['Party Name'].isin(top_10_parties)
]
sns.boxplot(data=party_spread, x='Party Name', y='Candidate Count', hue='Party Name',
            palette=COLORS[:party_spread['Party Name'].nunique()], legend=False)
finish_plot('2.1 Candidate Spread by Top 10 Parties', 'Party Name',
            'Candidates per Constituency', rotate_xticks=True)

# 2.2 Vote share percentages of the top 10 parties (horizontal bar chart).
# The denominator is the total of the cleaned frame, i.e. NOTA votes are excluded.
party_votes = (
    df.groupby('Party Name')['Votes Secured - Total'].sum()
      .sort_values(ascending=False).head(10)
)
vote_share = (party_votes / df['Votes Secured - Total'].sum()) * 100
plt.figure(figsize=(12, 6))
sns.barplot(x=vote_share.values, y=vote_share.index, hue=vote_share.index,
            palette=COLORS[:len(vote_share)], edgecolor='black', linewidth=1, legend=False)
finish_plot('2.2 Top 10 Parties by Vote Share (%)', 'Vote Share (%)', 'Party Name')

# Summary for Objective 2
print("Top 10 Parties by Candidate Count:\n", top_10_parties_candidates)
print("\nTop 10 Parties by Vote Share (%):\n", vote_share)
print("\nHow Candidates Spread Out (Top 10):\n",
      party_spread.groupby('Party Name')['Candidate Count'].describe())

# %% [markdown]
# # Objective 3

# %%
# Objective 3: Examine the Influence of Social Categories on Voting Outcomes

# 3.1 Vote Share by Category (bar chart)
plt.figure(figsize=(10, 6))
category_votes = df.groupby('Category')['Votes Secured - Total'].sum()
vote_share_by_category = (category_votes / df['Votes Secured - Total'].sum()) * 100
sns.barplot(x=vote_share_by_category.index, y=vote_share_by_category.values,
            hue=vote_share_by_category.index, palette=COLORS[:len(category_votes)],
            edgecolor='black', linewidth=1, legend=False)
finish_plot('3.1 Vote Share by Category (%)', 'Social Category', 'Vote Share (%)')

# 3.2 Vote Spread by Category (box plot)
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='Category', y='Votes Secured - Total', hue='Category',
            palette=COLORS[:len(category_votes)], legend=False)
finish_plot('3.2 Vote Spread by Category', 'Social Category', 'Votes Secured')

# 3.3 Number of winning candidates per social category (bar chart).
# A "winner" is the row with the highest total votes within each 'PC Name'.
winners = df.loc[df.groupby('PC Name')['Votes Secured - Total'].idxmax()]
plt.figure(figsize=(10, 6))
sns.countplot(data=winners, x='Category', hue='Category',
              palette=COLORS[:winners['Category'].nunique()],
              edgecolor='black', linewidth=1, legend=False)
finish_plot('3.3 Winners by Category', 'Social Category', 'Number of Winners')

# Numerical summary for Objective 3
print("Vote Share by Category (%):\n", vote_share_by_category)
print("Average Votes by Category:\n", df.groupby('Category')['Votes Secured - Total'].mean())
print("Winners by Category:\n", winners['Category'].value_counts())

# %% [markdown]
# # Objective 4

# %%
# Objective 4: Voter Turnout Variations by State
print("Columns I’m Using:", df.columns.tolist())
print("\nFirst Few Rows:\n",
      df[['PC Name', 'Total Votes Polled In The Constituency', 'Total Electors']].head())

# Turnout is a per-constituency figure, so work on one row per constituency;
# averaging candidate rows would weight each constituency by its number of
# candidates.
turnout_df = constituency_level(
    df, ['Total Votes Polled In The Constituency', 'Total Electors']
)
# Blank elector counts were filled with 0 during cleaning; mask them so the
# division yields NaN (skipped by mean/describe) instead of inf.
electors = turnout_df['Total Electors'].where(turnout_df['Total Electors'] > 0)
turnout_df['Voter Turnout (%)'] = (
    turnout_df['Total Votes Polled In The Constituency'] / electors
) * 100
# Splitting out states from constituency names
turnout_df['State'] = state_from_pc_name(turnout_df['PC Name'])
print("\nStates I Pulled Out:\n", turnout_df['State'].unique())

# 4.1 Average voter turnout for top 10 states (bar chart)
plt.figure(figsize=(12, 6))
state_turnout = (
    turnout_df.groupby('State')['Voter Turnout (%)'].mean()
              .sort_values(ascending=False).head(10)
)
sns.barplot(x=state_turnout.index, y=state_turnout.values, hue=state_turnout.index,
            palette=COLORS[:len(state_turnout)], edgecolor='black', linewidth=1, legend=False)
finish_plot('4.1 Top 10 States by Average Turnout (%)', 'State', 'Average Turnout (%)',
            rotate_xticks=True)

# 4.2 Turnout spread by state (box plot) for the same ten states
plt.figure(figsize=(14, 6))
turnout_top_states = turnout_df[turnout_df['State'].isin(state_turnout.index)]
sns.boxplot(data=turnout_top_states, x='State', y='Voter Turnout (%)', hue='State',
            palette=COLORS[:turnout_top_states['State'].nunique()], legend=False)
finish_plot('4.2 Turnout Spread in Top 10 States', 'State', 'Voter Turnout (%)',
            rotate_xticks=True)

# Numerical summary for Objective 4
print("Average Turnout by State (Top 10):\n", state_turnout)
print("\nTurnout Stats by State (Top 10):\n",
      turnout_top_states.groupby('State')['Voter Turnout (%)'].describe())

# %% [markdown]
# # Objective 5

# %%
# Objective 5: Impact of Postal Votes on Candidate Performance

# 5.1 Compare general, postal, and total votes for the top 10 candidates (line chart)
plt.figure(figsize=(12, 6))
top_10_candidates = df.nlargest(10, 'Votes Secured - Total')
positions = range(len(top_10_candidates))
plt.plot(positions, top_10_candidates['Votes Secured - General'], label='General Votes',
         color=COLORS[0], marker='o', linewidth=2)
plt.plot(positions, top_10_candidates['Votes Secured - Postal'], label='Postal Votes',
         color=COLORS[1], marker='s', linewidth=2)
plt.plot(positions, top_10_candidates['Votes Secured - Total'], label='Total Votes',
         color=COLORS[2], marker='^', linewidth=2)
plt.xticks(positions, top_10_candidates['Candidate Name'], rotation=45)
plt.legend(title='Vote Type')
finish_plot('5.1 Votes Breakdown for Top 10 Candidates', 'Candidate Index', 'Vote Count')

# 5.2 General vs postal votes (scatter chart, both axes logarithmic).
# A zero count has no position on a log axis, so those rows are dropped
# explicitly rather than being lost silently by the axis scaling.
on_log_axes = df[(df['Votes Secured - General'] > 0) & (df['Votes Secured - Postal'] > 0)]
plt.figure(figsize=(10, 6))
plt.scatter(on_log_axes['Votes Secured - General'], on_log_axes['Votes Secured - Postal'],
            s=on_log_axes['Votes Secured - Total'] / 1000,  # marker area scales with total votes
            color=COLORS[3], alpha=0.6, edgecolor='black', linewidth=0.5)
plt.xscale('log')
plt.yscale('log')
finish_plot('5.2 General vs. Postal Votes (Size = Total)', 'General Votes', 'Postal Votes')

# Numerical summary for Objective 5
print("Average Votes by Type:\n", df[VOTE_TYPE_COLS].mean())
print("\nHow Votes Relate:\n", df[VOTE_TYPE_COLS].corr())
print("\nTop 10 Candidates by Votes:\n",
      top_10_candidates[['Candidate Name', 'PC Name'] + VOTE_TYPE_COLS])

# %% [markdown]
# # Objective 6

# %%
# Objective 6: Correlations Between Electors and Vote Metrics

# Select relevant columns, one row per constituency so that constituencies
# with many candidates are not counted many times in the correlations.
metrics = ['Total Electors', 'Total Votes Polled In The Constituency', 'Valid Votes']
df_metrics = constituency_level(df, metrics)[metrics]

# 6.1 Correlation between electors, votes polled, and valid votes (heatmap)
plt.figure(figsize=(8, 6))
corr_matrix = df_metrics.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
finish_plot('6.1 How Electors, Votes Polled, and Valid Votes Connect')

# 6.2 Pairwise relationships among electors, votes polled, and valid votes (pair plot)
pair_plot = sns.pairplot(df_metrics, diag_kind='hist',
                         plot_kws={'color': COLORS[0]}, diag_kws={'color': COLORS[1]})
# `.figure` replaces the deprecated `.fig` attribute of the grid object.
pair_plot.figure.suptitle('6.2 Pairing Up Electors, Votes Polled, and Valid Votes',
                          fontsize=14, color=TITLE_COLOR, y=1.02)
plt.show()

# Numerical summary for Objective 6
print("Correlation Breakdown:\n", corr_matrix)
print("\nBasic Stats:\n", df_metrics.describe())

# %% [markdown]
# # Objective 7

# %%
# Objective 7: Analyze the Prevalence and Distribution of NOTA Votes

# NOTA rows were removed from `df` during cleaning, so take them from the
# untouched raw_df instead of reading the CSV a second time.
nota_df = raw_df[raw_df['Candidate Name'] == NOTA_LABEL].copy()
nota_df['State'] = state_from_pc_name(nota_df['PC Name'])
# Plain column assignment (not `.loc[:, col] = ...`) so the column takes the
# numeric dtype of the converted values.
nota_df['Votes Secured - Total'] = pd.to_numeric(
    nota_df['Votes Secured - Total'], errors='coerce'
).fillna(0)

# 7.1 Total NOTA votes for the top 10 states (bar chart)
plt.figure(figsize=(12, 6))
nota_by_state = (
    nota_df.groupby('State')['Votes Secured - Total'].sum()
           .sort_values(ascending=False).head(10)
)
sns.barplot(x=nota_by_state.index, y=nota_by_state.values, hue=nota_by_state.index,
            palette=COLORS[:len(nota_by_state)], edgecolor='black', linewidth=1, legend=False)
finish_plot('7.1 Top 10 States for NOTA Votes', 'State', 'Total NOTA Votes',
            rotate_xticks=True)

# 7.2 Proportional distribution of NOTA votes among the top 10 states (pie chart).
# Shares are relative to the top-10 total, not to all NOTA votes.
plt.figure(figsize=(8, 8))
nota_share = (nota_by_state / nota_by_state.sum()) * 100
plt.pie(nota_share, labels=nota_share.index, autopct='%1.1f%%',
        colors=COLORS[:len(nota_share)])
finish_plot('7.2 NOTA Share in Top 10 States')

# Numerical summary for Objective 7
print("NOTA Votes by State (Top 10):\n", nota_by_state)
print("\nNOTA Share by State (Top 10, %):\n", nota_share)
print("\nTotal NOTA Votes Overall:", nota_df['Votes Secured - Total'].sum())