├── README.md └── sonu.py /README.md: -------------------------------------------------------------------------------- 1 | # Python-Dataset-Project 2 | Analyzing Fatal Encounters in the U.S. Using Python 3 | This report presents a data-driven analysis of fatal police shootings in the United States, focusing on key demographic patterns such as gender, age, and race. The dataset used for this analysis—df_fatalities—compiles verified instances of individuals fatally shot by law enforcement officers while on duty. 4 | Law enforcement-related fatalities are a major public concern in the U.S., raising questions about policing practices, accountability, and the demographic disparities in the use of deadly force. By examining this dataset, we aim to gain insights into: 5 | Gender disparities in fatal police encounters 6 | Age distribution and how manner of death varies by gender 7 | Race-based trends in fatalities 8 | Statistical distribution of ages through histogram and KDE plots 9 | This exploratory data analysis uses various visualizations—including bar charts, box plots, and histograms—to highlight trends and potential biases. Our ultimate goal is to provide a clear, fact-based picture of these events to support informed discussions and policymaking. 
"""Exploratory analysis of fatal police encounters in the U.S.

Loads the "Deaths by Police" CSV, prints an overview of the data, and then
saves (and shows) five charts: deaths by gender, age by manner of death and
gender, the ten most common ages, the age distribution, and deaths by race.

Usage:
    python sonu.py [path/to/Deaths_by_Police_US.csv]

When no path is given, the original author's local path is used.
"""

import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Location the script originally read from; kept as the fallback so running
# the script with no arguments behaves as before.
DEFAULT_CSV_PATH = (
    "C:\\Users\\sonuk\\OneDrive\\Documents\\Desktop\\Deaths_by_Police_US.csv"
)


def load_fatalities(csv_path=DEFAULT_CSV_PATH):
    """Read the fatalities CSV at *csv_path* into a DataFrame.

    The file is decoded as ISO-8859-1 (the original script's "encoding fix").
    Any error from ``pd.read_csv`` (for example a missing file) propagates.
    """
    return pd.read_csv(csv_path, encoding='ISO-8859-1')


def print_overview(df):
    """Print the frame, its head/tail, summary statistics, info, columns and shape."""
    print("\n Dataset Overview:")
    print(df)

    print("\n Head of the dataset:")
    print(df.head())

    print("\n Tail of the dataset:")
    print(df.tail())

    print("\n Summary Statistics:")
    print(df.describe())

    print("\n Information:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    df.info()

    print("\n Column Names:")
    print(df.columns)

    print("\n Shape of Dataset:")
    print(df.shape)


def _finish_figure(title, xlabel, ylabel, filename):
    """Label the current figure, save it to *filename*, and display it."""
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(filename)
    plt.show()


def plot_deaths_by_gender(df):
    """Objective 1: bar chart of the number of rows per 'gender' value."""
    gender_counts = df['gender'].value_counts()
    plt.figure(figsize=(6, 4))
    sns.barplot(
        x=gender_counts.index,
        y=gender_counts.values,
        hue=gender_counts.index,
        palette='Set2',
        legend=False,
    )
    _finish_figure(
        'Total Number of Deaths by Gender',
        'Gender',
        'Number of Deaths',
        'gender_deaths.png',
    )


def plot_age_by_manner_and_gender(df):
    """Objective 2: box plot of 'age' per 'manner_of_death', split by 'gender'."""
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='manner_of_death', y='age', hue='gender', data=df)
    _finish_figure(
        'Age Distribution by Manner of Death and Gender',
        'Manner of Death',
        'Age',
        'age_manner_gender_boxplot.png',
    )


def summarize_age(df):
    """Objective 3: print age statistics and chart the ten most common ages."""
    print("===== Age Summary Statistics =====")
    print(df['age'].describe())

    # Computed once and reused for both the printout and the chart.
    top_age_counts = df['age'].value_counts().head(10)
    print("\nTop 10 Most Common Ages:")
    print(top_age_counts)

    top_ages = top_age_counts.reset_index()
    top_ages.columns = ['age', 'count']
    plt.figure(figsize=(8, 4))
    sns.barplot(
        x='age', y='count', hue='age', data=top_ages,
        palette='coolwarm', legend=False,
    )
    _finish_figure(
        'Top 10 Most Common Ages of People Killed',
        'Age',
        'Count',
        'age_summary.png',
    )


def plot_age_distribution(df):
    """Objective 4: histogram with KDE overlay of the non-missing 'age' values."""
    plt.figure(figsize=(10, 6))
    sns.histplot(df['age'].dropna(), kde=True, bins=30, color='skyblue')
    _finish_figure(
        'Age Distribution (Histogram + KDE)',
        'Age',
        'Frequency',
        'age_distribution.png',
    )


def plot_deaths_by_race(df):
    """Objective 5: bar chart of the number of rows per 'race' value."""
    race_counts = df['race'].value_counts().reset_index()
    race_counts.columns = ['race', 'count']
    plt.figure(figsize=(8, 5))
    sns.barplot(
        x='race', y='count', hue='race', data=race_counts,
        palette='muted', legend=False,
    )
    _finish_figure(
        'Total Number of People Killed by Race',
        'Race',
        'Number of Deaths',
        'race_deaths.png',
    )


def main(argv=None):
    """Run the full analysis and return the loaded DataFrame.

    *argv* is the list of command-line arguments (defaults to
    ``sys.argv[1:]``). Its first element, if present, is the CSV path;
    otherwise ``DEFAULT_CSV_PATH`` is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    csv_path = args[0] if args else DEFAULT_CSV_PATH

    # Set style
    sns.set(style="whitegrid")

    df = load_fatalities(csv_path)
    print_overview(df)

    plot_deaths_by_gender(df)
    plot_age_by_manner_and_gender(df)
    summarize_age(df)
    plot_age_distribution(df)
    plot_deaths_by_race(df)
    return df


if __name__ == "__main__":
    # Bound at module level so interactive runs (e.g. IPython's %run) can
    # still inspect the loaded frame under its original name.
    df_fatalities = main()