├── Crime_Data.csv
├── README.md
└── pyproject.py


/README.md:
--------------------------------------------------------------------------------
1 | # Crime-Data-Analysis
2 | A data analysis and visualization project that explores crime patterns in the US using Python, NumPy, pandas, seaborn, and Matplotlib.
3 | 


--------------------------------------------------------------------------------
/pyproject.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | 
  6 | # Set Seaborn style for compelling visuals
  7 | sns.set_style("whitegrid")
  8 | sns.set_palette("husl")
  9 | plt.rcParams.update({'font.size': 12, 'axes.titlesize': 16, 'axes.labelsize': 12})
 10 | 
 11 | # Load the dataset
 12 | df = pd.read_csv("Crime_Data.csv")
 13 | 
 14 | # --- Data Cleaning ---
 15 | df["Mocodes"].fillna("Null", inplace=True)
 16 | df["Vict Sex"].fillna("Unknown", inplace=True)
 17 | df["Vict Descent"].fillna("Unknown", inplace=True)
 18 | df["Premis Desc"].fillna("Unknown", inplace=True)
 19 | df["Weapon Desc"].fillna("Unknown", inplace=True)
 20 | df["Weapon Used Cd"].fillna("N/A", inplace=True)
 21 | df["Crm Cd 1"].fillna("N/A", inplace=True)
 22 | df["Crm Cd 2"].fillna("N/A", inplace=True)
 23 | df["Crm Cd 3"].fillna("N/A", inplace=True)
 24 | 
 25 | # Remove duplicate rows
 26 | df.drop_duplicates(inplace=True)
 27 | 
 28 | # OBJECTIVE 1: Analyze and Visualize Top 10 Crime Categories
 29 | crime_counts = df['Crm Cd Desc'].value_counts()
 30 | top_crimes = crime_counts.head(10)
 31 | 
 32 | short_labels = {
 33 |     'VEHICLE - STOLEN': 'Stolen Vehicle',
 34 |     'BATTERY - SIMPLE ASSAULT': 'Battery Assault',
 35 |     'BURGLARY FROM VEHICLE': 'Burglary (Vehicle)',
 36 |     'VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)': 'Felony Vandalism',
 37 |     'ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT': 'Aggr. Assault w/ Weapon',
 38 |     'INTIMATE PARTNER - SIMPLE ASSAULT': 'IP Assault',
 39 |     'BURGLARY': 'Burglary',
 40 |     'THEFT PLAIN - PETTY ($950 & UNDER)': 'Petty Theft',
 41 |     'THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)': 'Theft from Vehicle',
 42 |     'VANDALISM - MISDEMEANOR ($399 OR UNDER)': 'Misdemeanor Vandalism'
 43 | }
 44 | top_crimes.index = top_crimes.index.map(short_labels)
 45 | 
 46 | # Top 10 Crime Categories Bar Plot (Enhanced)
 47 | plt.figure(figsize=(10, 6))
 48 | sns.barplot(x=top_crimes.values, y=top_crimes.index, palette="cubehelix", width=0.5)
 49 | plt.title("Top 10 Crime Categories", fontsize=20, weight='bold')
 50 | plt.xlabel("Number of Crimes", fontsize=16)
 51 | plt.ylabel("Crime Type", fontsize=16)
 52 | plt.xticks(fontsize=14)
 53 | plt.yticks(fontsize=14, fontweight='bold')
 54 | plt.tight_layout()
 55 | plt.grid(True, axis='x', linestyle='--', alpha=0.7)
 56 | plt.show()
 57 | 
 58 | # OBJECTIVE 2: Time-Series Analysis of Crime Trends by Month and Year
 59 | df['DATE OCC'] = pd.to_datetime(df['DATE OCC'], errors='coerce')
 60 | df['Year'] = df['DATE OCC'].dt.year
 61 | df['Month'] = df['DATE OCC'].dt.month
 62 | df['Day'] = df['DATE OCC'].dt.day
 63 | 
 64 | df_filtered = df[(df['Year'] >= 2000) & (df['Year'] <= pd.Timestamp.now().year)]
 65 | crime_trends_by_month = df.groupby('Month').size()
 66 | 
 67 | # Enhanced Time Series Analysis (Average Crime Trends by Month)
 68 | plt.figure(figsize=(10, 6))
 69 | sns.barplot(x=crime_trends_by_month.index, y=crime_trends_by_month.values, palette="crest", width=0.7)
 70 | plt.title("Average Crime Trends by Month", fontsize=20, weight='bold')
 71 | plt.xlabel("Month", fontsize=16)
 72 | plt.ylabel("Number of Crimes", fontsize=16)
 73 | plt.xticks(ticks=np.arange(12), labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
 74 |                                         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], fontsize=14)
 75 | plt.yticks(fontsize=14, fontweight='bold')
 76 | plt.tight_layout()
 77 | 
 78 | # Adding gridlines for a more refined look
 79 | plt.grid(True, axis='y', linestyle='--', alpha=0.7)
 80 | plt.show()
 81 | 
 82 | # OBJECTIVE 3: Crime Hotspot Detection Using Grid-Based Heatmap
 83 | lat_bins = np.linspace(df["LAT"].min(), df["LAT"].max(), 4)
 84 | lon_bins = np.linspace(df["LON"].min(), df["LON"].max(), 4)
 85 | 
 86 | crime_density, _, _ = np.histogram2d(df["LAT"], df["LON"], bins=[lat_bins, lon_bins])
 87 | 
 88 | # Enhanced Heatmap with Light Shades
 89 | plt.figure(figsize=(10, 6))
 90 | plt.imshow(crime_density.T, cmap='Blues', origin='lower', aspect='auto',
 91 |            extent=[df["LON"].min(), df["LON"].max(), df["LAT"].min(), df["LAT"].max()])
 92 | plt.colorbar(label='Crime Density')
 93 | plt.title("Crime Hotspots (Enhanced Heatmap)", fontsize=20, weight='bold')
 94 | plt.xlabel("Longitude", fontsize=16)
 95 | plt.ylabel("Latitude", fontsize=16)
 96 | plt.tight_layout()
 97 | 
 98 | # Adding gridlines and annotations for better clarity
 99 | plt.grid(True, linestyle='-', color='black', alpha=0.3)
100 | plt.show()
101 | 
# OBJECTIVE 4: Victim Age Distribution by Crime Type and Gender
# Keep only rows whose recorded victim age lies in the range (0, 100].
victim_age = df['Vict Age']
df_valid = df[victim_age.gt(0) & victim_age.le(100)]

# Restrict to the five most frequent crime descriptions among those rows.
top5_crimes = df_valid['Crm Cd Desc'].value_counts().index[:5]
is_top5_crime = df_valid['Crm Cd Desc'].isin(top5_crimes)
df_top5 = df_valid[is_top5_crime]

# Strip plot: one point per record, split by victim sex within each crime type
strip_options = dict(jitter=True, dodge=True, alpha=0.7, palette='coolwarm')
plt.figure(figsize=(12, 6))
sns.stripplot(data=df_top5, x='Crm Cd Desc', y='Vict Age', hue='Vict Sex',
              **strip_options)
plt.title("Victim Age Distribution by Crime Type and Gender",
          fontsize=20, weight='bold')
plt.xlabel("Crime Type", fontsize=16)
plt.ylabel("Victim Age", fontsize=16)
plt.xticks(rotation=25, fontsize=14)
plt.legend(title='Victim Sex', fontsize=14)
plt.tight_layout()
plt.show()
118 | 
# OBJECTIVE 5: Analyze Top 5 Weapons Used in Crimes
# "Unknown" is the placeholder this script writes for missing weapon
# descriptions during cleaning; it is not a weapon, so it is left out of the
# ranking instead of taking one of the five slices.
known_weapons = df.loc[df['Weapon Desc'] != 'Unknown', 'Weapon Desc']
weapon_counts = known_weapons.value_counts().head(5)

# Pie chart with every slice pulled out slightly
plt.figure(figsize=(8, 8))
# One offset per slice. A fixed 5-tuple makes plt.pie raise when fewer than
# five weapon categories exist, because explode must match the data length.
explode = [0.1] * len(weapon_counts)

# Slices show percentages only; the names go into the legend
colors = sns.color_palette("Spectral", n_colors=len(weapon_counts))
plt.pie(weapon_counts, autopct='%1.1f%%', startangle=120, colors=colors,
        textprops={'fontsize': 12, 'fontweight': 'bold'}, explode=explode,
        labels=None, pctdistance=0.85)

plt.title("Top 5 Weapons Used in Crimes", fontsize=18, weight='bold')
plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle

# Colour-coded legend to the right of the pie: one round marker per slice,
# in the same order as the slice colours
labels = weapon_counts.index.tolist()
legend_handles = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10)
    for color in colors
]
plt.legend(handles=legend_handles, labels=labels, title="Weapon Descriptions",
           loc='center left', bbox_to_anchor=(1.05, 0.5), fontsize=12)

plt.tight_layout()
plt.show()
142 | 
# OBJECTIVE 6: Victim Age Distribution (Histogram)
# Same validity rule as before: ages must lie in the range (0, 100].
recorded_age = df['Vict Age']
df_valid = df[recorded_age.gt(0) & recorded_age.le(100)]

# 20-bin histogram of valid victim ages with a KDE curve overlaid
histogram_options = dict(bins=20, kde=True, color="green")
plt.figure(figsize=(10, 6))
sns.histplot(df_valid['Vict Age'], **histogram_options)
plt.title("Victim Age Distribution", fontsize=20, weight='bold')
plt.xlabel("Age", fontsize=16)
plt.ylabel("Frequency", fontsize=16)
plt.tight_layout()
plt.show()
154 | 
# OBJECTIVE 7: Analyze Crime Distribution by Status Description
# Number of records per status description, most frequent first
status_desc_counts = df['Status Desc'].value_counts()

# Vertical bar chart, one bar per status, coloured from the "Set2" palette
bar_colors = sns.color_palette("Set2")
plt.figure(figsize=(10, 6))
status_desc_counts.plot.bar(stacked=True, color=bar_colors)
plt.title("Distribution of Crimes by Status Description",
          fontsize=20, weight='bold')
plt.xlabel("Status", fontsize=16)
plt.ylabel("Number of Crimes", fontsize=16)
plt.xticks(rotation=0, fontsize=14)
plt.tight_layout()
plt.show()
167 | 
# OBJECTIVE 8: Monthly Crime Trends by Area in 2020
# Replace the numeric month column with abbreviated month names.
# NOTE(review): strftime('%b') follows the active locale, while month_order
# below is English - confirm the script always runs under an English locale.
df['Month'] = df['DATE OCC'].dt.strftime('%b')

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of df (which triggers SettingWithCopyWarning).
df_2020 = df[df['Year'] == 2020].copy()
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df_2020['Month'] = pd.Categorical(df_2020['Month'], categories=month_order, ordered=True)

# Rows = months in calendar order, columns = areas, values = crime counts.
# observed=False keeps all 12 months in the table even when a month has no
# rows; it is spelled out because relying on the implicit default is
# deprecated.
monthly_crimes = (
    df_2020.groupby(['Month', 'AREA NAME'], observed=False)
    .size()
    .unstack(fill_value=0)
)

# Line plot of monthly crime counts per area (months on the vertical axis)
plt.figure(figsize=(10, 8))
for area in monthly_crimes.columns:
    # sort=False / estimator=None: connect the points in calendar order.
    # By default lineplot sorts and aggregates along x, which here is the
    # crime count, so the line would join months in count order instead.
    sns.lineplot(y=monthly_crimes.index, x=monthly_crimes[area],
                 sort=False, estimator=None, label=area, marker='o')

plt.title("Monthly Crime Trends by Area (2020, Horizontal View)", fontsize=20, weight='bold')
plt.xlabel("Number of Crimes", fontsize=16)
plt.ylabel("Month", fontsize=16)
plt.legend(title="Area", bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=14)
plt.tight_layout()
plt.show()
188 | 
# Write the cleaned DataFrame (row index included) to CSV, then report completion
cleaned_csv_path = "crime_data_cleaned.csv"
df.to_csv(cleaned_csv_path)
print("-----Analysis Done!-------")
192 | 


--------------------------------------------------------------------------------