# Repository layout:
#   ├── CA2 PYTHON PROJECT.py
#   ├── PYTHON REPORT.docx
#   └── README.md
#
# /CA2 PYTHON PROJECT.py
"""Exploratory analysis of the "Crime Data from 2020 to present" CSV.

The script loads the CSV, prints summary information, cleans the data
(duplicates, column names, missing values), derives date/time features and
shows a series of seaborn/matplotlib charts.

Usage:
    python "CA2 PYTHON PROJECT.py" [path/to/crime_data.csv]

When no path is given, ``DEFAULT_CSV_PATH`` is used.
"""

import sys

import numpy as np  # noqa: F401 - unused, kept from the original script
import pandas as pd

# LOADING DATASET: default input location (override on the command line).
DEFAULT_CSV_PATH = "C:/Users/mi/Downloads/Crime_Data_from_2020_to_present (1).csv"

# Placeholder written into missing text cells.
MISSING_TEXT = "PYTHON"

# Text columns whose missing values are replaced with MISSING_TEXT.
TEXT_COLUMNS = ["Vict Sex", "Vict Descent", "Premis Desc", "Weapon Desc",
                "Cross Street", "Mocodes"]

# Numeric columns whose missing values are replaced with the column median.
MEDIAN_FILL_COLUMNS = ["Weapon Used Cd", "Crm Cd 1"]

# `Crm Cd 2`, `Crm Cd 3`, `Crm Cd 4` are not important, drop them.
DROPPED_COLUMNS = ["Crm Cd 2", "Crm Cd 3", "Crm Cd 4"]

# Month number to name mapping (index 0 -> month 1).
MONTH_LABELS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

DAY_ORDER = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
             "Saturday", "Sunday"]


# --------------------------------------------------------------------------
# Data preparation
# --------------------------------------------------------------------------

def explore(dataset: pd.DataFrame) -> None:
    """Print structure, summary statistics and missing-value counts."""
    # DataFrame.info() prints by itself and returns None, so it must not be
    # passed to print() (that would display "None").
    print("Information: ")
    dataset.info()
    print("Description: \n", dataset.describe())
    # HANDLING MISSING VALUES
    print("Missing values ", dataset.isnull().sum())


def clean_dataset(dataset: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *dataset*; the input is left untouched.

    Steps: drop duplicate rows, strip whitespace from column names, fill
    missing text with ``MISSING_TEXT``, fill missing numeric codes with the
    column median and drop ``DROPPED_COLUMNS``. Columns that are not present
    are skipped rather than raising ``KeyError``.
    """
    # Explicit copy so the assignments below never touch the caller's frame.
    cleaned = dataset.drop_duplicates().copy()
    cleaned.columns = cleaned.columns.str.strip()

    for col in TEXT_COLUMNS:
        if col in cleaned.columns:
            cleaned[col] = cleaned[col].fillna(MISSING_TEXT)

    for col in MEDIAN_FILL_COLUMNS:
        if col in cleaned.columns:
            cleaned[col] = cleaned[col].fillna(cleaned[col].median())

    return cleaned.drop(columns=DROPPED_COLUMNS, errors="ignore")


def report_clean(dataset: pd.DataFrame) -> None:
    """Print the final check after cleaning."""
    print("Final DataFrame shape:", dataset.shape)
    print("\nRemaining missing values:\n", dataset.isnull().sum())
    print("\nDataset Info:")
    dataset.info()


def top_value_counts(values: pd.Series, n: int, label: str) -> pd.DataFrame:
    """Return the *n* most frequent entries of *values*.

    The result has two columns, *label* and ``'Count'``, ordered from most
    to least frequent.
    """
    top = values.value_counts().head(n).reset_index()
    top.columns = [label, "Count"]
    return top


def add_date_features(dataset: pd.DataFrame) -> pd.DataFrame:
    """Parse ``DATE OCC`` and add ``Year`` and ``Month`` columns.

    Rows whose date cannot be parsed are dropped. A new frame is returned.
    """
    parsed = pd.to_datetime(dataset["DATE OCC"], errors="coerce")
    valid = parsed.notna()
    # Copy before assigning: writing columns onto a filtered frame is what
    # triggers pandas' SettingWithCopyWarning.
    dated = dataset.loc[valid].copy()
    dated["DATE OCC"] = parsed.loc[valid]
    dated["Year"] = dated["DATE OCC"].dt.year
    dated["Month"] = dated["DATE OCC"].dt.month
    return dated


def add_time_features(dataset: pd.DataFrame) -> pd.DataFrame:
    """Drop rows without ``LAT``/``LON`` and add ``Hour`` and ``DayOfWeek``.

    Expects ``DATE OCC`` to already be a datetime column (see
    ``add_date_features``). A new frame is returned.
    """
    located = dataset.dropna(subset=["LAT", "LON"]).copy()
    # NOTE(review): assumes TIME OCC is an HHMM integer - confirm.
    located["Hour"] = located["TIME OCC"] // 100
    located["DayOfWeek"] = located["DATE OCC"].dt.day_name()
    return located


def monthly_counts(months: pd.Series) -> pd.Series:
    """Count occurrences of each month number, always for months 1..12.

    Months that do not occur get a count of 0, so the result lines up with
    ``MONTH_LABELS`` even when the data does not cover a full year.
    """
    return months.value_counts().reindex(list(range(1, 13)), fill_value=0)


# --------------------------------------------------------------------------
# Plotting
# --------------------------------------------------------------------------

def _plotting():
    """Return ``(plt, sns)``, importing matplotlib and seaborn on demand.

    Deferring these imports keeps the data-preparation helpers importable
    on machines that have no plotting stack installed.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    return plt, sns


def _finish(title, xlabel=None, ylabel=None, *, grid=False,
            rotate_xticks=None, title_size=None):
    """Apply title/labels to the current figure, then lay out and show it."""
    plt, _ = _plotting()
    if title_size is None:
        plt.title(title)
    else:
        plt.title(title, fontsize=title_size)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if grid:
        plt.grid(True)
    if rotate_xticks is not None:
        plt.xticks(rotation=rotate_xticks)
    plt.tight_layout()
    plt.show()


def plot_top_crimes(dataset, *, title, xlabel, palette, figsize,
                    title_size=None):
    """Horizontal bar chart of the 10 most frequent ``Crm Cd Desc`` values."""
    plt, sns = _plotting()
    top = top_value_counts(dataset["Crm Cd Desc"], 10, "Crime Type")
    plt.figure(figsize=figsize)
    sns.barplot(
        data=top,
        x="Count",
        y="Crime Type",
        hue="Crime Type",  # hue only drives the colouring
        palette=palette,
        dodge=False,
        legend=False,  # disables legend since hue is just for coloring
    )
    _finish(title, xlabel, "Crime Type", title_size=title_size)


def plot_crimes_per_year(dataset):
    """Line chart of the number of rows per ``Year``."""
    plt, sns = _plotting()
    per_year = dataset["Year"].value_counts().sort_index()
    plt.figure(figsize=(10, 5))
    sns.lineplot(
        x=per_year.index,
        y=per_year.values,
        marker="o",
        color="steelblue",  # Use a solid color to avoid palette warnings
    )
    _finish("Total Crimes Per Year", "Year", "Number of Crimes", grid=True)


def plot_crimes_per_month(dataset):
    """Bar chart of the number of rows per month, all years combined."""
    plt, sns = _plotting()
    per_month = monthly_counts(dataset["Month"])
    plt.figure(figsize=(10, 5))
    sns.barplot(
        x=MONTH_LABELS,
        y=per_month.values,
        color="skyblue",  # Use a single color to avoid using palette
    )
    _finish("Total Crimes Per Month (All Years Combined)", "Month",
            "Number of Crimes")


def plot_time_patterns(dataset):
    """Count plots by hour, by day of week and by month, plus the top 10."""
    plt, sns = _plotting()

    # Crimes by Hour
    plt.figure(figsize=(10, 4))
    sns.countplot(data=dataset, x="Hour", hue="Hour", palette="magma",
                  legend=False)
    _finish("Crimes by Hour of Day", "Hour", "Count", grid=True)

    # Crime Count by Day of the Week
    plt.figure(figsize=(10, 4))
    sns.countplot(data=dataset, x="DayOfWeek", hue="DayOfWeek",
                  order=DAY_ORDER, palette="viridis", legend=False)
    _finish("Crimes by Day of the Week", "Day", "Count", rotate_xticks=45)

    # Top 10 Crime Types
    plot_top_crimes(dataset, title="Top 10 Reported Crimes", xlabel="Count",
                    palette="coolwarm", figsize=(10, 4))

    # Crimes by Month
    plt.figure(figsize=(10, 4))
    sns.countplot(data=dataset, x="Month", hue="Month", palette="Blues",
                  legend=False)
    _finish("Crimes by Month", "Month", "Number of Crimes")


def plot_correlation_heatmap(dataset):
    """Annotated heatmap of the correlations between all numeric columns."""
    plt, sns = _plotting()
    correlation_matrix = dataset.select_dtypes(include=["number"]).corr()
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm",
                square=True)
    _finish("Correlation Heatmap of Numeric Features")


def plot_victim_age(dataset):
    """Box plots and KDE plots of ``Vict Age`` by crime type and gender."""
    plt, sns = _plotting()

    # Filter top 5 most common crimes
    top_crimes = dataset["Crm Cd Desc"].value_counts().nlargest(5).index
    subset = dataset[dataset["Crm Cd Desc"].isin(top_crimes)]

    # Victim Age by Crime Type
    plt.figure(figsize=(12, 6))
    sns.boxplot(data=subset, x="Crm Cd Desc", y="Vict Age",
                hue="Crm Cd Desc", palette="Set3", legend=False)
    _finish("Victim Age Distribution by Crime Type", "Crime Type",
            "Victim Age", rotate_xticks=45)

    # Victim Age by Gender
    plt.figure(figsize=(8, 5))
    sns.boxplot(data=dataset, x="Vict Sex", y="Vict Age", hue="Vict Sex",
                palette="coolwarm", legend=False)
    _finish("Victim Age by Gender", "Victim Gender", "Victim Age")

    # Victim Age Distribution KDE
    plt.figure(figsize=(10, 5))
    sns.kdeplot(data=dataset, x="Vict Age", fill=True, color="skyblue")
    _finish("Victim Age Distribution (KDE)", "Victim Age", "Density")

    # Victim Age KDE by Gender
    plt.figure(figsize=(10, 5))
    sns.kdeplot(data=dataset, x="Vict Age", hue="Vict Sex", fill=True,
                common_norm=False, palette="Set2")
    _finish("Victim Age Distribution by Gender", "Victim Age", "Density")


# --------------------------------------------------------------------------
# Entry point
# --------------------------------------------------------------------------

def main(csv_path: str = DEFAULT_CSV_PATH) -> None:
    """Run the full analysis on the CSV at *csv_path*."""
    # LOADING DATASET
    dataset = pd.read_csv(csv_path)
    print(dataset)

    # EXPLORING DATASET
    explore(dataset)

    # Cleaning and final check
    dataset = clean_dataset(dataset)
    report_clean(dataset)

    # Top 10 crime types, computed before any rows are dropped for bad dates
    plot_top_crimes(
        dataset,
        title="Top 10 Most Common Crime Types (2020–Present)",
        xlabel="Number of Incidents",
        palette="viridis",
        figsize=(12, 6),
        title_size=14,
    )

    # Date-based trends
    dataset = add_date_features(dataset)
    plot_crimes_per_year(dataset)
    plot_crimes_per_month(dataset)

    # Time-of-day / weekday patterns on rows that have coordinates
    dataset = add_time_features(dataset)
    plot_time_patterns(dataset)
    plot_correlation_heatmap(dataset)
    plot_victim_age(dataset)


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV_PATH)

# /PYTHON REPORT.docx:
#   https://raw.githubusercontent.com/Gangwaniharsh/Python-Project/628900b789a67ef611cdd80027698c0342398442/PYTHON REPORT.docx
#
# /README.md:
#   # Python-Project
#   New Project