"""Exploratory analysis and visualisation of a road-accident dataset.

Project files that accompany this script:
    Final_python_project_file.docx
        https://raw.githubusercontent.com/Vansh12-bit/Python-Project/ee8891384164a32a617d8994e5bc2841447134d3/Final_python_project_file.docx
    Python_Project_Final.py   (this script)
    README.md                 ("Python-Project: using Python toolkits for
                              data visualization.")
    final_dataset_python.csv  (input data loaded below)

The script runs top to bottom and works through eight objectives:
    1. Summary statistics and accident-severity counts.
    2. Bar chart of mean speed limit / vehicle count, pie chart of
       severities, bar chart of accidents per day of the week.
    3. Scatter plot of accident locations coloured by severity.
    4. Scatter plot of vehicles involved vs. casualties.
    5. Count plot of carriageway hazards split by severity.
    6. Welch two-sample t-test of casualties on Dry vs. Wet/Damp roads.
    7. One-sample proportion z-test of the urban share of accidents.
    8. Monthly accident counts per severity level.
"""

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from scipy.stats import ttest_ind
from statsmodels.stats.proportion import proportions_ztest

# Prefer the CSV that ships next to this script; fall back to the original
# author's absolute location so the previous setup keeps working.
try:
    _script_dir = Path(__file__).resolve().parent
except NameError:  # e.g. the code was pasted into an interactive session
    _script_dir = Path.cwd()
_local_csv = _script_dir / "final_dataset_python.csv"
_legacy_csv = Path("C:/Users/ASUS/Documents/final_dataset_python.csv")
DATA_PATH = _local_csv if _local_csv.exists() else _legacy_csv

df = pd.read_csv(DATA_PATH)
print(df.head())
print(df.tail())
print(df.describe())
# info() writes its report itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line).
df.info()


# Objective No.1
# Summary statistics for Speed_limit and Number_of_Vehicles
summary_stats = df.describe()
# Identify the most common types of accidents and their severity
most_common_accidents = df["Accident_Severity"].value_counts()

# Objective No.2
# (2.1): Bar chart for Speed_limit and Number_of_Vehicles
summary_stats.loc["mean", ["Speed_limit", "Number_of_Vehicles"]].plot(
    kind='bar', color=['blue', 'red']
)
plt.title("Average Speed Limit & Number of Vehicles in Accidents")
plt.xlabel("Category")
plt.ylabel("Average Value")
plt.show()

# (2.2): Pie chart for most common accident severities
plt.figure(figsize=(6, 6))
most_common_accidents.plot(
    kind='pie', autopct='%1.1f%%', colors=['green', 'orange', 'purple']
)
plt.title("Distribution of Accident Severities")
plt.ylabel("")  # Hide default ylabel for better visualization
plt.show()

# (2.3): Bar chart of accidents per day of the week
# NOTE(review): sort_index() orders the labels by their own sort order, which
# is alphabetical if Day_of_Week holds day names -- confirm whether calendar
# order is wanted.
plt.figure(figsize=(8, 5))
df["Day_of_Week"].value_counts().sort_index().plot(kind='bar', color='cyan')
plt.title("Accidents Noticed Based on Day of the Week")
plt.xlabel("Day of the Week")
plt.ylabel("Number of Accidents")
plt.xticks(rotation=45)
plt.show()

print(df["Accident_Severity"].unique())  # See all categories

# Objective 3:
# Define the color palette for each severity level
palette = {
    "Serious": "red",
    "Slight": "blue",
    "Fatal": "black",
    "Fetal": "purple",  # Only needed if the data actually contains "Fetal"
}

# Plotting accident locations
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x="Longitude",
    y="Latitude",
    hue="Accident_Severity",
    palette=palette,
)
plt.title("Accident Locations Based on Latitude and Longitude")
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.legend(title="Accident Severity")
plt.show()

# Objective 4:
# Scatter plot to study correlation
plt.figure(figsize=(8, 6))
plt.scatter(
    df["Number_of_Vehicles"],
    df["Number_of_Casualties"],
    color='blue',
    alpha=0.5,
)
plt.title("Relationship Between Number of Casualties and Number of Vehicles")
plt.xlabel("Number of Vehicles Involved")
plt.ylabel("Number of Casualties")
plt.grid(True)
plt.show()


# Objective 5:
plt.figure(figsize=(10, 6))
sns.countplot(
    data=df, x="Carriageway_Hazards", hue="Accident_Severity", palette="Set2"
)
plt.title("Impact of Carriageway Hazards on Accident Severity")
plt.xlabel("Carriageway Hazards")
plt.ylabel("Accident Frequency")
plt.xticks(rotation=45)
plt.legend(title="Severity")
plt.show()

# Objective 6:
# Filter data for only 'Dry' and 'Wet/Damp' road conditions
df_filtered = df[df['Road_Surface_Conditions'].isin(['Dry', 'Wet/Damp'])]

# Create two separate samples and remove any missing values
dry_casualties = df_filtered.loc[
    df_filtered['Road_Surface_Conditions'] == 'Dry', 'Number_of_Casualties'
].dropna()
wet_casualties = df_filtered.loc[
    df_filtered['Road_Surface_Conditions'] == 'Wet/Damp', 'Number_of_Casualties'
].dropna()

# Perform two-sample t-test (independent samples); equal_var=False selects
# Welch's t-test, which does not assume equal variances.
t_stat, p_value = ttest_ind(dry_casualties, wet_casualties, equal_var=False)

# Display results
print("Two-Sample t-Test (Dry vs Wet Roads):")
print(f"t-statistic = {t_stat:.4f}")
print(f"p-value = {p_value:.4f}")

# Interpret the result
alpha = 0.05
if p_value < alpha:
    print("Result: Reject the null hypothesis.")
    print("Conclusion: There is a statistically significant difference in mean casualties between Dry and Wet roads.")
else:
    print("Result: Fail to reject the null hypothesis.")
    print("Conclusion: No significant difference in mean casualties between Dry and Wet roads.")

# Objective 7:
# Count number of accidents in Urban and Rural areas
urban_count = int((df['Urban_or_Rural_Area'] == 'Urban').sum())
rural_count = int((df['Urban_or_Rural_Area'] == 'Rural').sum())

# Total number of accidents
total_count = urban_count + rural_count

# Urban and rural counts come from the SAME sample and always add up to
# total_count, so they are not two independent groups and a two-sample
# proportion test does not apply (it overstates the z-statistic).  "Urban and
# rural shares are equal" is the same as "urban share == 0.5", which is a
# one-sample test; prop_var=0.5 uses the variance under that null hypothesis.
z_stat, p_value = proportions_ztest(
    count=urban_count, nobs=total_count, value=0.5, prop_var=0.5
)

# Display results
print("Proportion Z-Test (Urban vs Rural Accidents):")
print(f"z-statistic = {z_stat:.4f}")
print(f"p-value = {p_value:.4f}")

# Interpret the result
alpha = 0.05
if p_value < alpha:
    print("Result: Reject the null hypothesis.")
    print("Conclusion: There is a significant difference in accident proportions between Urban and Rural areas.")
else:
    print("Result: Fail to reject the null hypothesis.")
    print("Conclusion: No significant difference in accident proportions between Urban and Rural areas.")

# Objective 8:

# Convert 'Accident Date' column to datetime format (day comes first)
df['Accident_Date'] = pd.to_datetime(df['Accident Date'], dayfirst=True)

# Bucket each accident into its calendar month
# (use .dt.to_period('W') instead for a weekly trend).
df['Month'] = df['Accident_Date'].dt.to_period('M')

# Group by Month and Accident_Severity: one row per month, one column per
# severity level, 0 where a combination has no accidents.
monthly_trend = (
    df.groupby(['Month', 'Accident_Severity']).size().unstack().fillna(0)
)

# Plotting
plt.figure(figsize=(12, 6))
month_labels = monthly_trend.index.astype(str)
for severity in monthly_trend.columns:
    plt.plot(month_labels, monthly_trend[severity], label=severity)

plt.title("Monthly Accident Frequency by Severity Level")
plt.xlabel("Month")
plt.ylabel("Number of Accidents")
plt.legend(title='Accident Severity')
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True)
plt.show()