├── Final_python_project_file.docx
├── Python_Project_Final.py
├── README.md
└── final_dataset_python.csv


/Final_python_project_file.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vansh12-bit/Python-Project/ee8891384164a32a617d8994e5bc2841447134d3/Final_python_project_file.docx


--------------------------------------------------------------------------------
/Python_Project_Final.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | import scipy.stats as stats
  6 | from scipy.stats import ttest_ind
  7 | from statsmodels.stats.proportion import proportions_ztest
  8 | 
  9 | df = pd.read_csv("C:/Users/ASUS/Documents/final_dataset_python.csv")
 10 | print(df.head())
 11 | print(df.tail())
 12 | print(df.describe())
 13 | print(df.info())
 14 | 
 15 | 
 16 | #Objective No.1
 17 | # Summary statistics from describe() with default arguments; the "mean" row for Speed_limit and Number_of_Vehicles is read from it later
 18 | summary_stats = df.describe()
 19 | # Count how many accidents fall into each Accident_Severity category
 20 | most_common_accidents = df["Accident_Severity"].value_counts()
 21 | 
 22 | #Objective No.2
 23 | # (2.1): Bar chart of the mean Speed_limit and mean Number_of_Vehicles, taken from the "mean" row of summary_stats
 24 | summary_stats.loc["mean", ["Speed_limit", "Number_of_Vehicles"]].plot(kind='bar', color=['blue', 'red'])
 25 | plt.title("Average Speed Limit & Number of Vehicles in Accidents")
 26 | plt.xlabel("Category")
 27 | plt.ylabel("Average Value")
 28 | plt.show()
 29 | 
 30 | # (2.2): Pie chart of the share of accidents in each severity category
 31 | plt.figure(figsize=(6, 6))
 32 | most_common_accidents.plot(kind='pie', autopct='%1.1f%%', colors=['green', 'orange', 'purple'])
 33 | plt.title("Distribution of Accident Severities")
 34 | plt.ylabel("")  # Hide default ylabel for better visualization
 35 | plt.show()
 36 | 
 37 | # (2.3): Bar chart (not a histogram) of accident counts per Day_of_Week value, ordered by sort_index()
 38 | plt.figure(figsize=(8, 5))
 39 | df["Day_of_Week"].value_counts().sort_index().plot(kind='bar', color='cyan')
 40 | plt.title("Accidents Noticed Based on Day of the Week")
 41 | plt.xlabel("Day of the Week")
 42 | plt.ylabel("Number of Accidents")
 43 | plt.xticks(rotation=45)
 44 | plt.show()
 45 | 
 46 | print(df["Accident_Severity"].unique())  # List the distinct severity labels present in the data
 47 | 
 48 | #Objective 3:
 49 | # Map each Accident_Severity label to the color used for its points
 50 | palette = {
 51 |     "Serious": "red",
 52 |     "Slight": "blue",
 53 |     "Fatal": "black",
 54 |     "Fetal": "purple"  # Only include if your data actually has "Fetal" (presumably a misspelling of "Fatal" in the dataset; verify against the unique() output)
 55 | }
 56 | 
 57 | # Scatter accident coordinates (Longitude on x, Latitude on y), colored by Accident_Severity
 58 | plt.figure(figsize=(10, 6))
 59 | sns.scatterplot(data=df, x="Longitude", y="Latitude", hue="Accident_Severity", palette=palette)
 60 | plt.title("Accident Locations Based on Latitude and Longitude")
 61 | plt.xlabel("Longitude")
 62 | plt.ylabel("Latitude")
 63 | plt.legend(title="Accident Severity")
 64 | plt.show()
 65 | 
 66 | #Objective 4:
 67 | # Scatter plot of Number_of_Casualties against Number_of_Vehicles (visual check only; no correlation coefficient is computed)
 68 | plt.figure(figsize=(8, 6))
 69 | plt.scatter(df["Number_of_Vehicles"], df["Number_of_Casualties"], color='blue', alpha=0.5)
 70 | plt.title("Relationship Between Number of Casualties and Number of Vehicles")
 71 | plt.xlabel("Number of Vehicles Involved")
 72 | plt.ylabel("Number of Casualties")
 73 | plt.grid(True)
 74 | plt.show()
 75 | 
 76 | 
 77 | #Objective 5: Count plot of accidents per Carriageway_Hazards value, split by Accident_Severity
 78 | plt.figure(figsize=(10, 6))
 79 | sns.countplot(data=df, x="Carriageway_Hazards", hue="Accident_Severity", palette="Set2")
 80 | plt.title("Impact of Carriageway Hazards on Accident Severity")
 81 | plt.xlabel("Carriageway Hazards")
 82 | plt.ylabel("Accident Frequency")
 83 | plt.xticks(rotation=45)  # Rotate the hazard labels on the x-axis
 84 | plt.legend(title="Severity")
 85 | plt.show()
 86 | 
 87 | #Objective 6:
 88 | # Filter data for only 'Dry' and 'Wet/Damp' road conditions
 89 | df_filtered = df[df['Road_Surface_Conditions'].isin(['Dry', 'Wet/Damp'])]
 90 | 
 91 | # Create two separate samples
 92 | dry_casualties = df_filtered[df_filtered['Road_Surface_Conditions'] == 'Dry']['Number_of_Casualties']
 93 | wet_casualties = df_filtered[df_filtered['Road_Surface_Conditions'] == 'Wet/Damp']['Number_of_Casualties']
 94 | 
 95 | # Remove any missing or null values
 96 | dry_casualties = dry_casualties.dropna()
 97 | wet_casualties = wet_casualties.dropna()
 98 | 
 99 | # Perform two-sample t-test (independent samples)
100 | t_stat, p_value = ttest_ind(dry_casualties, wet_casualties, equal_var=False)  # Welch's t-test
101 | 
102 | # Display results
103 | print("Two-Sample t-Test (Dry vs Wet Roads):")
104 | print(f"t-statistic = {t_stat:.4f}")
105 | print(f"p-value = {p_value:.4f}")
106 | 
107 | # Interpret the result
108 | alpha = 0.05
109 | if p_value < alpha:
110 |     print("Result: Reject the null hypothesis.")
111 |     print("Conclusion: There is a statistically significant difference in mean casualties between Dry and Wet roads.")
112 | else:
113 |     print("Result: Fail to reject the null hypothesis.")
114 |     print("Conclusion: No significant difference in mean casualties between Dry and Wet roads.")
115 | 
116 | #Objective 7:
117 |     # Count number of accidents in Urban and Rural areas
118 | urban_count = df[df['Urban_or_Rural_Area'] == 'Urban'].shape[0]
119 | rural_count = df[df['Urban_or_Rural_Area'] == 'Rural'].shape[0]
120 | 
121 | # Total number of accidents
122 | total_count = urban_count + rural_count
123 | 
124 | # Number of accidents in each group
125 | successes = [urban_count, rural_count]
126 | 
127 | # Total observations in each group
128 | nobs = [total_count, total_count]
129 | 
130 | # Perform two-proportion z-test
131 | z_stat, p_value = proportions_ztest(count=successes, nobs=nobs)
132 | 
133 | # Display results
134 | print("Proportion Z-Test (Urban vs Rural Accidents):")
135 | print(f"z-statistic = {z_stat:.4f}")
136 | print(f"p-value = {p_value:.4f}")
137 | 
138 | # Interpret the result
139 | alpha = 0.05
140 | if p_value < alpha:
141 |     print("Result: Reject the null hypothesis.")
142 |     print("Conclusion: There is a significant difference in accident proportions between Urban and Rural areas.")
143 | else:
144 |     print("Result: Fail to reject the null hypothesis.")
145 |     print("Conclusion: No significant difference in accident proportions between Urban and Rural areas.")
146 | 
147 | #Objective 8:
148 |     
149 | # Parse the 'Accident Date' column as day-first dates into a new 'Accident_Date' column
150 | df['Accident_Date'] = pd.to_datetime(df['Accident Date'], dayfirst=True)  # NOTE(review): source column name contains a space, unlike the other columns used in this script; confirm against the CSV header
151 | 
152 | # Create a new column holding the calendar month of each accident
153 | df['Month'] = df['Accident_Date'].dt.to_period('M')  # For monthly trend
154 | # df['Week'] = df['Accident_Date'].dt.to_period('W')  # Uncomment for weekly trend
155 | 
156 | # Count accidents per (Month, Accident_Severity); unstack() turns severities into columns and fillna(0) fills month/severity pairs with no accidents
157 | monthly_trend = df.groupby(['Month', 'Accident_Severity']).size().unstack().fillna(0)
158 | 
159 | # Plot one line per severity level, with the month periods converted to strings for the x-axis
160 | plt.figure(figsize=(12, 6))
161 | for severity in monthly_trend.columns:
162 |     plt.plot(monthly_trend.index.astype(str), monthly_trend[severity], label=severity)
163 | 
164 | plt.title("Monthly Accident Frequency by Severity Level")
165 | plt.xlabel("Month")
166 | plt.ylabel("Number of Accidents")
167 | plt.legend(title='Accident Severity')
168 | plt.xticks(rotation=45)
169 | plt.tight_layout()
170 | plt.grid(True)
171 | plt.show()
172 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python-Project
2 | Using Python toolkits for data visualization.
3 | 


--------------------------------------------------------------------------------