├── Figure_1.png
├── Figure_2.png
├── Figure_3.png
├── T-ouput2.png
├── T-output.png
└── DSM5.py

/Figure_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rahat-karim/PRODIGY_TrackCode_DS_Task5/HEAD/Figure_1.png
--------------------------------------------------------------------------------

/Figure_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rahat-karim/PRODIGY_TrackCode_DS_Task5/HEAD/Figure_2.png
--------------------------------------------------------------------------------

/Figure_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rahat-karim/PRODIGY_TrackCode_DS_Task5/HEAD/Figure_3.png
--------------------------------------------------------------------------------

/T-ouput2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rahat-karim/PRODIGY_TrackCode_DS_Task5/HEAD/T-ouput2.png
--------------------------------------------------------------------------------

/T-output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rahat-karim/PRODIGY_TrackCode_DS_Task5/HEAD/T-output.png
--------------------------------------------------------------------------------

/DSM5.py:
--------------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Path to the CSV file
file_path = 'US_Accidents_March23.csv'

# Load the dataset
try:
    df = pd.read_csv(file_path, on_bad_lines='skip')
except pd.errors.ParserError as e:
    print(f"ParserError: {e}")
    raise SystemExit(1)
except FileNotFoundError as e:
    print(f"FileNotFoundError: {e}")
    raise SystemExit(1)

# Report missing values per column
print(df.isnull().sum())

# Drop rows missing the columns this analysis depends on
df.dropna(subset=['Start_Time', 'End_Time', 'Weather_Condition'], inplace=True)

# Convert date columns to datetime; unparseable values become NaT
df['Start_Time'] = pd.to_datetime(df['Start_Time'], errors='coerce')
df['End_Time'] = pd.to_datetime(df['End_Time'], errors='coerce')

# Drop rows where datetime conversion failed
df.dropna(subset=['Start_Time', 'End_Time'], inplace=True)

# Distribution of accidents over time
df['Hour'] = df['Start_Time'].dt.hour
sns.histplot(df['Hour'], bins=24, kde=True)
plt.title('Distribution of Accidents by Hour')
plt.xlabel('Hour of the Day')
plt.ylabel('Number of Accidents')
plt.show()

# Distribution by weather condition, restricted to the 20 most frequent
# conditions (the full set has too many categories to label readably)
top_weather = df['Weather_Condition'].value_counts().head(20).index
sns.countplot(x='Weather_Condition', data=df, order=top_weather)
plt.title('Distribution of Accidents by Weather Condition')
plt.xticks(rotation=90)
plt.xlabel('Weather Condition')
plt.ylabel('Number of Accidents')
plt.tight_layout()
plt.show()

# Visualizing accident hotspots
# Work on a 10,000-row sample to keep the HTML map responsive
df_sample = df.sample(n=10000, random_state=1)

# Create a base map centred on the sample's mean coordinates
base_map = folium.Map(
    location=[df_sample['Start_Lat'].mean(), df_sample['Start_Lng'].mean()],
    zoom_start=5,
)

# Add a heatmap layer of accident start coordinates
heat_data = df_sample[['Start_Lat', 'Start_Lng']].dropna().values.tolist()
HeatMap(heat_data).add_to(base_map)

# Save map
base_map.save("accident_hotspots.html")

# Correlation analysis: Hour and Severity are already numeric, so only
# Weather_Condition needs factorizing into integer codes (factorizing the
# numeric columns too would scramble their ordering)
corr_df = df[['Hour', 'Severity']].copy()
corr_df['Weather_Condition'] = pd.factorize(df['Weather_Condition'])[0]
corr = corr_df.corr()
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation between Factors')
plt.show()

# Further analysis and machine learning (optional)

# Feature engineering
df['Day_of_Week'] = df['Start_Time'].dt.dayofweek

# Select features and target variable: keep the numeric columns as-is and
# encode only the categorical weather column
features = df[['Hour', 'Day_of_Week']].copy()
features['Weather_Condition'] = pd.factorize(df['Weather_Condition'])[0]
target = df['Severity']

# Split data
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)

# Train model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
--------------------------------------------------------------------------------
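
A minimal follow-up sketch, not part of DSM5.py: once clf is fitted, scikit-learn's impurity-based feature_importances_ attribute shows how much each of the three encoded features contributes to the forest's splits. The snippet assumes the clf and features objects from the script above.

# Hypothetical follow-up (assumes clf and features from DSM5.py):
# rank the model's inputs by impurity-based importance.
import pandas as pd

importances = pd.Series(clf.feature_importances_, index=features.columns)
print(importances.sort_values(ascending=False))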
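
One design note on the modelling step: US_Accidents_March23.csv runs to several million rows, so fitting 100 trees on a 70% split can be slow on a laptop. A sketch of one workaround, assuming the same features and target frames; the 200,000-row cap is an illustrative choice, not from the original script.

# Sketch only: train on a capped random sample instead of the full split.
sample_idx = features.sample(n=min(len(features), 200_000), random_state=42).index
X_tr, X_te, y_tr, y_te = train_test_split(
    features.loc[sample_idx], target.loc[sample_idx],
    test_size=0.3, random_state=42,
)
clf_small = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
clf_small.fit(X_tr, y_tr)
print(classification_report(y_te, clf_small.predict(X_te)))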