├── PYTHON KA PROJECT (1).docx
├── STEP01 CLEANING.py
└── STEP02 ALL VISUALIZATIONS.py


/PYTHON KA PROJECT (1).docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QUANTUMVERSECODER/PythonProject/c8aee3f6e551d357b0a1a0816b343f88ef6e08be/PYTHON KA PROJECT (1).docx


--------------------------------------------------------------------------------
/STEP01 CLEANING.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import re
 3 | 
 4 | #STEP 01: CLEANING THE DATASET 
 5 | # Load the dataset
 6 | df = pd.read_csv(r"C:\Users\91887\Downloads\project dataset.csv")
 7 | 
 8 | # 1: Fill missing categorical values with 'Unknown'
 9 | categorical_cols = ['County', 'City', 'Electric Utility']
10 | for col in categorical_cols:
11 |     df[col] = df[col].fillna('Unknown')
12 | 
13 | # 2: Fill missing numerical values
14 | df['Electric Range'] = df['Electric Range'].fillna(df['Electric Range'].median())
15 | df['Base MSRP'] = df['Base MSRP'].fillna(df['Base MSRP'].median())
16 | df['Postal Code'] = df['Postal Code'].fillna(df['Postal Code'].mode()[0])
17 | df['2020 Census Tract'] = df['2020 Census Tract'].fillna(df['2020 Census Tract'].mode()[0])
18 | df['Legislative District'] = df['Legislative District'].fillna(df['Legislative District'].mode()[0])
19 | 
20 | # 3: Parse Vehicle Location (split POINT (lon lat) into two columns)
def extract_lat_lon(point_str):
    """Parse a WKT ``POINT (lon lat)`` string into a ``(lat, lon)`` tuple.

    WKT points list the x coordinate (longitude) first and the y coordinate
    (latitude) second. The two captured numbers are therefore swapped on
    return so that the result matches this function's name, i.e. latitude
    first and longitude second.

    Args:
        point_str: A location value such as ``"POINT (-122.3 47.6)"``.
            Anything that is not a string (for example NaN or None for a
            missing cell) is treated as having no location.

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when the
        value is not a string or contains no POINT expression.
    """
    if not isinstance(point_str, str):
        return None, None

    # Accept both decimal and integer coordinates, and tolerate extra spaces.
    match = re.search(
        r'POINT\s*\(\s*(-?\d+(?:\.\d+)?)\s+(-?\d+(?:\.\d+)?)\s*\)',
        point_str,
    )
    if match is None:
        return None, None

    longitude, latitude = (float(part) for part in match.groups())
    return latitude, longitude
27 | 
# Build the two coordinate columns from the raw location strings, then
# label the rows that had no location value at all.
parsed_points = df['Vehicle Location'].map(extract_lat_lon)
df['Latitude'], df['Longitude'] = zip(*parsed_points)
df['Vehicle Location'] = df['Vehicle Location'].fillna('Unknown location')

# 4: Standardize text columns (trim surrounding whitespace, then Title Case)
text_cols = ['County', 'City', 'Make', 'Model', 'Electric Vehicle Type', 'Electric Utility']
for col in text_cols:
    trimmed = df[col].str.strip()
    df[col] = trimmed.str.title()

# 5: Drop irrelevant columns
df = df.drop(columns=['VIN (1-10)', 'DOL Vehicle ID'])

# Save cleaned version (without the index column)
cleaned_csv_path = r"C:\Users\91887\Downloads\cleaned_project_dataset.csv"
df.to_csv(cleaned_csv_path, index=False)
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/STEP02 ALL VISUALIZATIONS.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import matplotlib.pyplot as plt
  3 | import seaborn as sns
  4 | import plotly.express as px
  5 | 
  6 | 
  7 | df = pd.read_csv(r"C:\Users\91887\Downloads\cleaned_project_dataset.csv")
  8 | 
  9 | 
 10 | 
 11 | # V1-> HORIZONTAL BAR CHART OF  TOP 10 EV MAKES ->
 12 | 
 13 | 
 14 | sns.set(style="whitegrid")
 15 | top_makes = df['Make'].value_counts().head(10)
 16 | 
 17 | plt.figure(figsize=(10,6))
 18 | sns.barplot(x=top_makes.values, y=top_makes.index, palette="viridis")
 19 | plt.title(" V1-> Top 10 Electric Vehicle Brands", fontsize=17)
 20 | plt.xlabel("Number of Vehicles")
 21 | plt.ylabel("Brand")
 22 | plt.tight_layout()
 23 | plt.show()
 24 | 
 25 | 
 26 | # V2-> PIE CHART OF DISTRIBUTION OF EV TYPES ->
 27 | 
 28 | 
 29 | ev_type_counts = df['Electric Vehicle Type'].value_counts()
 30 | plt.figure(figsize=(8,8))
 31 | plt.pie(ev_type_counts.values, 
 32 |         labels=ev_type_counts.index, 
 33 |         autopct='%1.1f%%', 
 34 |         startangle=140, 
 35 |         colors=sns.color_palette("coolwarm", len(ev_type_counts)))
 36 | plt.title("Distribution of Electric Vehicle Types", fontsize=16)
 37 | plt.axis('equal')  # Makes the pie chart a circle
 38 | plt.show()
 39 | 
 40 | 
 41 | # V3-> LOLLIPOP CHART OF AVERAGE ELECTRIC RANGE->
 42 | 
 43 | top_makes = df['Make'].value_counts().head(10).index
 44 | filtered_df = df[df['Make'].isin(top_makes)]
 45 | 
 46 | avg_range = filtered_df.groupby('Make')['Electric Range'].mean().sort_values()
 47 | 
 48 | plt.figure(figsize=(10,6))
 49 | plt.hlines(y=avg_range.index, xmin=0, xmax=avg_range.values, color='skyblue', linewidth=3)
 50 | plt.plot(avg_range.values, avg_range.index, "o", color='blue')
 51 | plt.title("Average Electric Range by Top 10 EV Makes", fontsize=16)
 52 | plt.xlabel("Average Electric Range (miles)")
 53 | plt.ylabel("BRAND")
 54 | plt.grid(axis='x', linestyle='--', alpha=0.7)
 55 | plt.tight_layout()
 56 | plt.show()
 57 | 
 58 | # V4-> GEO INTERACTIVE CHART OF DISTRIBUTION OF EV REGISTRAION(BY COUNTY)
 59 | 
 60 | fips_map = {
 61 |     'Adams': '53001', 'Asotin': '53003', 'Benton': '53005', 'Chelan': '53007',
 62 |     'Clallam': '53009', 'Clark': '53011', 'Columbia': '53013', 'Cowlitz': '53015',
 63 |     'Douglas': '53017', 'Ferry': '53019', 'Franklin': '53021', 'Garfield': '53023',
 64 |     'Grant': '53025', 'Grays Harbor': '53027', 'Island': '53029', 'Jefferson': '53031',
 65 |     'King': '53033', 'Kitsap': '53035', 'Kittitas': '53037', 'Klickitat': '53039',
 66 |     'Lewis': '53041', 'Lincoln': '53043', 'Mason': '53045', 'Okanogan': '53047',
 67 |     'Pacific': '53049', 'Pend Oreille': '53051', 'Pierce': '53053', 'San Juan': '53055',
 68 |     'Skagit': '53057', 'Skamania': '53059', 'Snohomish': '53061', 'Spokane': '53063',
 69 |     'Stevens': '53065', 'Thurston': '53067', 'Wahkiakum': '53069', 'Walla Walla': '53071',
 70 |     'Whatcom': '53073', 'Whitman': '53075', 'Yakima': '53077'
 71 | }
 72 | 
 73 | 
 74 | county_counts = df['County'].value_counts().reset_index()
 75 | county_counts.columns = ['County', 'EV Count']
 76 | county_counts['FIPS'] = county_counts['County'].map(fips_map)
 77 | 
 78 | 
 79 | fig = px.choropleth(
 80 |     county_counts,
 81 |     geojson="https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json",
 82 |     locations='FIPS',
 83 |     color='EV Count',
 84 |     color_continuous_scale="Viridis",
 85 |     scope="usa",
 86 |     labels={'EV Count': 'Number of EVs'},
 87 |     title="EV Registrations by County in Washington State",
 88 |     custom_data=['County', 'EV Count']  
 89 | )
 90 | 
 91 | 
 92 | fig.update_traces(
 93 |     hovertemplate="<b>%{customdata[0]} County</b><br>EV Count: %{customdata[1]}<extra></extra>"
 94 | )
 95 | 
 96 | fig.update_layout(geo_scope='usa')
 97 | fig.show()
 98 | 
 99 | 
# V5 -> TREEMAP OF EV TYPE DISTRIBUTION

# Two-column frame: the vehicle type and how many rows carry it.
ev_type_counts = (
    df['Electric Vehicle Type']
    .value_counts()
    .rename_axis('EV Type')
    .reset_index(name='Count')
)

treemap_options = dict(
    path=['EV Type'],
    values='Count',
    color='Count',
    color_continuous_scale='Blues',
    title='Distribution of Electric Vehicle Types',
)
fig = px.treemap(ev_type_counts, **treemap_options)

fig.show()
115 | 
116 | 
# V6 -> SUNBURST OF EV COUNT BY BRAND AND MODEL

# Number of rows for every (Make, Model) pair, flattened into a frame.
pair_sizes = df.groupby(['Make', 'Model']).size()
make_model_counts = pair_sizes.reset_index(name='Count')

sunburst_options = dict(
    path=['Make', 'Model'],  # inner ring: make, outer ring: model
    values='Count',
    color='Count',
    color_continuous_scale='RdBu',
    title='Electric Vehicle Distribution by Brand and Model',
)
fig = px.sunburst(make_model_counts, **sunburst_options)

fig.show()
132 | 
# V7 -> SCATTER PLOT + MARGINAL HISTOGRAMS OF THE RELATIONSHIP BETWEEN BASE MSRP & ELECTRIC RANGE

scatter_options = dict(
    x='Base MSRP',
    y='Electric Range',
    color='Make',
    hover_data=['Model'],
    title='Base MSRP vs Electric Range by BRAND',
    # A histogram is drawn along each axis next to the scatter.
    marginal_x='histogram',
    marginal_y='histogram',
    opacity=0.6,
    template='plotly_white',
)
fig = px.scatter(df, **scatter_options)

fig.show()
149 | 


--------------------------------------------------------------------------------