├── PYTHON KA PROJECT (1).docx
├── STEP01 CLEANING.py
└── STEP02 ALL VISUALIZATIONS.py
/PYTHON KA PROJECT (1).docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QUANTUMVERSECODER/PythonProject/c8aee3f6e551d357b0a1a0816b343f88ef6e08be/PYTHON KA PROJECT (1).docx
--------------------------------------------------------------------------------
/STEP01 CLEANING.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import re
3 |
# STEP 01: CLEANING THE DATASET
# Read the raw CSV export into a DataFrame.
df = pd.read_csv(r"C:\Users\91887\Downloads\project dataset.csv")

# 1: Missing values in the categorical columns become the literal 'Unknown'.
categorical_cols = ['County', 'City', 'Electric Utility']
df[categorical_cols] = df[categorical_cols].fillna('Unknown')

# 2: Missing numerical values.
# These two columns are filled with their own median.
for median_col in ('Electric Range', 'Base MSRP'):
    df[median_col] = df[median_col].fillna(df[median_col].median())

# These columns are filled with their most frequent value
# (first entry of the mode).
for mode_col in ('Postal Code', '2020 Census Tract', 'Legislative District'):
    df[mode_col] = df[mode_col].fillna(df[mode_col].mode()[0])
20 | # 3: Parse Vehicle Location (split POINT (lon lat) into two columns)
def extract_lat_lon(point_str):
    """Parse a ``POINT (lon lat)`` string into a ``(lat, lon)`` tuple.

    The source text stores the longitude first and the latitude second, so
    the two captured numbers are swapped on return to match this function's
    name and the ``Latitude, Longitude`` unpacking done by its caller.

    Args:
        point_str: The raw cell value. Anything that is not a ``str``
            (for example a missing value) is treated as "no location".

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when the
        value is not a string or contains no recognizable point.
    """
    if not isinstance(point_str, str):
        return None, None

    # Accept integer as well as decimal coordinates, and tolerate optional
    # whitespace after "POINT" and inside the parentheses.
    match = re.search(
        r'POINT\s*\(\s*(-?\d+(?:\.\d+)?)\s+(-?\d+(?:\.\d+)?)\s*\)',
        point_str,
    )
    if match is None:
        return None, None

    longitude, latitude = (float(part) for part in match.groups())
    return latitude, longitude
27 |
# Run the parser over every 'Vehicle Location' value, then unzip the
# resulting 2-tuples into the 'Latitude' and 'Longitude' columns.
coordinate_pairs = df['Vehicle Location'].map(extract_lat_lon)
df['Latitude'], df['Longitude'] = zip(*coordinate_pairs)

# Only after parsing, label the missing locations.
df['Vehicle Location'] = df['Vehicle Location'].fillna('Unknown location')

# 4: Standardize text columns (trim surrounding whitespace, then Title Case).
text_cols = ['County', 'City', 'Make', 'Model', 'Electric Vehicle Type', 'Electric Utility']
for col in text_cols:
    trimmed = df[col].str.strip()
    df[col] = trimmed.str.title()

# 5: Drop irrelevant columns.
columns_to_drop = ['VIN (1-10)', 'DOL Vehicle ID']
df.drop(columns=columns_to_drop, inplace=True)

# Save the cleaned version without the index column.
df.to_csv(r"C:\Users\91887\Downloads\cleaned_project_dataset.csv", index=False)
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/STEP02 ALL VISUALIZATIONS.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | import seaborn as sns
4 | import plotly.express as px
5 |
6 |
# Load the cleaned dataset written by the cleaning step.
df = pd.read_csv(r"C:\Users\91887\Downloads\cleaned_project_dataset.csv")


# V1 -> horizontal bar chart of the top 10 EV makes.

sns.set(style="whitegrid")

# value_counts() is already sorted by frequency, so the first ten rows
# are the ten most common makes.
top_makes = df['Make'].value_counts().iloc[:10]

bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=top_makes.values, y=top_makes.index, palette="viridis", ax=bar_ax)
bar_ax.set_title(" V1-> Top 10 Electric Vehicle Brands", fontsize=17)
bar_ax.set_xlabel("Number of Vehicles")
bar_ax.set_ylabel("Brand")
bar_fig.tight_layout()
plt.show()
24 |
25 |
# V2 -> pie chart of the distribution of EV types.

ev_type_counts = df['Electric Vehicle Type'].value_counts()

# One "coolwarm" colour per EV type.
slice_colors = sns.color_palette("coolwarm", len(ev_type_counts))

pie_fig, pie_ax = plt.subplots(figsize=(8, 8))
pie_ax.pie(
    ev_type_counts.values,
    labels=ev_type_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=slice_colors,
)
pie_ax.set_title("Distribution of Electric Vehicle Types", fontsize=16)
pie_ax.axis('equal')  # Equal aspect ratio keeps the pie circular.
plt.show()
39 |
40 |
# V3 -> lollipop chart of the average electric range for the top 10 makes.

# Names of the ten most frequent makes.
top_makes = df['Make'].value_counts().iloc[:10].index
in_top_makes = df['Make'].isin(top_makes)
filtered_df = df[in_top_makes]

# Mean range per make, sorted ascending.
range_by_make = filtered_df.groupby('Make')['Electric Range']
avg_range = range_by_make.mean().sort_values()

lollipop_fig, lollipop_ax = plt.subplots(figsize=(10, 6))
# Stem of each lollipop: a horizontal line from 0 to the make's mean range.
lollipop_ax.hlines(
    y=avg_range.index,
    xmin=0,
    xmax=avg_range.values,
    color='skyblue',
    linewidth=3,
)
# Head of each lollipop: a marker at the mean range.
lollipop_ax.plot(avg_range.values, avg_range.index, "o", color='blue')
lollipop_ax.set_title("Average Electric Range by Top 10 EV Makes", fontsize=16)
lollipop_ax.set_xlabel("Average Electric Range (miles)")
lollipop_ax.set_ylabel("BRAND")
lollipop_ax.grid(axis='x', linestyle='--', alpha=0.7)
lollipop_fig.tight_layout()
plt.show()
57 |
# V4 -> interactive geo chart of the distribution of EV registrations (by county).

# County name -> FIPS code string, used to match each row against the
# features of the county GeoJSON passed to the choropleth below.
fips_map = {
    'Adams': '53001', 'Asotin': '53003', 'Benton': '53005', 'Chelan': '53007',
    'Clallam': '53009', 'Clark': '53011', 'Columbia': '53013', 'Cowlitz': '53015',
    'Douglas': '53017', 'Ferry': '53019', 'Franklin': '53021', 'Garfield': '53023',
    'Grant': '53025', 'Grays Harbor': '53027', 'Island': '53029', 'Jefferson': '53031',
    'King': '53033', 'Kitsap': '53035', 'Kittitas': '53037', 'Klickitat': '53039',
    'Lewis': '53041', 'Lincoln': '53043', 'Mason': '53045', 'Okanogan': '53047',
    'Pacific': '53049', 'Pend Oreille': '53051', 'Pierce': '53053', 'San Juan': '53055',
    'Skagit': '53057', 'Skamania': '53059', 'Snohomish': '53061', 'Spokane': '53063',
    'Stevens': '53065', 'Thurston': '53067', 'Wahkiakum': '53069', 'Walla Walla': '53071',
    'Whatcom': '53073', 'Whitman': '53075', 'Yakima': '53077',
}

# Number of registrations per county, as a two-column frame.
county_counts = df['County'].value_counts().reset_index()
county_counts.columns = ['County', 'EV Count']
county_counts['FIPS'] = county_counts['County'].map(fips_map)

# Counties missing from fips_map get a NaN FIPS and cannot be placed on the
# map, so leave them out instead of passing NaN locations to plotly.
county_counts = county_counts.dropna(subset=['FIPS'])

fig = px.choropleth(
    county_counts,
    geojson="https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json",
    locations='FIPS',
    color='EV Count',
    color_continuous_scale="Viridis",
    scope="usa",
    labels={'EV Count': 'Number of EVs'},
    title="EV Registrations by County in Washington State",
    custom_data=['County', 'EV Count'],
)

# Hover text is rendered as HTML, so the line break must be an explicit <br>;
# a raw newline inside the string literal is a syntax error.
fig.update_traces(
    hovertemplate="%{customdata[0]} County<br>EV Count: %{customdata[1]}"
)

fig.show()
98 |
99 |
# V5 -> treemap of the EV type distribution.

# Frequency of each EV type as a two-column frame.
ev_type_counts = df['Electric Vehicle Type'].value_counts().reset_index()
ev_type_counts.columns = ['EV Type', 'Count']

# Tile size and tile colour both encode the count.
treemap_options = dict(
    path=['EV Type'],
    values='Count',
    color='Count',
    color_continuous_scale='Blues',
    title='Distribution of Electric Vehicle Types',
)
fig = px.treemap(ev_type_counts, **treemap_options)

fig.show()
115 |
116 |
# V6 -> sunburst of EV count by brand and model.

# Row count for every (Make, Model) pair, flattened into a 'Count' column.
pair_sizes = df.groupby(['Make', 'Model']).size()
make_model_counts = pair_sizes.reset_index(name='Count')

# Inner ring: make; outer ring: model. Size and colour both encode the count.
sunburst_options = dict(
    path=['Make', 'Model'],
    values='Count',
    color='Count',
    color_continuous_scale='RdBu',
    title='Electric Vehicle Distribution by Brand and Model',
)
fig = px.sunburst(make_model_counts, **sunburst_options)

fig.show()
132 |
# V7 -> scatter plot + marginal histograms of the relationship between
# Base MSRP and Electric Range.

# One point per row of df, coloured by make, with the model shown on hover.
# A histogram is drawn along each axis margin.
scatter_options = dict(
    x='Base MSRP',
    y='Electric Range',
    color='Make',
    hover_data=['Model'],
    title='Base MSRP vs Electric Range by BRAND',
    marginal_x='histogram',
    marginal_y='histogram',
    opacity=0.6,
    template='plotly_white',
)
fig = px.scatter(df, **scatter_options)

fig.show()
149 |
--------------------------------------------------------------------------------