# Repository layout:
#   ├── PYTHON KA PROJECT (1).docx
#   ├── STEP01 CLEANING.py
#   └── STEP02 ALL VISUALIZATIONS.py
#
# /PYTHON KA PROJECT (1).docx:
#   https://raw.githubusercontent.com/QUANTUMVERSECODER/PythonProject/c8aee3f6e551d357b0a1a0816b343f88ef6e08be/PYTHON KA PROJECT (1).docx
#
# /STEP01 CLEANING.py:

import pandas as pd
import re

# STEP 01: CLEANING THE DATASET

# Matches "POINT (<x> <y>)"; the fractional part is optional so whole-number
# coordinates are accepted as well.
_POINT_RE = re.compile(r'POINT \((-?\d+(?:\.\d+)?) (-?\d+(?:\.\d+)?)\)')


def extract_lat_lon(point_str):
    """Split a ``POINT (lon lat)`` string into a ``(latitude, longitude)`` pair.

    The text stores longitude first and latitude second, so the two captured
    numbers are swapped on return to match the function name and the
    ``Latitude`` / ``Longitude`` columns it feeds.

    Args:
        point_str: Cell value from the 'Vehicle Location' column. Anything
            that is not a string (e.g. a missing value) is treated as
            "no location".

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when the
        value is not a string or contains no ``POINT (...)`` pattern.
    """
    if not isinstance(point_str, str):
        return None, None
    match = _POINT_RE.search(point_str)
    if match is None:
        return None, None
    lon, lat = match.groups()
    return float(lat), float(lon)


# The script steps run only when the file is executed directly, so the
# function above can be imported without loading the dataset.
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv(r"C:\Users\91887\Downloads\project dataset.csv")

    # 1: Fill missing categorical values with 'Unknown'
    categorical_cols = ['County', 'City', 'Electric Utility']
    for col in categorical_cols:
        df[col] = df[col].fillna('Unknown')

    # 2: Fill missing numerical values
    # Continuous measures use the median ...
    for col in ('Electric Range', 'Base MSRP'):
        df[col] = df[col].fillna(df[col].median())
    # ... identifier-like codes use the most frequent value.
    for col in ('Postal Code', '2020 Census Tract', 'Legislative District'):
        df[col] = df[col].fillna(df[col].mode()[0])

    # 3: Parse Vehicle Location (split POINT (lon lat) into two columns)
    # Coordinates are extracted before the missing locations are replaced by
    # a placeholder string, so missing rows yield (None, None).
    df['Latitude'], df['Longitude'] = zip(*df['Vehicle Location'].map(extract_lat_lon))
    df['Vehicle Location'] = df['Vehicle Location'].fillna('Unknown location')

    # 4: Standardize text columns
    text_cols = ['County', 'City', 'Make', 'Model', 'Electric Vehicle Type', 'Electric Utility']
# STEP01 CLEANING.py (continued).
# These steps use `df` and `text_cols` created by the earlier cleaning steps
# and run only when the file is executed as a script.
if __name__ == "__main__":
    # 4 (continued): trim whitespace and title-case every text column
    for col in text_cols:
        df[col] = df[col].str.strip().str.title()

    # 5: Drop irrelevant columns
    df.drop(columns=['VIN (1-10)', 'DOL Vehicle ID'], inplace=True)

    # Save cleaned version
    df.to_csv(r"C:\Users\91887\Downloads\cleaned_project_dataset.csv", index=False)


# --------------------------------------------------------------------------------
# /STEP02 ALL VISUALIZATIONS.py:
# --------------------------------------------------------------------------------

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


# Read the CSV written by the cleaning step.
df = pd.read_csv(r"C:\Users\91887\Downloads\cleaned_project_dataset.csv")


# V1 -> HORIZONTAL BAR CHART OF TOP 10 EV MAKES

sns.set(style="whitegrid")
# Vehicle counts for the ten most frequent makes; reused by V3 below.
top_makes = df['Make'].value_counts().head(10)

plt.figure(figsize=(10, 6))
sns.barplot(x=top_makes.values, y=top_makes.index, palette="viridis")
plt.title(" V1-> Top 10 Electric Vehicle Brands", fontsize=17)
plt.xlabel("Number of Vehicles")
plt.ylabel("Brand")
plt.tight_layout()
plt.show()


# V2 -> PIE CHART OF DISTRIBUTION OF EV TYPES

ev_type_counts = df['Electric Vehicle Type'].value_counts()
plt.figure(figsize=(8, 8))
plt.pie(
    ev_type_counts.values,
    labels=ev_type_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=sns.color_palette("coolwarm", len(ev_type_counts)),
)
plt.title("Distribution of Electric Vehicle Types", fontsize=16)
plt.axis('equal')  # Makes the pie chart a circle
plt.show()


# V3 -> LOLLIPOP CHART OF AVERAGE ELECTRIC RANGE

# Restrict to the same ten makes shown in V1.
filtered_df = df[df['Make'].isin(top_makes.index)]
avg_range = filtered_df.groupby('Make')['Electric Range'].mean().sort_values()

plt.figure(figsize=(10, 6))
# Stem (line from 0 to the mean) plus a dot at the mean for each make.
plt.hlines(y=avg_range.index, xmin=0, xmax=avg_range.values, color='skyblue', linewidth=3)
plt.plot(avg_range.values, avg_range.index, "o", color='blue')
plt.title("Average Electric Range by Top 10 EV Makes", fontsize=16)
plt.xlabel("Average Electric Range (miles)")
plt.ylabel("BRAND")
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


# V4 -> GEO INTERACTIVE CHART OF DISTRIBUTION OF EV REGISTRATION (BY COUNTY)

# County name -> 5-digit FIPS code, used to join counts to the county GeoJSON.
fips_map = {
    'Adams': '53001', 'Asotin': '53003', 'Benton': '53005', 'Chelan': '53007',
    'Clallam': '53009', 'Clark': '53011', 'Columbia': '53013', 'Cowlitz': '53015',
    'Douglas': '53017', 'Ferry': '53019', 'Franklin': '53021', 'Garfield': '53023',
    'Grant': '53025', 'Grays Harbor': '53027', 'Island': '53029', 'Jefferson': '53031',
    'King': '53033', 'Kitsap': '53035', 'Kittitas': '53037', 'Klickitat': '53039',
    'Lewis': '53041', 'Lincoln': '53043', 'Mason': '53045', 'Okanogan': '53047',
    'Pacific': '53049', 'Pend Oreille': '53051', 'Pierce': '53053', 'San Juan': '53055',
    'Skagit': '53057', 'Skamania': '53059', 'Snohomish': '53061', 'Spokane': '53063',
    'Stevens': '53065', 'Thurston': '53067', 'Wahkiakum': '53069', 'Walla Walla': '53071',
    'Whatcom': '53073', 'Whitman': '53075', 'Yakima': '53077',
}

county_counts = df['County'].value_counts().reset_index()
county_counts.columns = ['County', 'EV Count']
county_counts['FIPS'] = county_counts['County'].map(fips_map)
# Counties missing from fips_map (for instance the 'Unknown' placeholder
# written by the cleaning step) have no FIPS code and cannot be placed on
# the map, so they are dropped instead of being sent as null locations.
county_counts = county_counts.dropna(subset=['FIPS'])

fig = px.choropleth(
    county_counts,
    geojson="https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json",
    locations='FIPS',
    color='EV Count',
    color_continuous_scale="Viridis",
    scope="usa",
    labels={'EV Count': 'Number of EVs'},
    title="EV Registrations by County in Washington State",
    custom_data=['County', 'EV Count'],
)

# The template must be a single-line string literal; "<br>" is the line
# break inside the hover box.
fig.update_traces(
    hovertemplate="%{customdata[0]} County<br>EV Count: %{customdata[1]}"
)

fig.show()


# V5 -> TREEMAP OF EV TYPE DISTRIBUTION

ev_type_counts = df['Electric Vehicle Type'].value_counts().reset_index()
ev_type_counts.columns = ['EV Type', 'Count']

fig = px.treemap(
    ev_type_counts,
    path=['EV Type'],
    values='Count',
    color='Count',
    color_continuous_scale='Blues',
    title='Distribution of Electric Vehicle Types',
)

fig.show()


# V6 -> SUNBURST OF EV COUNT BY BRAND AND MODEL

# One row per (Make, Model) pair with its number of vehicles.
make_model_counts = df.groupby(['Make', 'Model']).size().reset_index(name='Count')

fig = px.sunburst(
    make_model_counts,
    path=['Make', 'Model'],
    values='Count',
    color='Count',
    color_continuous_scale='RdBu',
    title='Electric Vehicle Distribution by Brand and Model',
)

fig.show()


# V7 -> SCATTER PLOT + MARGINAL HISTOGRAM OF RELATIONSHIP BETWEEN BASE MSRP & ELECTRIC RANGE

fig = px.scatter(
    df,
    x='Base MSRP',
    y='Electric Range',
    color='Make',
    hover_data=['Model'],
    title='Base MSRP vs Electric Range by BRAND',
    marginal_x='histogram',
    marginal_y='histogram',
    opacity=0.6,
    template='plotly_white',
)

fig.show()