# Repository layout (kept from the original dump header):
#   ├── Report.docx
#   ├── Screenshots.docx
#   └── Project.py
#
# /Report.docx:
#   https://raw.githubusercontent.com/AmegDines/Project-python/HEAD/Report.docx
# /Screenshots.docx:
#   https://raw.githubusercontent.com/AmegDines/Project-python/HEAD/Screenshots.docx
# /Project.py:

"""Clean the electric-vehicle population CSV and draw summary charts.

The script reads ``Electric_Vehicle_Population_Uncleaned.csv`` from the
current working directory, normalises a handful of text/numeric columns,
drops duplicate and incomplete rows, and then shows two batches of
matplotlib/seaborn figures:

1. a 2x2 overview, average MSRP by EV type, a range-by-year box plot and
   a range-by-model bar chart;
2. yearly trends of average electric range and average base MSRP.
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv("Electric_Vehicle_Population_Uncleaned.csv")

# ---------------------------------------------------------------------------
# Cleaning
# ---------------------------------------------------------------------------

# Normalise casing/whitespace so the same make or model is not counted twice.
df['Make'] = df['Make'].str.strip().str.upper()
df['Model'] = df['Model'].str.strip().str.title()

# Remove every character that is not an ASCII letter or whitespace.
df['City'] = df['City'].str.replace(r'[^a-zA-Z\s]', '', regex=True).str.strip()

# Keep the first run of five digits / four digits; values without such a run
# become NaN.
df['Postal Code'] = df['Postal Code'].astype(str).str.extract(r'(\d{5})')
df['Model Year'] = df['Model Year'].astype(str).str.extract(r'(\d{4})').astype(float)

df.drop_duplicates(inplace=True)
df.dropna(
    subset=['County', 'City', 'Electric Vehicle Type', 'Electric Utility'],
    inplace=True,
)

# Non-numeric entries are coerced to NaN instead of raising.
df['Electric Range'] = pd.to_numeric(df['Electric Range'], errors='coerce')
df['Base MSRP'] = pd.to_numeric(df['Base MSRP'], errors='coerce')

# ---------------------------------------------------------------------------
# Figure 1: 2x2 overview
# ---------------------------------------------------------------------------

sns.set(style="whitegrid")
plt.figure(figsize=(12, 10))

# Top-left: the ten most frequent makes.
plt.subplot(2, 2, 1)
top_makes = df['Make'].value_counts().head(10)
top_makes_df = top_makes.reset_index()
top_makes_df.columns = ['Make', 'Count']
# 'Hue' duplicates the category column so the palette colours each bar;
# the legend would only repeat the y-axis labels, so it is switched off.
top_makes_df['Hue'] = top_makes_df['Make']
sns.barplot(
    data=top_makes_df, x='Count', y='Make',
    hue='Hue', palette="viridis", legend=False,
)
plt.title("Top 10 EV Makes")

# Top-right: share of each electric vehicle type.
plt.subplot(2, 2, 2)
ev_type_counts = df['Electric Vehicle Type'].value_counts()
plt.pie(
    ev_type_counts.values,
    labels=ev_type_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=sns.color_palette("pastel"),
)
plt.title("Electric Vehicle Type Distribution")

# Bottom-left: mean electric range per model year.
plt.subplot(2, 2, 3)
range_by_year = df.groupby('Model Year')['Electric Range'].mean().dropna()
sns.lineplot(x=range_by_year.index, y=range_by_year.values, marker='o')
plt.title("Avg Electric Range by Model Year")
plt.xlabel("Model Year")
plt.ylabel("Average Range (mi)")

# Bottom-right: the ten counties with the most rows.
plt.subplot(2, 2, 4)
top_counties = df['County'].value_counts().head(10)
top_counties_df = top_counties.reset_index()
top_counties_df.columns = ['County', 'Count']
top_counties_df['Hue'] = top_counties_df['County']
sns.barplot(
    data=top_counties_df, x='Count', y='County',
    hue='Hue', palette="coolwarm", legend=False,
)
plt.title("Top 10 Counties by EV Registrations")

# tight_layout() only adjusts the *current* figure, so it has to be called
# here, before the next plt.figure() makes a different figure current.
plt.tight_layout()

# ---------------------------------------------------------------------------
# Figure 2: average base MSRP per EV type
# ---------------------------------------------------------------------------

plt.figure(figsize=(8, 6))
avg_price_by_type = (
    df.groupby('Electric Vehicle Type')['Base MSRP']
    .mean()
    .dropna()
    .sort_values(ascending=False)
)
avg_price_df = avg_price_by_type.reset_index()
avg_price_df['Hue'] = avg_price_df['Electric Vehicle Type']

sns.barplot(
    data=avg_price_df, x='Base MSRP', y='Electric Vehicle Type',
    hue='Hue', palette='Set2', legend=False,
)
plt.title("Average Base MSRP by EV Type")
plt.xlabel("Average MSRP ($)")
plt.ylabel("EV Type")
plt.tight_layout()

# ---------------------------------------------------------------------------
# Figure 3: electric range spread per model year
# ---------------------------------------------------------------------------

plt.figure(figsize=(14, 6))
sns.boxplot(data=df, x='Model Year', y='Electric Range', color='lightblue')
# Every other chart in this script is titled; this one was left blank.
plt.title('Electric Range Distribution by Model Year')
plt.tight_layout()

# ---------------------------------------------------------------------------
# Figure 4: electric range per model
# ---------------------------------------------------------------------------

plt.figure(figsize=(14, 6))
# barplot aggregates the rows of each model with seaborn's default estimator.
sns.barplot(data=df, x='Model', y='Electric Range')
plt.title('Electric Range Distribution by Model')
plt.xlabel('Model')
plt.ylabel('Electric Range (miles)')
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# Figure 5: yearly trends
# ---------------------------------------------------------------------------

yearly_stats = (
    df.groupby('Model Year')[['Electric Range', 'Base MSRP']]
    .mean()
    .reset_index()
)

plt.figure(figsize=(10, 6))
# NOTE(review): both series share one y-axis although the labels used above
# put range in miles and MSRP in dollars; consider a secondary axis if the
# range line turns out to be unreadable next to the MSRP line.
sns.lineplot(
    data=yearly_stats, x='Model Year', y='Electric Range',
    label='Avg Electric Range', marker='o',
)
sns.lineplot(
    data=yearly_stats, x='Model Year', y='Base MSRP',
    label='Avg Base MSRP', marker='s',
)
plt.title('Trends in Electric Range and Base MSRP Over Time')
plt.xlabel('Model Year')
plt.ylabel('Average Value')
plt.grid(True)

plt.tight_layout()
plt.show()