├── 1.py
└── README.md


/1.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | 
  6 | # Loading dataset
  7 | dt=pd.read_csv("C:\\Users\\HP\\OneDrive\\Desktop\\toolbox\\Electric_Vehicle_Population_Data1.csv",encoding="unicode_escape")
  8 | print(dt)
  9 | 
 10 | #Exploring dataset
 11 | print("Information: \n",dt.info())
 12 | print("Description: \n",dt.describe())
 13 | 
 14 | #Handling Missing Values
 15 | print("Missing values before handling:\n", dt.isnull().sum())
 16 | dt = dt.dropna(subset=['Model', 'County', 'City', 'State', 'Postal Code', 'Electric Vehicle Type', 'Base MSRP', 'Legislative District', 'DOL Vehicle ID', 'Vehicle Location', 'Electric Utility', '2020 Census Tract'])
 17 | dt['Electric Range'] = dt['Electric Range'].fillna(0)
 18 | dt['Base MSRP'] = dt['Base MSRP'].fillna(0)
 19 | dt['Clean Alternative Fuel Vehicle (CAFV) Eligibility'] = dt['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].fillna("Unknown")
 20 | print("missing values ",dt.isnull().sum())
 21 | 
 22 | #remove duplicate rows
 23 | dt=dt.drop_duplicates()
 24 | print(dt)
 25 | 
 26 | #Basic operation Performed
 27 | print("1st 12 rows of Dataset: \n",dt.head(12))
 28 | print("1st 12 rows of Dataset: \n",dt.tail(12))
 29 | print("Shape of Dataset: \n",dt.shape)
 30 | print("Column of Dataset: \n",dt.columns)
 31 | print("Datatype of Dataset: \n",dt.dtypes)
 32 | dt.to_csv("cleaned_dataset.csv", index=False)
 33 | print("New Dataset Succesfully")
 34 | 
 35 | # Remove the column from given dataset 
 36 | print(dt.drop(['Electric Utility','2020 Census Tract'],axis=1,inplace=True))
 37 | print("Information: \n",dt.info())
 38 | 
 39 | # change the column
 40 | print(dt.columns)
 41 | dt.columns = dt.columns.str.strip()
 42 | dt.rename(columns={'Electric Vehicle Type': 'EVT'},inplace=True)
 43 | print(dt.columns)
 44 | print(dt.columns)
 45 | dt.columns = dt.columns.str.strip()
 46 | dt.rename(columns={'Legislative District': 'LD'},inplace=True)
 47 | print(dt.columns)
 48 | 
 49 | # Clean column names
 50 | dt.columns = dt.columns.str.strip()
 51 | 
 52 | #find top EV locations
 53 | dt = dt.dropna(subset=['City', 'County', 'Postal Code']) #any missing values
 54 | 
 55 | #Top 10 cities with most evs
 56 | top_cities = dt['City'].value_counts().head(10)
 57 | print("Top 10 Cities:\n", top_cities)
 58 | 
 59 | # Countplot for LD
 60 | plt.figure(figsize=(10, 6))
 61 | sns.countplot(x='Model Year', hue='Model Year', data=dt, palette='coolwarm', legend=False)
 62 | plt.title("Count Plot")
 63 | plt.show()
 64 | 
 65 | # Scatterplot for Age vs Salary 
 66 | sns.scatterplot(x='Electric Range', y='LD', data=dt, hue='LD', palette='coolwarm')
 67 | plt.title("Scatter Plot")
 68 | plt.show()
 69 | 
 70 | # Select a subset of data with fewer unique cities and postal codes
 71 | top_cities = dt['City'].value_counts().nlargest(10).index  # Top 10 cities
 72 | filtered_data = dt[dt['City'].isin(top_cities)]
 73 | 
 74 | #select a numerical column
 75 | column='LD'
 76 | plt.figure(figsize=(8,5))
 77 | sns.boxplot(x=dt[column])
 78 | plt.title("Boxplot for outliers Detection")
 79 | 
 80 | #The distribution is slightly skewed left
 81 | Q1=dt[column].quantile(0.25)
 82 | print('Q1:',Q1)
 83 | Q3=dt[column].quantile(0.75)
 84 | print('Q3:',Q3)
 85 | IQR=Q3-Q1
 86 | print('IQR:',IQR)
 87 | 
 88 | lower_bound=Q1-1.5*IQR
 89 | print('lower bound:',lower_bound)
 90 | upper_bound=Q3+1.5*IQR
 91 | print('upper bound:',upper_bound)
 92 | 
 93 | # Identifying outliers
 94 | outliers = dt[(dt[column] < lower_bound) | (dt[column] > upper_bound)]
 95 | print("Outliers detected:\n",outliers)
 96 | 
 97 | # Boxplot of DOL Vehicle ID
 98 | plt.figure(figsize=(8, 5))
 99 | sns.boxplot(x=dt['DOL Vehicle ID'], color='teal')
100 | plt.title("Boxplot of DOL Vehicle ID")
101 | plt.xlabel("DOL Vehicle ID")
102 | plt.grid(True)
103 | plt.show()
104 | 
# Histogram (30 bins, with a KDE overlay) of the 'Electric Range' column
plt.figure(figsize=(8, 5))
range_values = dt['Electric Range']
hist_axes = sns.histplot(range_values, bins=30, kde=True, color='purple')
hist_axes.set_title("Distribution of Electric Range")
hist_axes.set_xlabel("Electric Range")
hist_axes.set_ylabel("Frequency")
hist_axes.grid(True)
plt.show()
113 | 
# City-wise EV count (top 10), drawn as horizontal bars
top_cities = dt['City'].value_counts().head(10)
plt.figure(figsize=(10, 6))
# seaborn 0.13 deprecates passing `palette` without `hue`. Mapping hue to the
# categorical variable and hiding the legend is the documented replacement and
# matches the hue/legend=False style the count plot in this script already uses.
sns.barplot(
    x=top_cities.values,
    y=top_cities.index,
    hue=top_cities.index,
    palette='magma',
    legend=False,
)
plt.title("Top 10 Cities with Most EVs")
plt.xlabel("Number of EVs")
plt.ylabel("City")
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.show()
123 | 
# Second cleaning pass: placeholders for gaps (0 for the numeric columns,
# 'Unknown' for the categorical one), then drop incomplete and repeated rows.
defaults = (
    ('Electric Range', 0),
    ('Base MSRP', 0),
    ('Clean Alternative Fuel Vehicle (CAFV) Eligibility', "Unknown"),
)
for field, default in defaults:
    dt[field] = dt[field].fillna(default)

# Rows missing any of these critical values are removed in place
critical_fields = ['Model', 'County', 'City', 'Postal Code', 'EVT', 'Base MSRP']
dt.dropna(subset=critical_fields, inplace=True)

# Exact duplicate rows are removed in place
dt.drop_duplicates(inplace=True)
134 | 
# Annotated heatmap of the pairwise correlations between numeric columns
numeric_corr = dt.corr(numeric_only=True)
plt.figure(figsize=(10, 6))
heatmap_axes = sns.heatmap(numeric_corr, annot=True, cmap='coolwarm')
heatmap_axes.set_title("Correlation Heatmap")
plt.show()
140 | 
# Pair plot over every int64 / float64 column
numeric_frame = dt.select_dtypes(include=['int64', 'float64'])
num_cols = numeric_frame.columns
sns.pairplot(dt[num_cols])
# y=1.02 places the figure-level title just above the top row of the grid
plt.suptitle("Pairplot of Numerical Features", y=1.02)
plt.show()
146 | 
# Bar plot of the ten most frequent postal codes
postal_counts = dt['Postal Code'].value_counts()
top_postal = postal_counts.head(10)
plt.figure(figsize=(10, 5))
postal_axes = sns.barplot(x=top_postal.index, y=top_postal.values, palette='magma')
postal_axes.set_title("Top 10 Postal Codes")
postal_axes.set_ylabel("Count")
plt.xticks(rotation=45)  # slant the tick labels
plt.tight_layout()
plt.show()
156 | 
# Box plot of 'Electric Range' per city, limited to the ten most frequent cities
city_counts = dt['City'].value_counts()
top_cities = city_counts.head(10).index
in_top_city = dt['City'].isin(top_cities)
filtered_dt = dt[in_top_city]
plt.figure(figsize=(12, 6))
city_axes = sns.boxplot(data=filtered_dt, x='City', y='Electric Range')
city_axes.set_title("Electric Range by City")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
166 | 
# Count of each 'EVT' value within the five most frequent counties
county_counts = dt['County'].value_counts()
top5_counties = county_counts.head(5).index
filtered = dt[dt['County'].isin(top5_counties)]
plt.figure(figsize=(13, 9))
county_axes = sns.countplot(x='County', hue='EVT', palette='Set1', data=filtered)
county_axes.set_title("EV Type Distribution in Top 5 Counties")
county_axes.set_xlabel("County")
county_axes.set_ylabel("Count")
county_axes.legend(title='EV Type')
plt.show()
177 | 
# Ten most frequent values of 'Model', drawn as horizontal bars
top_models = dt['Model'].value_counts().head(10)
plt.figure(figsize=(15, 10))
# seaborn 0.13 deprecates passing `palette` without `hue`. Mapping hue to the
# categorical variable and hiding the legend is the documented replacement and
# matches the hue/legend=False style the count plot in this script already uses.
sns.barplot(
    x=top_models.values,
    y=top_models.index,
    hue=top_models.index,
    palette='Accent',
    legend=False,
)
plt.title("Top 10 Most Popular EV Models")
plt.xlabel("Count")
plt.ylabel("Model")
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.show()
187 | 
# Ten models with the highest mean 'Electric Range', drawn as horizontal bars.
# Missing ranges were filled with 0 earlier in the script, so those zeros are
# part of each model's mean.
avg_range_by_model = (
    dt.groupby('Model')['Electric Range']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(12, 6))
# seaborn 0.13 deprecates passing `palette` without `hue`. Mapping hue to the
# categorical variable and hiding the legend is the documented replacement and
# matches the hue/legend=False style the count plot in this script already uses.
sns.barplot(
    x=avg_range_by_model.values,
    y=avg_range_by_model.index,
    hue=avg_range_by_model.index,
    palette='cool',
    legend=False,
)
plt.title("Average Electric Range by Top EV Models")
plt.xlabel("Average Electric Range")
plt.ylabel("Model")
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.show()
197 | 
198 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python-project-
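Data visualization and exploratory analysis of an Electric Vehicle Population dataset using pandas, Matplotlib, and seaborn.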
3 | 


--------------------------------------------------------------------------------