├── 12306063NikeshKumarPythonREport.docx
├── COVID-19_Outcomes_by_Vaccination_Status_-_Historical.csv
└── pythonproject1.py


/12306063NikeshKumarPythonREport.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/95041/python_nikesh/e171af6f12c617d83e323799d7117124a6abbf47/12306063NikeshKumarPythonREport.docx


--------------------------------------------------------------------------------
/pythonproject1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[1]:
  5 | 
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import seaborn as sns
 10 | import matplotlib.pyplot as plt
 11 | 
 12 | 
 13 | # In[2]:
 14 | 
 15 | 
 16 | df = pd.read_csv('COVID.csv')
 17 | 
 18 | 
 19 | # In[3]:
 20 | 
 21 | 
 22 | print(df.columns)
 23 | df.head()
 24 | 
 25 | 
 26 | # In[4]:
 27 | 
 28 | 
 29 | df['Week End'] = pd.to_datetime(df['Week End'])
 30 | 
 31 | # Sort by date
 32 | df = df.sort_values('Week End')
 33 | 
 34 | # (Optional) Filter for only 'All' age group to reduce noise
 35 | df_all = df[df['Age Group'] == 'All']
 36 | 
 37 | # Plotting
 38 | plt.figure(figsize=(14, 6))
 39 | plt.plot(df_all['Week End'], df_all['Unvaccinated Rate'], label='Unvaccinated Rate', color='red')
 40 | plt.plot(df_all['Week End'], df_all['Vaccinated Rate'], label='Vaccinated Rate', color='green')
 41 | plt.plot(df_all['Week End'], df_all['Boosted Rate'], label='Boosted Rate', color='blue')
 42 | 
 43 | plt.title('COVID-19 Case Rates by Vaccination Status Over Time')
 44 | plt.xlabel('Week End Date')
 45 | plt.ylabel('Case Rate')
 46 | plt.legend()
 47 | plt.grid(True)
 48 | plt.tight_layout()
 49 | plt.show()
 50 | 
 51 | 
 52 | # In[5]:
 53 | 
 54 | 
 55 | df['Week End'] = pd.to_datetime(df['Week End'])
 56 | 
 57 | # Filter to only "All" age group and "Deaths" outcome
 58 | df_deaths = df[(df['Age Group'] == 'All') & (df['Outcome'] == 'Deaths')]
 59 | 
 60 | # Drop rows with missing values in the rates
 61 | df_deaths = df_deaths.dropna(subset=['Unvaccinated Rate', 'Vaccinated Rate', 'Boosted Rate'])
 62 | 
 63 | # Calculate average rates over time
 64 | avg_unvacc = df_deaths['Unvaccinated Rate'].mean()
 65 | avg_vacc = df_deaths['Vaccinated Rate'].mean()
 66 | avg_boosted = df_deaths['Boosted Rate'].mean()
 67 | 
 68 | # Create a pie chart
 69 | labels = ['Unvaccinated', 'Vaccinated', 'Boosted']
 70 | sizes = [avg_unvacc, avg_vacc, avg_boosted]
 71 | colors = ['red', 'green', 'blue']
 72 | 
 73 | plt.figure(figsize=(7, 7))
 74 | plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
 75 | plt.title('Average Death Rate by Vaccination Status')
 76 | plt.axis('equal')
 77 | plt.show()
 78 | 
 79 | 
 80 | # In[7]:
 81 | 
 82 | 
 83 | df['Week End'] = pd.to_datetime(df['Week End'])
 84 | 
 85 | # Use only rows with Outcome = 'Cases' and Age Group = 'All'
 86 | df_cases = df[(df['Outcome'] == 'Cases') & (df['Age Group'] == 'All')]
 87 | df_cases = df_cases.dropna(subset=['Unvaccinated Rate'])
 88 | 
 89 | # Convert dates to ordinal (for numerical regression)
 90 | df_cases['Date_Ordinal'] = df_cases['Week End'].map(pd.Timestamp.toordinal)
 91 | 
 92 | # X (dates as numbers) and y (unvaccinated case rate)
 93 | X = df_cases['Date_Ordinal'].values
 94 | y = df_cases['Unvaccinated Rate'].values
 95 | 
 96 | # Manual linear regression: compute slope (m) and intercept (b)
 97 | x_mean = np.mean(X)
 98 | y_mean = np.mean(y)
 99 | m = np.sum((X - x_mean) * (y - y_mean)) / np.sum((X - x_mean)**2)
100 | b = y_mean - m * x_mean
101 | 
102 | # Predict existing values
103 | y_pred = m * X + b
104 | 
105 | # Predict future (next 10 weeks)
106 | future_dates = pd.date_range(start=df_cases['Week End'].max(), periods=10, freq='W')
107 | future_ordinals = future_dates.map(pd.Timestamp.toordinal).values
108 | future_preds = m * future_ordinals + b
109 | 
110 | # Plotting
111 | plt.figure(figsize=(14, 6))
112 | plt.plot(df_cases['Week End'], y, label='Actual Cases (Unvaccinated)', color='blue')
113 | plt.plot(df_cases['Week End'], y_pred, label='Predicted (Trend)', color='orange', linestyle='--')
114 | plt.plot(future_dates, future_preds, label='Future Surge Prediction', color='red', linestyle='dotted')
115 | 
116 | plt.title('Manual Prediction of COVID-19 Case Surges (Unvaccinated)')
117 | plt.xlabel('Week End Date')
118 | plt.ylabel('Case Rate')
119 | plt.legend()
120 | plt.grid(True)
121 | plt.tight_layout()
122 | plt.show()
123 | 
124 | 
125 | # Blue line = actual past data
126 | # 
127 | # Orange dashed = predicted trend line (manual regression)
128 | # 
129 | # Red dotted = forecasted surge for next 10 weeks
130 | 
131 | # In[9]:
132 | 
133 | 
# Overlaid density histograms (with KDE curves) of the all-ages death rate,
# one per vaccination status.

df['Week End'] = pd.to_datetime(df['Week End'])

# All-ages 'Deaths' rows with no missing value in any of the three rates.
deaths_all_ages = (df['Age Group'] == 'All') & (df['Outcome'] == 'Deaths')
df_deaths = df.loc[deaths_all_ages].dropna(
    subset=['Unvaccinated Rate', 'Vaccinated Rate', 'Boosted Rate']
)

# Set plot style
sns.set(style='whitegrid')

# One histogram per status, drawn in this order on the same axes.
plt.figure(figsize=(12, 6))
histogram_specs = (
    ('Unvaccinated Rate', 'red', 'Unvaccinated'),
    ('Vaccinated Rate', 'green', 'Vaccinated'),
    ('Boosted Rate', 'blue', 'Boosted'),
)
for rate_column, bar_color, legend_label in histogram_specs:
    sns.histplot(
        df_deaths[rate_column],
        color=bar_color,
        label=legend_label,
        kde=True,
        stat='density',
        bins=15,
    )

plt.title('Distribution of COVID-19 Mortality Rates by Vaccination Status')
plt.xlabel('Mortality Rate')
plt.ylabel('Density')
plt.legend()
plt.tight_layout()
plt.show()
157 | 
158 | 
159 | # In[ ]:
160 | 
161 | 
162 | 
163 | 
164 | 
165 | # In[14]:
166 | 
167 | 
# 2. Pie Chart – Outcome Distribution
# Each slice is the number of rows recorded for one value of 'Outcome'.
plt.figure(figsize=(8, 8))
outcome_counts = df['Outcome'].value_counts()
plt.pie(
    outcome_counts,
    labels=outcome_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=sns.color_palette("Set2"),
)
plt.title('COVID Outcomes Distribution')
plt.show()
174 | 
175 | 
176 | 
177 | # In[15]:
178 | 
179 | 
# 3. Line Chart – Unvaccinated Case Rate Trends by Age Group
# The table holds several outcomes per week and age group; plotting them all
# on one line per age group yields a sawtooth. Restrict to the 'Cases' outcome
# so each line is a single weekly series, and say so in the title and label.
cases_by_week = df[df['Outcome'] == 'Cases'].copy()
# Parse and sort locally so this cell does not rely on earlier cells having
# already converted and ordered 'Week End'.
cases_by_week['Week End'] = pd.to_datetime(cases_by_week['Week End'])
cases_by_week = cases_by_week.sort_values('Week End')

plt.figure(figsize=(14,7))
age_groups = cases_by_week['Age Group'].unique()
for group in age_groups:
    subset = cases_by_week[cases_by_week['Age Group'] == group]
    # Skip age groups that have no unvaccinated rate at all.
    if not subset['Unvaccinated Rate'].isna().all():
        plt.plot(subset['Week End'], subset['Unvaccinated Rate'], label=group)

plt.title('Unvaccinated Case Rate Over Time by Age Group')
plt.xlabel('Week End')
plt.ylabel('Unvaccinated Case Rate')
# Legend goes outside the axes on the right so it does not cover the lines.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
195 | 
196 | 
197 | # In[16]:
198 | 
199 | 
# 4. Boxplot – Spread of Vaccination Rates
# One box per rate column, drawn over every row of the table.
boxplot_columns = ['Vaccinated Rate', 'Boosted Rate', 'Unvaccinated Rate']
plt.figure(figsize=(12, 6))
sns.boxplot(data=df[boxplot_columns])
plt.title('Distribution of Vaccination Rates')
plt.ylabel('Rate')
plt.tight_layout()
plt.show()
207 | 
208 | 
209 | # In[17]:
210 | 
211 | 
# Correlation analysis: pairwise correlation between the rate and ratio
# columns, shown as an annotated heatmap.
corr_columns = [
    'Unvaccinated Rate',
    'Vaccinated Rate',
    'Boosted Rate',
    'Crude Vaccinated Ratio',
    'Crude Boosted Ratio',
    'Age-Adjusted Vaccinated Rate',
    'Age-Adjusted Boosted Rate',
    'Age-Adjusted Unvaccinated Rate',
]

# Only rows with a value in every selected column take part in the correlation.
corr_df = df[corr_columns].dropna()
corr_matrix = corr_df.corr()

# Heatmap with each coefficient printed to two decimals.
plt.figure(figsize=(12, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='YlGnBu',
    linewidths=0.5,
    fmt='.2f',
)
plt.title('Correlation Between Vaccination Metrics and Outcomes')
plt.tight_layout()
plt.show()
232 | 
233 | 
234 | # In[18]:
235 | 
236 | 
# Compare the number of COVID-19 deaths across different age groups to
# identify the most affected demographic segments.

# Filter for 'Deaths' outcome only
deaths_df = df[df['Outcome'] == 'Deaths']

# Counting rows per age group (value_counts) yields the number of weekly
# records, not the number of deaths. Sum the per-status death counts instead.
# NOTE(review): these column names are taken from the published schema of this
# dataset, not from this script -- confirm against the CSV header. When none
# of them is present, fall back to the row count and label it honestly.
count_cols = [
    col
    for col in ('Outcome Unvaccinated', 'Outcome Vaccinated', 'Outcome Boosted')
    if col in deaths_df.columns
]
if count_cols:
    # Coerce to numbers so stray text cannot turn the sum into concatenation.
    weekly_deaths = deaths_df[count_cols].apply(pd.to_numeric, errors='coerce').sum(axis=1)
    death_counts = weekly_deaths.groupby(deaths_df['Age Group']).sum().sort_index()
    deaths_axis_label = 'Number of Deaths'
else:
    death_counts = deaths_df['Age Group'].value_counts().sort_index()
    deaths_axis_label = 'Number of Weekly Death Records'

# Plotting bar chart
plt.figure(figsize=(12,6))
sns.barplot(x=death_counts.index, y=death_counts.values, palette='coolwarm')
plt.title('COVID-19 Deaths by Age Group')
plt.xlabel('Age Group')
plt.ylabel(deaths_axis_label)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
253 | 
254 | 
255 | # In[19]:
256 | 
257 | 
# Compare the impact of booster doses on death outcomes across different age
# groups: bars for deaths (left axis), a line for the mean boosted rate
# (right axis).
deaths = df[df['Outcome'] == 'Deaths']

# Mean boosted rate per age group.
boosted_vs_death = deaths.groupby('Age Group')['Boosted Rate'].mean().to_frame()

# 'Total Deaths' should be a death count, not the number of weekly rows.
# NOTE(review): the count column names below come from the published schema of
# this dataset, not from this script -- confirm against the CSV header. When
# none is present the original row count is kept as a fallback.
count_cols = [
    col
    for col in ('Outcome Unvaccinated', 'Outcome Vaccinated', 'Outcome Boosted')
    if col in deaths.columns
]
if count_cols:
    weekly_deaths = deaths[count_cols].apply(pd.to_numeric, errors='coerce').sum(axis=1)
    boosted_vs_death['Total Deaths'] = weekly_deaths.groupby(deaths['Age Group']).sum()
else:
    boosted_vs_death['Total Deaths'] = deaths.groupby('Age Group')['Outcome'].count()
boosted_vs_death = boosted_vs_death.reset_index()

# Plotting
fig, ax1 = plt.subplots(figsize=(12,6))

# Bar chart for deaths, bound explicitly to the left-hand axes
sns.barplot(data=boosted_vs_death, x='Age Group', y='Total Deaths', color='salmon', label='Total Deaths', ax=ax1)

# Line plot (secondary y-axis) for boosted rate
ax2 = ax1.twinx()
sns.lineplot(data=boosted_vs_death, x='Age Group', y='Boosted Rate', marker='o', sort=False, ax=ax2, color='green', label='Avg Boosted Rate')

# Titles and labels
ax1.set_title('Booster Rate vs COVID Deaths by Age Group')
ax1.set_xlabel('Age Group')
ax1.set_ylabel('Total Deaths')
ax2.set_ylabel('Average Boosted Rate')

# Legends
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')

# Rotate the labels on ax1: after twinx() the current axes is the twin, whose
# x-axis is hidden, so plt.xticks(rotation=45) would not affect what is shown.
ax1.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
290 | 
291 | 
292 | # In[20]:
293 | 
294 | 
# Box plot of the age-adjusted death rate for each vaccination status.

# The chart is titled and labelled as death rates, so use only 'Deaths' rows;
# taking the columns from the whole table would mix in every other outcome.
deaths_only = df[df['Outcome'] == 'Deaths']
death_data = pd.DataFrame({
    'Vaccinated': deaths_only['Age-Adjusted Vaccinated Rate'],
    'Unvaccinated': deaths_only['Age-Adjusted Unvaccinated Rate'],
    'Boosted': deaths_only['Age-Adjusted Boosted Rate']
})

# Drop rows with missing values
death_data = death_data.dropna()

# Melt to long form (one row per status/value pair) for boxplot compatibility
death_melted = death_data.melt(var_name='Status', value_name='Death Rate')

# Plot
plt.figure(figsize=(10,6))
sns.boxplot(x='Status', y='Death Rate', data=death_melted, palette='pastel')
plt.title('Distribution of Age-Adjusted COVID Death Rates by Vaccination Status')
plt.xlabel('Vaccination Status')
plt.ylabel('Death Rate')
plt.tight_layout()
plt.show()
315 | 
316 | 
317 | # Examine the distribution and variability of COVID-19 death rates among vaccinated and unvaccinated populations.
318 | 
319 | # In[ ]:
320 | 
321 | 
322 | 
323 | 
324 | 


--------------------------------------------------------------------------------