#!/usr/bin/env python
# coding: utf-8
"""Exploratory charts for the COVID-19 outcomes-by-vaccination-status CSV.

This script was exported from a notebook; the ``# In[n]:`` markers are the
original cell boundaries.  It loads the CSV once, parses and sorts the
``Week End`` column, and then draws one matplotlib/seaborn figure per cell.

Repository layout this script was written against::

    ├── 12306063NikeshKumarPythonREport.docx
    ├── COVID-19_Outcomes_by_Vaccination_Status_-_Historical.csv
    └── pythonproject1.py

Accompanying report:
https://raw.githubusercontent.com/95041/python_nikesh/e171af6f12c617d83e323799d7117124a6abbf47/12306063NikeshKumarPythonREport.docx
"""

# In[1]:


from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# In[2]:


# 'COVID.csv' is the name the script has always read, so it still wins when
# it exists.  The repository itself only ships the data under its full name,
# so fall back to that instead of failing with FileNotFoundError.
LEGACY_DATA_FILE = Path('COVID.csv')
REPO_DATA_FILE = Path('COVID-19_Outcomes_by_Vaccination_Status_-_Historical.csv')

csv_path = LEGACY_DATA_FILE if LEGACY_DATA_FILE.exists() else REPO_DATA_FILE
df = pd.read_csv(csv_path)


# In[3]:


print(df.columns)
# A bare `df.head()` only renders inside a notebook; print it so the preview
# is also visible when this file is run as a script.
print(df.head())


# In[4]:


# Parse the dates once.  Every later cell reuses this converted frame, so the
# conversion is not repeated further down.
df['Week End'] = pd.to_datetime(df['Week End'])

# Sort by date so that line plots are drawn in chronological order
df = df.sort_values('Week End')

# Keep the 'All' age group AND the 'Cases' outcome.  Filtering on age group
# alone leaves rows of every outcome in the frame, so each line would jump
# between outcomes on a chart that is titled "Case Rates".
df_all = df[(df['Age Group'] == 'All') & (df['Outcome'] == 'Cases')]

# Plotting
plt.figure(figsize=(14, 6))
plt.plot(df_all['Week End'], df_all['Unvaccinated Rate'], label='Unvaccinated Rate', color='red')
plt.plot(df_all['Week End'], df_all['Vaccinated Rate'], label='Vaccinated Rate', color='green')
plt.plot(df_all['Week End'], df_all['Boosted Rate'], label='Boosted Rate', color='blue')

plt.title('COVID-19 Case Rates by Vaccination Status Over Time')
plt.xlabel('Week End Date')
plt.ylabel('Case Rate')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# In[5]:


# Filter to only "All" age group and "Deaths" outcome
df_deaths = df[(df['Age Group'] == 'All') & (df['Outcome'] == 'Deaths')]

# Drop rows with missing values in the rates
df_deaths = df_deaths.dropna(subset=['Unvaccinated Rate', 'Vaccinated Rate', 'Boosted Rate'])

# Calculate average rates over time
avg_unvacc = df_deaths['Unvaccinated Rate'].mean()
avg_vacc = df_deaths['Vaccinated Rate'].mean()
avg_boosted = df_deaths['Boosted Rate'].mean()

# Create a pie chart of the three averages (each slice is that group's share
# of the sum of the three mean rates)
labels = ['Unvaccinated', 'Vaccinated', 'Boosted']
sizes = [avg_unvacc, avg_vacc, avg_boosted]
colors = ['red', 'green', 'blue']

plt.figure(figsize=(7, 7))
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
plt.title('Average Death Rate by Vaccination Status')
plt.axis('equal')
plt.show()


# In[7]:


# Use only rows with Outcome = 'Cases' and Age Group = 'All'
df_cases = df[(df['Outcome'] == 'Cases') & (df['Age Group'] == 'All')]
# .copy() gives an independent frame, so adding 'Date_Ordinal' below does not
# write into a slice of df (avoids pandas' SettingWithCopyWarning).
df_cases = df_cases.dropna(subset=['Unvaccinated Rate']).copy()

# Convert dates to ordinal (for numerical regression)
df_cases['Date_Ordinal'] = df_cases['Week End'].map(pd.Timestamp.toordinal)

# X (dates as numbers) and y (unvaccinated case rate)
X = df_cases['Date_Ordinal'].values
y = df_cases['Unvaccinated Rate'].values

# Manual least-squares line: slope m = cov(X, y) / var(X), intercept b chosen
# so the line passes through the point of means.
x_mean = np.mean(X)
y_mean = np.mean(y)
m = np.sum((X - x_mean) * (y - y_mean)) / np.sum((X - x_mean)**2)
b = y_mean - m * x_mean

# Fitted values for the observed dates
y_pred = m * X + b

# Predict future (next 10 weeks).  Start one week AFTER the last observation
# and step 7 days at a time so the forecast continues the data's own weekly
# cadence; freq='W' would snap to Sundays and could repeat the last observed
# week.
last_week = df_cases['Week End'].max()
future_dates = pd.date_range(start=last_week + pd.Timedelta(weeks=1), periods=10, freq='7D')
future_ordinals = future_dates.map(pd.Timestamp.toordinal).values
future_preds = m * future_ordinals + b

# Plotting
plt.figure(figsize=(14, 6))
plt.plot(df_cases['Week End'], y, label='Actual Cases (Unvaccinated)', color='blue')
plt.plot(df_cases['Week End'], y_pred, label='Predicted (Trend)', color='orange', linestyle='--')
plt.plot(future_dates, future_preds, label='Future Surge Prediction', color='red', linestyle='dotted')

plt.title('Manual Prediction of COVID-19 Case Surges (Unvaccinated)')
plt.xlabel('Week End Date')
plt.ylabel('Case Rate')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# Blue line = actual past data
#
# Orange dashed = fitted trend line (manual regression)
#
# Red dotted = straight-line extrapolation for the next 10 weeks

# In[9]:


# Reuses df_deaths from In[5]: 'All' age group, 'Deaths' outcome, rows with a
# missing rate already dropped.

# Set plot style
sns.set(style='whitegrid')

# Plot the three histograms on the same axes so they can be compared
plt.figure(figsize=(12, 6))
sns.histplot(df_deaths['Unvaccinated Rate'], color='red', label='Unvaccinated', kde=True, stat='density', bins=15)
sns.histplot(df_deaths['Vaccinated Rate'], color='green', label='Vaccinated', kde=True, stat='density', bins=15)
sns.histplot(df_deaths['Boosted Rate'], color='blue', label='Boosted', kde=True, stat='density', bins=15)

plt.title('Distribution of COVID-19 Mortality Rates by Vaccination Status')
plt.xlabel('Mortality Rate')
plt.ylabel('Density')
plt.legend()
plt.tight_layout()
plt.show()


# In[ ]:





# In[14]:


# 2. Pie Chart – Outcome Distribution
# NOTE(review): value_counts() counts how many ROWS carry each outcome label,
# not how many outcomes occurred -- confirm this is the intended quantity.
plt.figure(figsize=(8,8))
outcome_counts = df['Outcome'].value_counts()
plt.pie(outcome_counts, labels=outcome_counts.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette("Set2"))
plt.title('COVID Outcomes Distribution')
plt.show()



# In[15]:


# 3. Line Chart – Unvaccinated Rate Trends by Age Group
# NOTE(review): `subset` is filtered by age group only, so rows of every
# outcome share one line per group.  Consider restricting df to a single
# 'Outcome' value before plotting.
plt.figure(figsize=(14,7))
age_groups = df['Age Group'].unique()
for group in age_groups:
    subset = df[df['Age Group'] == group]
    # Skip groups that have no unvaccinated rate at all
    if not subset['Unvaccinated Rate'].isna().all():
        plt.plot(subset['Week End'], subset['Unvaccinated Rate'], label=group)

plt.title('Unvaccinated Rate Over Time by Age Group')
plt.xlabel('Week End')
plt.ylabel('Unvaccinated Rate')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# In[16]:


# 4. Boxplot – Spread of Vaccination Rates
plt.figure(figsize=(12,6))
sns.boxplot(data=df[['Vaccinated Rate', 'Boosted Rate', 'Unvaccinated Rate']])
plt.title('Distribution of Vaccination Rates')
plt.ylabel('Rate')
plt.tight_layout()
plt.show()


# In[17]:


# Analyze the relationship between vaccination status and COVID-19 outcomes using correlation analysis.
corr_columns = [
    'Unvaccinated Rate', 'Vaccinated Rate', 'Boosted Rate',
    'Crude Vaccinated Ratio', 'Crude Boosted Ratio',
    'Age-Adjusted Vaccinated Rate', 'Age-Adjusted Boosted Rate',
    'Age-Adjusted Unvaccinated Rate'
]

# Clean data: drop rows with NaN in selected columns
corr_df = df[corr_columns].dropna()

# Compute correlation matrix
corr_matrix = corr_df.corr()

# Plot heatmap
plt.figure(figsize=(12,8))
sns.heatmap(corr_matrix, annot=True, cmap='YlGnBu', linewidths=0.5, fmt='.2f')
plt.title('Correlation Between Vaccination Metrics and Outcomes')
plt.tight_layout()
plt.show()


# In[18]:


# Compare the number of COVID-19 deaths across different age groups to identify the most affected demographic segments.
# Filter for 'Deaths' outcome only
deaths_df = df[df['Outcome'] == 'Deaths']

# Group by Age Group.
# NOTE(review): value_counts() returns the number of 'Deaths' ROWS per age
# group, not a death toll.  If the file has per-row death counts, sum those
# instead -- the column to use cannot be determined from this script.
death_counts = deaths_df['Age Group'].value_counts().sort_index()

# Plotting bar chart
plt.figure(figsize=(12,6))
sns.barplot(x=death_counts.index, y=death_counts.values, palette='coolwarm')
plt.title('COVID-19 Deaths by Age Group')
plt.xlabel('Age Group')
plt.ylabel('Number of Deaths')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# In[19]:


# Compare the impact of booster doses on death outcomes across different age groups.
# Same 'Deaths' filter as In[18], so reuse that frame.
deaths = deaths_df

# Group by Age Group.
# NOTE(review): 'count' tallies rows per group (see the note in In[18]); it
# is labelled 'Total Deaths' only to match the existing chart text.
boosted_vs_death = deaths.groupby('Age Group').agg({
    'Boosted Rate': 'mean',
    'Outcome': 'count'
}).rename(columns={'Outcome': 'Total Deaths'}).reset_index()

# Plotting
fig, ax1 = plt.subplots(figsize=(12,6))

# Bar chart on the primary axis -- passed explicitly rather than relying on
# whichever axes happens to be current.
sns.barplot(data=boosted_vs_death, x='Age Group', y='Total Deaths', color='salmon', label='Total Deaths', ax=ax1)

# Line plot (secondary y-axis) for boosted rate
ax2 = ax1.twinx()
sns.lineplot(data=boosted_vs_death, x='Age Group', y='Boosted Rate', marker='o', sort=False, ax=ax2, color='green', label='Avg Boosted Rate')

# Titles and labels
ax1.set_title('Booster Rate vs COVID Deaths by Age Group')
ax1.set_xlabel('Age Group')
ax1.set_ylabel('Total Deaths')
ax2.set_ylabel('Average Boosted Rate')

# Legends
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')

# Rotate the labels on ax1 explicitly: plt.xticks() acts on the current
# axes, which after twinx() may be the twin whose x-axis is not drawn.
ax1.tick_params(axis='x', labelrotation=45)
plt.tight_layout()
plt.show()


# In[20]:


# Restrict to the 'Deaths' outcome first: the chart is titled and labelled as
# death rates, but the age-adjusted columns of the full frame hold rates for
# every outcome.
death_rows = df[df['Outcome'] == 'Deaths']
death_data = pd.DataFrame({
    'Vaccinated': death_rows['Age-Adjusted Vaccinated Rate'],
    'Unvaccinated': death_rows['Age-Adjusted Unvaccinated Rate'],
    'Boosted': death_rows['Age-Adjusted Boosted Rate']
})

# Drop rows with missing values
death_data = death_data.dropna()

# Melt to long form (one row per status/rate pair) for boxplot compatibility
death_melted = death_data.melt(var_name='Status', value_name='Death Rate')

# Plot
plt.figure(figsize=(10,6))
sns.boxplot(x='Status', y='Death Rate', data=death_melted, palette='pastel')
plt.title('Distribution of Age-Adjusted COVID Death Rates by Vaccination Status')
plt.xlabel('Vaccination Status')
plt.ylabel('Death Rate')
plt.tight_layout()
plt.show()


# Examine the distribution and variability of COVID-19 death rates among vaccinated and unvaccinated populations.

# In[ ]: