├── README.md ├── COVID_report.pdf ├── zip_heatmap.png ├── COVID_report.docx ├── case_rate_boxplot.png ├── case_rate_density.png ├── cases_histogram.png ├── zip_cases_barplot.png ├── zip_tests_barplot.png ├── correlation_heatmap.png ├── weekly_trends_area.png ├── weekly_trends_line.png ├── zip_deaths_barplot.png ├── cases_vs_tests_scatter.png ├── obj3.py ├── obj1.py ├── obj5.py ├── obj6.py ├── obj2.py └── obj4.py /README.md: -------------------------------------------------------------------------------- 1 | # INT375_project -------------------------------------------------------------------------------- /COVID_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/COVID_report.pdf -------------------------------------------------------------------------------- /zip_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/zip_heatmap.png -------------------------------------------------------------------------------- /COVID_report.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/COVID_report.docx -------------------------------------------------------------------------------- /case_rate_boxplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/case_rate_boxplot.png -------------------------------------------------------------------------------- /case_rate_density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/case_rate_density.png -------------------------------------------------------------------------------- /cases_histogram.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/cases_histogram.png -------------------------------------------------------------------------------- /zip_cases_barplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/zip_cases_barplot.png -------------------------------------------------------------------------------- /zip_tests_barplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/zip_tests_barplot.png -------------------------------------------------------------------------------- /correlation_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/correlation_heatmap.png -------------------------------------------------------------------------------- /weekly_trends_area.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/weekly_trends_area.png -------------------------------------------------------------------------------- /weekly_trends_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/weekly_trends_line.png -------------------------------------------------------------------------------- /zip_deaths_barplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/zip_deaths_barplot.png -------------------------------------------------------------------------------- /cases_vs_tests_scatter.png: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/sanjeeb-kr/INT375_project/HEAD/cases_vs_tests_scatter.png
# --------------------------------------------------------------------------------
# /obj3.py:
# --------------------------------------------------------------------------------
# Correlation heatmap of the weekly case, death and test columns.
import pandas as pd


def weekly_correlation(df):
    """Return the correlation matrix of the weekly cases, deaths and tests.

    Calls ``DataFrame.corr`` with default settings on the three weekly
    columns of *df* and returns the resulting 3x3 frame.
    """
    return df[['Cases - Weekly', 'Deaths - Weekly', 'Tests - Weekly']].corr()


def run_obj3():
    """Plot, save and print the correlation matrix of the cleaned CSV."""
    # Plotting libraries are imported here so that the data helper above can
    # be imported without a plotting stack being installed.
    import seaborn as sns
    import matplotlib.pyplot as plt

    df = pd.read_csv('C:/Users/V/Desktop/cleaned_covid_data.csv')
    corr_matrix = weekly_correlation(df)

    plt.figure(figsize=(8, 6))
    # vmin/vmax/center pin the colour scale to the full [-1, 1] range.
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
    plt.title('Correlation Matrix: Weekly Cases, Deaths, Tests')
    plt.tight_layout()
    plt.savefig('correlation_heatmap.png')
    plt.show()

    print("Correlation Matrix:")
    print(corr_matrix)


if __name__ == "__main__":
    run_obj3()

# --------------------------------------------------------------------------------
# /obj1.py:
# --------------------------------------------------------------------------------
# Cleans the raw CSV and writes the cleaned CSV that the other scripts read.
import pandas as pd
import numpy as np

NUMERIC_COLS = ['Cases - Weekly', 'Deaths - Weekly', 'Tests - Weekly', 'Case Rate - Weekly']


def clean_covid_data(df):
    """Return a cleaned copy of the raw COVID frame; *df* itself is not modified.

    Steps:
      * drop fully duplicated rows;
      * fill missing values in each column of ``NUMERIC_COLS`` with that
        column's median and cast the column to float;
      * cast 'ZIP Code' to str;
      * parse 'Week Start' and 'Week End' with ``pd.to_datetime``.
    """
    # Explicit copy: the column assignments below must never write through to
    # (or warn about) the caller's frame.
    cleaned = df.drop_duplicates().copy()

    for col in NUMERIC_COLS:
        cleaned[col] = cleaned[col].fillna(cleaned[col].median()).astype(float)

    cleaned['ZIP Code'] = cleaned['ZIP Code'].astype(str)
    cleaned['Week Start'] = pd.to_datetime(cleaned['Week Start'])
    cleaned['Week End'] = pd.to_datetime(cleaned['Week End'])
    return cleaned


def run_obj1():
    """Read the raw CSV, clean it, write the cleaned CSV and print a summary."""
    df = clean_covid_data(pd.read_csv("C:/Users/V/Desktop/COVID-19.csv"))
    df.to_csv('C:/Users/V/Desktop/cleaned_covid_data.csv', index=False)

    print("Data cleaning completed. Summary:")
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line.
    df.info()
    print(df.head())


if __name__ == "__main__":
    run_obj1()

# --------------------------------------------------------------------------------
# /obj5.py:
# --------------------------------------------------------------------------------
# Box plot of the weekly case rate plus a listing of IQR outliers.
import pandas as pd


def iqr_outliers(df, column='Case Rate - Weekly'):
    """Return the rows of *df* whose *column* value is outside the 1.5*IQR fences.

    A row is selected when its value is below ``Q1 - 1.5 * IQR`` or above
    ``Q3 + 1.5 * IQR``, with Q1/Q3 the 25th/75th percentiles of *column*.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr
    return df[(df[column] < lower_fence) | (df[column] > upper_fence)]


def run_obj5():
    """Draw the case-rate box plot and print the outlier rows."""
    import seaborn as sns
    import matplotlib.pyplot as plt

    # ZIP codes are identifiers, not quantities: keep them as strings so the
    # str cast done when the file was cleaned survives the CSV round trip.
    df = pd.read_csv('C:/Users/V/Desktop/cleaned_covid_data.csv', dtype={'ZIP Code': str})

    plt.figure(figsize=(8, 6))
    sns.boxplot(y='Case Rate - Weekly', data=df, color='lightblue')
    plt.title('Boxplot of Weekly Case Rates Across ZIP Codes')
    # NOTE(review): the label says "(Normalized)" but nothing visible here
    # rescales the column -- confirm the wording against the data source.
    plt.ylabel('Case Rate (Normalized)')
    plt.tight_layout()
    plt.savefig('case_rate_boxplot.png')
    plt.show()

    outliers = iqr_outliers(df)
    print("Outliers in Case Rates:")
    print(outliers[['ZIP Code', 'Week Start', 'Case Rate - Weekly']])


if __name__ == "__main__":
    run_obj5()

# --------------------------------------------------------------------------------
# /obj6.py:
# --------------------------------------------------------------------------------
# Summary statistics, histogram, scatter plot and density plot.
import pandas as pd


def run_obj6():
    """Print summary statistics and save three distribution plots."""
    import seaborn as sns
    import matplotlib.pyplot as plt

    df = pd.read_csv('C:/Users/V/Desktop/cleaned_covid_data.csv')

    print("Summary Statistics:")
    print(df[['Cases - Weekly', 'Deaths - Weekly', 'Tests - Weekly',
              'Case Rate - Weekly']].describe())

    # NOTE(review): the axis labels below say "(Normalized)" but the values
    # are plotted exactly as read from the CSV -- confirm the wording.

    # Histogram of weekly cases with a KDE overlay.
    plt.figure(figsize=(8, 6))
    sns.histplot(df['Cases - Weekly'], bins=30, kde=True, color='blue')
    plt.title('Distribution of Weekly Cases')
    plt.xlabel('Cases (Normalized)')
    plt.ylabel('Frequency')
    plt.tight_layout()
    plt.savefig('cases_histogram.png')
    plt.show()

    # Cases against tests; weekly deaths drive both point colour and size.
    plt.figure(figsize=(8, 6))
    sns.scatterplot(x='Tests - Weekly', y='Cases - Weekly', data=df,
                    hue='Deaths - Weekly', size='Deaths - Weekly', palette='coolwarm')
    plt.title('Weekly Cases vs Tests (Colored by Deaths)')
    plt.xlabel('Tests (Normalized)')
    plt.ylabel('Cases (Normalized)')
    plt.tight_layout()
    plt.savefig('cases_vs_tests_scatter.png')
    plt.show()

    # Kernel density estimate of the weekly case rate.
    plt.figure(figsize=(8, 6))
    sns.kdeplot(df['Case Rate - Weekly'], fill=True, color='green')
    plt.title('Density Plot of Weekly Case Rates')
    plt.xlabel('Case Rate (Normalized)')
    plt.ylabel('Density')
    plt.tight_layout()
    plt.savefig('case_rate_density.png')
    plt.show()


if __name__ == "__main__":
    run_obj6()

# --------------------------------------------------------------------------------
# /obj2.py:
# --------------------------------------------------------------------------------
# Weekly totals over all ZIP codes as a line chart and a stacked area chart.
import pandas as pd


def weekly_totals(df):
    """Return cases, deaths and tests summed per 'Week Start' value.

    The result has one row per distinct 'Week Start' (as a regular column,
    not the index) and one column per summed metric.
    """
    metrics = ['Cases - Weekly', 'Deaths - Weekly', 'Tests - Weekly']
    return df.groupby('Week Start')[metrics].sum().reset_index()


def run_obj2():
    """Save the line chart and the stacked area chart of the weekly totals."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    df = pd.read_csv("C:/Users/V/Desktop/cleaned_covid_data.csv")
    # Dates come back from the CSV as text; parse them so the x axis is a
    # real time axis.
    df['Week Start'] = pd.to_datetime(df['Week Start'])

    weekly_data = weekly_totals(df)

    plt.figure(figsize=(12, 6))
    for label, marker in (('Cases', 'o'), ('Deaths', 's'), ('Tests', '^')):
        sns.lineplot(x='Week Start', y=f'{label} - Weekly', data=weekly_data,
                     label=label, marker=marker)
    plt.title('Weekly COVID-19 Trends Across All ZIP Codes')
    plt.xlabel('Week Start Date')
    # NOTE(review): labelled "(Normalized)" although the plotted values are
    # plain per-week sums -- confirm the wording.
    plt.ylabel('Count (Normalized)')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('weekly_trends_line.png')
    plt.show()

    plt.figure(figsize=(12, 6))
    plt.stackplot(weekly_data['Week Start'],
                  weekly_data['Cases - Weekly'],
                  weekly_data['Deaths - Weekly'],
                  weekly_data['Tests - Weekly'],
                  labels=['Cases', 'Deaths', 'Tests'], alpha=0.5)
    plt.title('Weekly COVID-19 Trends (Stacked Area)')
    plt.xlabel('Week Start Date')
    plt.ylabel('Count (Normalized)')
    plt.legend(loc='upper left')
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('weekly_trends_area.png')
    plt.show()


if __name__ == "__main__":
    run_obj2()

# --------------------------------------------------------------------------------
# /obj4.py:
# --------------------------------------------------------------------------------
# Per-ZIP totals: top-10 bar charts and a heatmap over all ZIP codes.
import pandas as pd


def zip_totals(df):
    """Return cases, deaths and tests summed per 'ZIP Code' value.

    The result has one row per distinct 'ZIP Code' (as a regular column, not
    the index) and one column per summed metric.
    """
    metrics = ['Cases - Weekly', 'Deaths - Weekly', 'Tests - Weekly']
    return df.groupby('ZIP Code')[metrics].sum().reset_index()


def run_obj4():
    """Save the three top-10 bar charts and the ZIP-code heatmap."""
    import seaborn as sns
    import matplotlib.pyplot as plt

    # Read ZIP codes as strings. If pandas infers them as integers, both
    # barplot axes are numeric and seaborn no longer treats the ZIP axis as
    # the categorical one, so the horizontal bars come out wrong.
    df = pd.read_csv('C:/Users/V/Desktop/cleaned_covid_data.csv', dtype={'ZIP Code': str})

    zip_data = zip_totals(df)

    # One horizontal top-10 chart per metric; only the metric name and the
    # palette differ between the three figures.
    # NOTE(review): x labels say "(Normalized)" but the values are plain
    # sums -- confirm the wording.
    for metric, palette in (('Cases', 'viridis'), ('Deaths', 'magma'), ('Tests', 'plasma')):
        column = f'{metric} - Weekly'
        top_rows = zip_data.nlargest(10, column)
        plt.figure(figsize=(10, 6))
        sns.barplot(x=column, y='ZIP Code', hue='ZIP Code', data=top_rows,
                    palette=palette, legend=False)
        plt.title(f'Top 10 ZIP Codes by Total Weekly {metric}')
        plt.xlabel(f'Total {metric} (Normalized)')
        plt.ylabel('ZIP Code')
        plt.tight_layout()
        plt.savefig(f'zip_{metric.lower()}_barplot.png')
        plt.show()

    # Heatmap over every ZIP code: rows are ZIP codes, columns the metrics.
    pivot_data = zip_data.set_index('ZIP Code')[
        ['Cases - Weekly', 'Deaths - Weekly', 'Tests - Weekly']
    ]
    plt.figure(figsize=(10, 8))
    sns.heatmap(pivot_data, cmap='Reds', annot=True, fmt='.2f')
    plt.title('ZIP Code Severity Heatmap (Cases, Deaths, Tests)')
    plt.tight_layout()
    plt.savefig('zip_heatmap.png')
    plt.show()


if __name__ == "__main__":
    run_obj4()

# --------------------------------------------------------------------------------