├── AH_Provisional_COVID-19_Deaths_by_County_and_Age_for_2020.csv ├── Figure_1.png ├── Figure_2.png ├── README.md ├── airlines.json └── project2.py /Figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henrylong612/matplotlib_data_visualization/7ca5529be57da0866090f5c81c544ee868dd3d97/Figure_1.png -------------------------------------------------------------------------------- /Figure_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henrylong612/matplotlib_data_visualization/7ca5529be57da0866090f5c81c544ee868dd3d97/Figure_2.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Matplotlib Data Visualization 2 | 3 | This GitHub repository contains my submission for project_02 of Mike Izbicki's CS40 class. Instructions for project_02 can be found [here](https://github.com/mikeizbicki/cmc-csci040/tree/2022fall/project_02). 4 | 5 | ## Annual Total Number of Flights by Airport from 2004 to 2015 6 | ![Annual Total Number of Flights by Airport from 2004 to 2015](Figure_1.png) 7 | 8 | These data were taken from the Travel section of jdorfman's [Awesome JSON Datasets](https://github.com/jdorfman/awesome-json-datasets). These data show the total number of annual flights flown from nine of the top United States international airports (ATL, DFW, LAX, ORD, CLT, MCO, LAS, PHX, and MIA) from the year 2004 to the year 2015. (The years 2003 and 2016 had incomplete data and were not included.) The data are longitudinal because they show how the annual flights fluctuate over time. 
"""Plot two figures from the data files stored next to this script.

Figure 1 is a line chart of the annual total number of flights for nine
airports from 2004 to 2015, read from ``airlines.json``.

Figure 2 is a bar chart of the approximate number of COVID-19 deaths in
Minnesota in 2020 by age group, read from
``AH_Provisional_COVID-19_Deaths_by_County_and_Age_for_2020.csv``.
"""
import csv
import json
from collections import Counter
from pathlib import Path

import numpy as np

# The data files live in the same directory as this script, so resolve them
# relative to it instead of relying on a machine-specific absolute path.
DATA_DIR = Path(__file__).resolve().parent
AIRLINES_JSON = DATA_DIR / 'airlines.json'
COVID_CSV = (
    DATA_DIR / 'AH_Provisional_COVID-19_Deaths_by_County_and_Age_for_2020.csv'
)

# Years shown in the flights figure (2004 through 2015 inclusive).
YEARS = range(2004, 2016)

# Airports plotted in the flights figure, in legend order.
AIRPORT_CODES = ('ATL', 'DFW', 'LAX', 'ORD', 'CLT', 'MCO', 'LAS', 'PHX', 'MIA')

# Value substituted for a blank death count.  Per the README, blank counts
# are "between 1-9" and suppressed, so 5 is used as the approximation.
SUPPRESSED_COUNT_ESTIMATE = 5

# Maps each age label found in the CSV to the decade bucket it is plotted in.
# Dict order is the left-to-right order of the bars.
AGE_GROUP_BY_LABEL = {
    '0-19 Years': '0-19 Years',
    '20-24 Years': '20-29 Years',
    '25-29 Years': '20-29 Years',
    '30-34 Years': '30-39 Years',
    '35-39 Years': '30-39 Years',
    '40-44 Years': '40-49 Years',
    '45-49 Years': '40-49 Years',
    '50-54 Years': '50-59 Years',
    '55-59 Years': '50-59 Years',
    '60-64 Years': '60-69 Years',
    '65-69 Years': '60-69 Years',
    '70-74 Years': '70+ Years',
    '75 Years and Over': '70+ Years',
}

# CSV column positions used by the original script: column 4 is compared
# against the state abbreviation, column 10 against the age-group label, and
# column 11 is parsed as the death count.
STATE_COLUMN = 4
AGE_COLUMN = 10
DEATHS_COLUMN = 11


def annual_flight_totals(units, airport_code, years=YEARS):
    """Return the total number of flights per year for one airport.

    Args:
        units: Iterable of records shaped like the entries of
            ``airlines.json``; each record is read through
            ``["Airport"]["Code"]``, ``["Time"]["Year"]`` and
            ``["Statistics"]["Flights"]["Total"]``.
        airport_code: Airport code to select, e.g. ``'ATL'``.
        years: Years to report, in output order.

    Returns:
        A list with one total per entry of ``years``.  A year with no
        records for the airport contributes 0.
    """
    # Summing into a per-year counter does not depend on the records being
    # sorted, nor on extra leading/trailing years being present.
    totals = Counter()
    for unit in units:
        if unit["Airport"]["Code"] == airport_code:
            year = unit["Time"]["Year"]
            totals[year] += unit["Statistics"]["Flights"]["Total"]
    return [totals[year] for year in years]


def deaths_by_age_group(rows, state='MN'):
    """Return approximate death counts per age bucket for one state.

    Args:
        rows: Iterable of CSV rows (lists of strings), header included.
        state: Value the state column must equal for a row to be counted.

    Returns:
        A dict mapping each bucket label (``'0-19 Years'`` ... ``'70+
        Years'``) to its summed count, in plotting order.  Blank counts are
        replaced by ``SUPPRESSED_COUNT_ESTIMATE``.
    """
    totals = dict.fromkeys(AGE_GROUP_BY_LABEL.values(), 0)
    for row in rows:
        # Skip blank or truncated lines rather than failing on the index.
        if len(row) <= DEATHS_COLUMN:
            continue
        if row[STATE_COLUMN] != state:
            continue
        bucket = AGE_GROUP_BY_LABEL.get(row[AGE_COLUMN])
        if bucket is None:
            # Age labels outside the table (and the header row) are ignored.
            continue
        raw_count = row[DEATHS_COLUMN]
        if raw_count == '':
            totals[bucket] += SUPPRESSED_COUNT_ESTIMATE
        else:
            totals[bucket] += int(raw_count)
    return totals


def plot_flights(units):
    """Draw and show the line chart of annual flights per airport."""
    # Imported here so the aggregation helpers above can be imported and
    # used without matplotlib being installed.
    import matplotlib.pyplot as plt

    for code in AIRPORT_CODES:
        plt.plot(YEARS, annual_flight_totals(units, code), label=code)

    plt.title('Annual Total Number of Flights by Airport from 2004 to 2015')
    plt.xlabel('Year')
    # Tick range is kept as in the original figure; it starts at 2003, one
    # year before the first plotted year.
    plt.xticks(np.arange(2003, 2016, 1))
    plt.ylabel('Annual Total Number of Flights')
    # Anchor the legend just outside the top-right corner of the axes.
    plt.legend(bbox_to_anchor=(1,1), loc='upper left', borderaxespad=0)
    plt.show()


def plot_covid_deaths(totals):
    """Draw and show the bar chart of deaths per age bucket.

    Args:
        totals: Mapping of bucket label to count, as returned by
            ``deaths_by_age_group``.
    """
    import matplotlib.pyplot as plt

    terms = list(totals)
    counts = list(totals.values())

    fig, ax = plt.subplots()
    ax.bar(terms, counts)
    plt.title('Approximate Number of COVID-19 Deaths in Minnesota in 2020 by Age Group')
    plt.xlabel('Age Group')
    plt.ylabel('Approximate Number of COVID-19 Deaths in Minnesota in 2020')
    plt.show()


def main():
    """Load both data files and show the two figures in order."""
    with open(AIRLINES_JSON, encoding='utf-8') as f:
        units = json.load(f)
    plot_flights(units)

    # newline='' lets the csv module handle line endings itself.
    with open(COVID_CSV, newline='', encoding='utf-8') as f:
        spreadsheet = list(csv.reader(f))
    plot_covid_deaths(deaths_by_age_group(spreadsheet))


if __name__ == "__main__":
    main()