├── Screen Shot 2019-09-06 at 5.25.15 PM.png ├── CommAreas ├── README.md ├── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.dbf ├── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp ├── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shx └── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.prj ├── make_gif.py ├── mask_given_shapefile.py ├── README.md ├── validation ├── table1_validation_model_d02d02.py ├── MODEL_EVALUATION_r2_mse_bias_for_d02_d03.py ├── MODEL_EVALUATION_CMAQCheck_part1.py ├── model_validation_statistics.py └── compareHourlyWrfToClimateStations.py ├── DataPreprocessing ├── run_vertint.csh ├── cmaq_to_stations.py ├── wrf_to_stations_step2.py └── compareHourlyWrfToClimateStations.py ├── Validation ├── VCD_comparison.py └── station_validation.py ├── convert_netcdf_to_geotif.py ├── emissions_chg_plot.py ├── plot_cmaq.py ├── compare_CMAQ_to_EPAstation.py ├── model_column_comparison.py ├── PostProcessing ├── plot_cmaq_may2021.py ├── timeseries_epa_stn_cmaq_may2021.py └── o3_profile.py ├── correlation_wrf_cmaq_smoke.py ├── three_panel_epa_gif.py ├── chi_map_cropped.py └── plot_CMAQ_diff.py /Screen Shot 2019-09-06 at 5.25.15 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/Screen Shot 2019-09-06 at 5.25.15 PM.png -------------------------------------------------------------------------------- /CommAreas/README.md: -------------------------------------------------------------------------------- 1 | # shapefiles taken from 2 | https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6 3 | 4 | -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.dbf -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shx -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]] -------------------------------------------------------------------------------- /make_gif.py: -------------------------------------------------------------------------------- 1 | #libraires 2 | import moviepy.editor as mpy 3 | import os 4 | import glob 5 | 6 | 7 | filestartswith='NO2' 8 | dir='~/' 9 | 10 | # 11 | gif_name = 'NO2' 12 | fps = 10 13 | file_list = ['NO2_day%i_hour%i.png'%(j,k) for j in range(5) for k in range(24)] 14 | clip = mpy.ImageSequenceClip(file_list, fps=fps) 15 | clip.write_gif('{}.gif'.format(gif_name), fps=fps) 16 | -------------------------------------------------------------------------------- /mask_given_shapefile.py: -------------------------------------------------------------------------------- 1 | def mask_given_shapefile(lon,lat,shapefile): 2 | ''' 3 | Make a mask given a shapefile 4 | lon - array of grid lons 5 | lat - array of grid lats 6 | shapefile - geopandas geodataframe shapefile 7 | ''' 8 | union=gpd.GeoSeries(unary_union(shapefile.geometry)) 9 | mask=np.ones(lon.shape,dtype=bool) 10 | mask[:] = False 11 | for i in range(len(lon)): 12 | for j in range(len(lon[0])): 13 | pt = Point(lon[i][j],lat[i][j]) 14 | mask[i][j] = pt.within(union[0]) 15 | # 16 | return mask 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chi_AQ 2 | Use this repository to plot Chicago air quality using model and satellite data. 3 | Still in development, for sure. Options to customize plotting is still in chi_map_cropped.py 4 | 5 | Output of chi_map_cropped.py looks like this (with some minor adjustments in photoshop): 6 | ![Image of Chicago Air Quality given shapefile](https://github.com/stacymonty/Chi_AQ/blob/master/Screen%20Shot%202019-09-06%20at%205.25.15%20PM.png) 7 | 8 | The outside line crops image of the gridded data to the merged chicago shapefile. 9 | Doesn't actually crop the data to the shapefile -- future work. 10 | 11 | -------------------------------------------------------------------------------- /validation/table1_validation_model_d02d02.py: -------------------------------------------------------------------------------- 1 | # model validation table for CHEMICALS 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import scipy.stats as st 6 | 7 | 8 | def stats(data,prediction): 9 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 10 | mu_d,mu_p = np.mean(x),np.mean(y) 11 | bias = np.sum(x-y)/len(x) 12 | rmse = np.sqrt(np.mean((y-x)**2)) 13 | r,p = st.pearsonr(x,y) 14 | return mu_d,mu_p,bias,rmse,r,p 15 | 16 | 17 | 18 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 19 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv','NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 20 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 21 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv','O3_d02_2019_1_EPA_CMAQ_Combine.csv', 22 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv', 23 | 'PM25_TOT_d02_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d02_2019_1_EPA_CMAQ_Combine.csv'] 24 | 25 | out = [] 26 | 27 | for i in range(len(fnames)): 28 | f = pd.read_csv(fnames[i]) 29 | if i>3 and i<8: 30 | s = stats(f['Sample Measurement']*1000,f['CMAQ']) 31 | else: 32 | s = stats(f['Sample Measurement'],f['CMAQ']) 33 | out.append(s) 34 | 35 | out = pd.DataFrame(out) 36 | out.columns=['mu_d','mu_p','bias','rmse','r','p'] 37 | 38 | out.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 39 | 40 | out -------------------------------------------------------------------------------- /DataPreprocessing/run_vertint.csh: -------------------------------------------------------------------------------- 1 | !/bin/csh 2 | 3 | #For creating VCD from CMAQ output -- needs mcip files and cmaq cctm files 4 | 5 | # For future runs: check indir, fname_start, begdate + enddate 6 | # Make sure indir/column and indir/mcip exists 7 | # 8 | # Location of top directory CONC and MCIP files 9 | setenv indir /projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852 10 | # # Location to create vert files 11 | setenv outdir $indir/column 12 | # # Location of CONC files 13 | setenv concdir $indir 14 | # # Location of METCRO3D files 15 | setenv mcipdir $indir/mcip 16 | # How the netcdffile is named at the front 17 | setenv fname_start CCTM_CONC_v3852_ 18 | 19 | # 20 | # #Starting and ending times (inclusive) of times 21 | setenv begdate_g "2019-01-01" # YYYYMMDD 22 | setenv enddate_g "2019-02-01" # YYYYMMDD 23 | # 24 | # # beg date julian 25 | setenv begdate_j `date -ud "${begdate_g}" +%Y%j` 26 | # # end date julian 27 | setenv enddate_j `date -ud "${enddate_g}" +%Y%j` 28 | # # curr date (updated in loop) julian 29 | setenv curdate_j $begdate_j 30 | # # curr date (updated in loop) gregorian 31 | setenv curdate_g $begdate_g 32 | # # curr date (updated in loop) gregorian 33 | setenv curdate_g_f `date -ud "${curdate_g}" +%Y%m%d` 34 | # 35 | # # Main loop 36 | while ( $curdate_j <= $enddate_j) 37 | # 38 | # # Set name of input file 39 | setenv infile $indir/$fname_start$curdate_g_f".nc" 40 | # # Name of output file 41 | setenv outfile $outdir/$fname_start$curdate_g_f"_column.nc" 42 | # # Name of Metfile 43 | setenv metfile $mcipdir/"METCRO3D_Chicago_LADCO_"$curdate_g".nc" 44 | 45 | vertintegral<< TEST_DONE 46 | infile 47 | 48 | 49 | 50 | metfile 51 | outfile 52 | TEST_DONE 53 | 54 | 55 | setenv curdate_g `date -ud "${curdate_g}+1days" +%Y-%m-%d` 56 | setenv curdate_j `date -ud "${curdate_g}" +%Y%j` 57 | setenv curdate_g_f `date -ud "${curdate_g}" +%Y%m%d` 58 | 59 | #TEST_DONE 60 | 61 | echo "----------------------------- " 62 | echo $curdate_g 63 | 64 | end 65 | -------------------------------------------------------------------------------- /Validation/VCD_comparison.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # --------------------------------------------------------------------- 4 | # Stacy Montgomery, Jan 2021 5 | # 6 | # Use after you crop the L2 7 | 8 | # --------------------------------------------------------------------- 9 | # USER INPUT 10 | # --------------------------------------------------------------------- 11 | from netCDF4 import Dataset 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import pandas as pd 15 | import os 16 | import netCDF4 17 | import math 18 | from scipy.interpolate import griddata 19 | import scipy.stats as st 20 | 21 | # Projections -- this will be used in naming files later 22 | domain = 'Chicago' 23 | # grid file 24 | grid='/home/asm0384/ChicagoStudy/inputs/grid/latlon_ChicagoLADCO_d03.nc' 25 | lon,lat = np.array(Dataset(grid,'r')['lon']),np.array(Dataset(grid,'r')['lat']) 26 | 27 | var='NO2' 28 | 29 | #Directory to where L2 TropOMI files are stored 30 | dir='/projects/b1045/TropOMI/'+var+'/l2_cut/' 31 | 32 | #from netcdf file, what do you want 33 | varname='nitrogendioxide_tropospheric_column' 34 | varprecision='qa_value' 35 | tagdir = '~/tag/' 36 | 37 | filestartswith = 'S5P_OFFL_L2__NO2____' # 'S5P_OFFL_L2__O3' 38 | 39 | summer_regrid = pd.read_csv('~/rbdinterp_linear_smooth_201808.csv',index_col=0) 40 | summer_regrid2 = pd.read_csv('~/rbdinterp_linear_smooth_201808_pt2.csv',index_col=0) 41 | 42 | wint_avg_trop= np.asarray(pd.read_csv('~/rbdinterp_linear_smooth_201901_NO2.csv',index_col=0))*1000 43 | summer_avg_trop = np.asarray((summer_regrid2+summer_regrid)/2)*1000 44 | 45 | 46 | # pull in column 47 | 48 | dwint = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 49 | dsum = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 50 | 51 | filestartswith = 'CCTM_CONC_v385' # 52 | fs = next(os.walk(dwint))[2] 53 | fs = [x for x in fs if x.startswith(filestartswith)] 54 | f_wint=sorted(fs) 55 | fs = next(os.walk(dsum))[2] 56 | fs = [x for x in fs if x.startswith(filestartswith)] 57 | f_sum=sorted(fs) 58 | 59 | time = 13 60 | summer_cmaq_trop = np.asarray([Dataset(dsum+f_sum[i])['NO2'][time][0:31].sum(axis=0) for i in range(len(f_sum))]) 61 | winter_cmaq_trop = np.asarray([Dataset(dwint+f_wint[i])['NO2'][time][0:31].sum(axis=0) for i in range(len(f_wint))]) 62 | 63 | summer_cmaq_avg_trop = summer_cmaq_trop.mean(axis=0) 64 | winter_cmaq_avg_trop =winter_cmaq_trop.mean(axis=0) 65 | 66 | 67 | 68 | # functions 69 | def stats_normalized(data,prediction): 70 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 71 | mu_d,mu_p = np.mean(x),np.mean(y) 72 | nmb = np.sum(y-x)/np.sum(x)*100 73 | nme = np.sum(np.abs(y-x))/np.sum(x)*100 74 | r,p = st.pearsonr(x,y) 75 | return mu_d,mu_p,nmb,nme,r,p 76 | 77 | tropstats = pd.DataFrame(np.asarray([stats_normalized(summer_avg_trop,summer_cmaq_avg_trop),stats_normalized(wint_avg_trop,winter_cmaq_avg_trop)])) 78 | tropstats.columns = ['mu_d','mu_p','bias','rmse','r','p'] 79 | tropstats.index = ['Summer 2018','Winter 2019'] 80 | tropstats.to_csv('~/TroposphereStats.csv') 81 | 82 | -------------------------------------------------------------------------------- /convert_netcdf_to_geotif.py: -------------------------------------------------------------------------------- 1 | # CONVERT CMAQ NETCDF OUTPUT FILE TO RASTER / GEOTIF / SHAPEFILE 2 | 3 | import rioxarray 4 | import xarray 5 | import numpy as np 6 | 7 | d = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/o3/' 8 | 9 | dir='/projects/b1045/jschnell/ForStacy/' 10 | ll='latlon_ChicagoLADCO_d03.nc' 11 | ll = xarray.open_dataset(dir+ ll) 12 | 13 | fnames = ['COMBINE_ACONC_201808.nc', 'dailymaxozone_201808.nc', 'NO2_201808.nc ', 'O3_201808.nc', 'pm25_201808.nc', 'COMBINE_ACONC_201901.nc', 'dailymaxozone_201901.nc', 'NO2_201901.nc', 'O3_201901.nc', 'pm25_201901.nc'] 14 | 15 | 16 | # ------- DOING AUG O3 17 | 18 | fnames_out = ['dailymaxozone_201808'+str(i+1).zfill(2)+'.tif' for i in range(31)] #set up names for files out 19 | 20 | for i in range(31): # number of days in these files 21 | xds = xarray.open_dataset(d+ 'dailymaxozone_201808.nc') 22 | # I'm writing out each time step as its own file name and only taking the first layer 23 | # hence taking the ith time step and 0th layer 24 | data = xds["O3"][i][0] 25 | # Key here is literally spelling out the indices and the coordinates with x, y 26 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 27 | # and this is how you write out the file 28 | foo.T.rio.to_raster(fnames_out[i]) 29 | 30 | # ------- DOING JAN O3 31 | 32 | fnames_out = ['dailymaxozone_201901'+str(i+1).zfill(2)+'.tif' for i in range(31)] 33 | 34 | for i in range(31): # number of days in these files 35 | xds = xarray.open_dataset(d+ 'dailymaxozone_201901.nc') 36 | data = xds["O3"][i][0] 37 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 38 | foo.T.rio.to_raster(fnames_out[i]) 39 | 40 | # ------- DOING JAN O3 41 | 42 | fnames_out = ['dailymaxozone_201901'+str(i+1).zfill(2)+'.tif' for i in range(31)] 43 | 44 | for i in range(31): # number of days in these files 45 | xds = xarray.open_dataset(d+ 'dailymaxozone_201901.nc') 46 | data = xds["O3"][0][0] # theres only 1 timestep here 47 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 48 | foo.T.rio.to_raster(fnames_out[i]) 49 | 50 | # ------- AVERAGE O3 51 | 52 | fnames_in = ['O3_201808.nc','O3_201901.nc' ] 53 | fnames_out = ['O3_201808.tif','O3_201901.tif' ] 54 | 55 | for i in range(len(fnames_in)): # number of days in these files 56 | xds = xarray.open_dataset(d+ fnames_in[i]) 57 | data = xds["O3"][0][0] 58 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 59 | foo.T.rio.to_raster(fnames_out[i]) 60 | 61 | 62 | # ------- DOING NO2 63 | 64 | fnames_in = ['NO2_201808.nc','NO2_201901.nc' ] 65 | fnames_out = ['NO2_201808.tif','NO2_201901.tif' ] 66 | 67 | for i in range(len(fnames_in)): # number of days in these files 68 | xds = xarray.open_dataset(d+ fnames_in[i]) 69 | data = xds["NO2"][0][0] 70 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 71 | foo.T.rio.to_raster(fnames_out[i]) 72 | -------------------------------------------------------------------------------- /emissions_chg_plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------------------ 4 | # Libraries 5 | #-------------- 6 | from matplotlib import pyplot as plt ; from matplotlib import colors 7 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 8 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 9 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 10 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 11 | from netCDF4 import Dataset 12 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 13 | import matplotlib.path as mpath; import seaborn as sns 14 | 15 | # files 16 | dir='/projects/b1045/jschnell/ForStacy/' 17 | ll='latlon_ChicagoLADCO_d03.nc' 18 | emis='emis_20180801_noSchoolnoBusnoRefuse_minus_base.nc' 19 | emis='emis_20180801_noSchool_minus_base.nc' 20 | ll=Dataset(dir+ll,'r') 21 | lat,lon=ll['lat'][:],ll['lon'][:] 22 | 23 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 24 | 25 | # Start pulling and cropping data 26 | chi = gpd.GeoDataFrame.from_file(path) 27 | 28 | 29 | #pull in files and variables 30 | ncfile= Dataset(dir+emis,'r') 31 | df_lat,df_lon=pd.DataFrame(lat),pd.DataFrame(lon) 32 | no2= pd.DataFrame(Dataset(dir+emis,'r')['NO2'][13][0][:])*10e2 33 | df=pd.DataFrame(no2[:]) 34 | 35 | #find all rows and columns where the change is 0 and drop them 36 | no2_drop=df.loc[~(df==0).all(axis=1)] 37 | 38 | # given where no2 values are 0, filter out the lat lons 39 | # ie. drop the outside parts that are 0 change in the array 40 | data= np.array(df.loc[~(df==0).all(axis=1)]) 41 | lat= np.array(df_lat.loc[~(df==0).all(axis=1)]) 42 | lon= np.array(df_lon.loc[~(df==0).all(axis=1)]) 43 | 44 | 45 | # files 46 | emis1='emis_20180801_noSchoolnoBusnoRefuse_minus_base.nc' 47 | 48 | #pull in files and variables 49 | ncfile1= Dataset(dir+emis1,'r') 50 | 51 | no21= pd.DataFrame(Dataset(dir+emis1,'r')['NO2'][13][0][:])*10e2 52 | df1=pd.DataFrame(no21[:]) 53 | 54 | # drop outside parts that are 0 in the array 55 | data1= np.array(df1.loc[~(df1==0).all(axis=1)]) 56 | lat1= np.array(df_lat.loc[~(df1==0).all(axis=1)]) 57 | lon1= np.array(df_lon.loc[~(df1==0).all(axis=1)]) 58 | data=data-data1 59 | 60 | crs_new = ccrs. AlbersEqualArea(central_longitude=(chi.bounds.mean().minx+chi.bounds.mean().maxx)/2) 61 | 62 | 63 | # get shape outside 64 | union=gpd.GeoSeries(unary_union(chi.geometry)) 65 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 66 | 67 | # make fig object 68 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 6)) 69 | 70 | #set up data for plotting via levels 71 | vmax=pd.DataFrame(data).max().max() 72 | vmin= int(pd.DataFrame(data).min().min()) 73 | vmax=-.5 74 | vmin=-1.5 75 | levels = np.linspace(vmin, int(vmax), 15) 76 | 77 | 78 | # get rid of values outside the levels we are contouring to 79 | data[pd.DataFrame(data)vmax]=vmax 97 | 98 | # set boundary as outer extent by making a matplotlib path object and adding that geometry 99 | # i think setting the boundary before you plot the data actually crops the data to the shape, so set ax first 100 | axs.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 101 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 102 | #bold dis 45 103 | axs.add_geometries(gpd.geoseries.GeoSeries(chi_shapefile[chi_shapefile['NAMELSAD']=='State House District 45'].geometry), crs=crs_new,facecolor='none', edgecolor='black', linewidth=2.0) 104 | 105 | #plot the gridded data by using contourf 106 | cs=plt.pcolormesh(lon,lat,data,cmap= "magma_r", transform=crs_new,vmin=vmin,vmax=vmax) 107 | # add landmarks with scatterplot 108 | midway= -87.7522,41.7868 109 | ohare = -87.9073, 41.9842 110 | loop = -87.6251,41.8786 111 | axs.annotate(xy=midway,s="Midway",color='white') 112 | axs.annotate(xy=ohare,s="OHare",color='white') 113 | axs.annotate(xy=loop,s="Loop",color='white') 114 | 115 | # annotate dist: gpd.geoseries.GeoSeries(chi_shapefile[chi_shapefile['NAMELSAD']=='State House District 45'].geometry).centroid 116 | axs.annotate(xy=(-88.10863773846053, 41.90002038299817), s="Dist 45", color='white') 117 | 118 | # set axes extents from shapefile 119 | yl=41.65;yu=42.3 120 | xu=-87.47;xl=-88.3 121 | axs.set_extent([xl,xu,yl,yu],crs= crs_new) 122 | 123 | # title 124 | axs.set_title(var+' at Daytime, Aug. 2018') 125 | 126 | #add colorbar and label 127 | cbar=plt.colorbar(cs,boundaries=levels) 128 | #cbar.ax.set_ylabel('100 * ' +ncfile[0][var].units) 129 | cbar.set_ticks(levels) 130 | 131 | # add state lines 132 | import cartopy.feature as cfeature 133 | states_provinces = cfeature.NaturalEarthFeature( 134 | category='cultural', 135 | name='admin_1_states_provinces_lines', 136 | scale='50m',facecolor='none') 137 | 138 | axs.add_feature(cfeature.STATES, edgecolor='black') 139 | 140 | #add chi neighbs 141 | #rep_districts_shapefile.plot(ax=axs, transform= crs_new,facecolor='None',edgecolor='grey',alpha=0.5) 142 | 143 | #add epa monitors 144 | #where are EPA monitors in CHI area 145 | #latttt=[41.920009, 42.062053, 41.755832, 41.855243,41.984332, 41.801180, 41.751400] 146 | #lonbbb=[-87.672995,-87.675254,-87.545350,-87.752470,-87.792002,-87.832349, -87.713488] 147 | #axs.scatter(lonbbb, latttt, marker = '*', color = 'white', s = 30) 148 | 149 | plt.savefig(var+'_10lvl_daytime_dist45.png') 150 | 151 | plt.show() 152 | 153 | -------------------------------------------------------------------------------- /validation/MODEL_EVALUATION_r2_mse_bias_for_d02_d03.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from scipy.stats import pearsonr 3 | import numpy as np 4 | from sklearn.metrics import mean_squared_error 5 | 6 | 7 | def corr(x,y): 8 | x,y=np.asarray(x),np.asarray(y) 9 | nas = np.logical_or(np.isnan(x), np.isnan(y)) 10 | x,y = x[~nas], y[~nas] 11 | corr = pearsonr(x,y)[0] 12 | bias = (np.array(y)-np.array(x)).mean() 13 | #mse = mean_squared_error(x,y) 14 | return corr,bias#,mse 15 | 16 | 17 | def get_corrs(fnames): 18 | corrs=[]; bias=[]; mses=[] 19 | # 20 | for i in range(len(fnames)): 21 | df=pd.read_csv(fnames[i]) 22 | latlon=[str(df.Latitude[i]) + " " + str(df.Longitude[i]) for i in range(len(df))] 23 | df['latlon']=latlon 24 | if df['Units of Measure'].unique()[0]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000 25 | elif df['Units of Measure'].unique()[1]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000 26 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 27 | cor,bia=corr(x,y) 28 | corrs.append(cor); bias.append(bia); #mses.append(mse) 29 | 30 | # 31 | return corrs, bias, mses 32 | 33 | 34 | 35 | fnamesd02=['CO_d02_2018_8_EPA_CMAQ_Combine.csv', 36 | 'CO_d02_2019_1_EPA_CMAQ_Combine.csv', 37 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv', 38 | 'NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 39 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv', 40 | 'O3_d02_2019_1_EPA_CMAQ_Combine.csv', 41 | 'SO2_d02_2018_8_EPA_CMAQ_Combine.csv', 42 | 'SO2_d02_2019_1_EPA_CMAQ_Combine.csv'] 43 | 44 | fnamesd03=['CO_d03_2018_8_EPA_CMAQ_Combine.csv', 45 | 'CO_d03_2019_1_EPA_CMAQ_Combine.csv', 46 | 'NO2_d03_2018_8_EPA_CMAQ_Combine.csv', 47 | 'NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 48 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv', 49 | 'O3_d03_2019_1_EPA_CMAQ_Combine.csv', 50 | 'SO2_d03_2018_8_EPA_CMAQ_Combine.csv', 51 | 'SO2_d03_2019_1_EPA_CMAQ_Combine.csv'] 52 | 53 | 54 | corrd02,biasd02,msed02= get_corrs(fnamesd02) 55 | corrd03,biasd03,msed03= get_corrs(fnamesd03) 56 | 57 | namesd03 =[fnamesd03[i].split('_EPA_CMAQ_Combine.csv')[0] for i in range(len(fnamesd03))] 58 | namesd02=[fnamesd02[i].split('_EPA_CMAQ_Combine.csv')[0] for i in range(len(fnamesd02))] 59 | 60 | chems=['Aug CO','Jan CO','Aug NO2','Jan NO2','Aug O3','Jan O3','Aug SO2','Jan SO2'] 61 | 62 | final=pd.DataFrame([chems,corrd02,biasd02,corrd03,biasd03]).T 63 | final.columns=['chem/date','r2 d02','bias d02','r2 d03','bias d03'] 64 | 65 | pd.options.display.float_format = '{:,.2f}'.format 66 | final 67 | 68 | 69 | 70 | def get_corrs_monthly(fnames): 71 | corrs=[]; bias=[]; mses=[] 72 | # 73 | for i in range(len(fnames)): 74 | df=pd.read_csv(fnames[i]) 75 | #for i in range(1): 76 | if df['Units of Measure'][0]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000 77 | df['date']=pd.to_datetime(df['level_0']) 78 | df = df.set_index('date').sort_index() 79 | df=df.groupby(['Longitude','Latitude']).mean() 80 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 81 | cor,bia,mse=corr(x,y) 82 | corrs.append(cor); bias.append(bia); mses.append(mse) 83 | # 84 | return corrs, bias, mses 85 | 86 | 87 | # 7day weekly means 88 | df=pd.read_csv(fnamesd03[0]); df['date']=pd.to_datetime(df['level_0']); df = df.set_index('date').sort_index(); df=df.groupby(['Longitude','Latitude']).resample('7d').mean() 89 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 90 | cor,bia,mse=corr(x,y) 91 | df.plot.scatter('Sample Measurement','CMAQ',c='Longitude',colormap='viridis') 92 | 93 | #7day rolling means 94 | df=pd.read_csv(fnamesd03[0]); df['date']=pd.to_datetime(df['level_0']); df = df.set_index('date').sort_index(); df=df.groupby(['Longitude','Latitude']).rolling('3h').mean() 95 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 96 | cor,bia,mse=corr(x,y) 97 | df.plot.scatter('Sample Measurement','CMAQ',c='Longitude',colormap='viridis') 98 | 99 | 100 | # REDO THIS: Two sentences in the methods 101 | # Subset d03 and d02 --> pull same stations, see how the r2 changes 102 | # make daily and monthly 103 | # here's a more holistic picture 104 | 105 | # >> make january o3 gif 106 | # Bias of 30 -- is there even any measurements for o3 in january 107 | # i think the offset is just stepwise changed 108 | 109 | # making the gifs 110 | # throw area average hours together and rank 111 | #--> almost the worst 95%ile, max 112 | 113 | # d02-> d03 gifs do roads show up can you see individiaul power plants 114 | 115 | 116 | # Only d03 stations with d02 117 | #------------------------------------------------------ 118 | 119 | head=['State Code', 'County Code', 'Site Num', 'Parameter Code', 'POC', 'Latitude', 'Longitude', 'Datum', 'Parameter Name', 'Date Local', 'Time Local', 'Date GMT', 'Time GMT', 'Sample Measurement', 'Units of Measure', 'MDL', 'Uncertainty', 'Qualifier', 'Method Type', 'Method Code', 'Method Name', 'State Name', 'County Name', 'Date of Last Change','date'] 120 | 121 | def only_d03_corr(df,df2): 122 | #for i in range(1): 123 | df['date']=pd.to_datetime(df['level_0']) 124 | df2['date']=pd.to_datetime(df2['level_0']) 125 | latlon=[str(df.Latitude[i]) + " " + str(df.Longitude[i]) for i in range(len(df))] 126 | latlon2=[str(df2.Latitude[i]) + " " + str(df2.Longitude[i]) for i in range(len(df2))] 127 | df['latlon']=latlon; 128 | df2['latlon']=latlon2; 129 | if df['Units of Measure'].unique()[0]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000; df2['Sample Measurement']=df2['Sample Measurement']*1000 130 | elif df['Units of Measure'].unique()[1]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000; df2['Sample Measurement']=df2['Sample Measurement']*1000 131 | m=pd.merge(df2,df,on=['latlon','date'],suffixes=('_d02', '_d03')) 132 | x,y,z = np.array(m['Sample Measurement_d02']),np.array(m['CMAQ_d02']),np.array(m['CMAQ_d03']) 133 | corrd02=corr(x,y)[:2] 134 | corrd03=corr(x,z)[:2] 135 | nstations=len(m.latlon.unique()) 136 | return corrd02,corrd03,nstations,np.nanmean(x),y.mean(),z.mean() 137 | 138 | 139 | corrd02,corrd03=[],[] 140 | biasd02,biasd03=[],[] 141 | nstation=[] 142 | avgstn=[];avgcmq2=[];avgcmq3=[] 143 | for i in range(len(fnamesd02)): 144 | df,df2=pd.read_csv(fnamesd03[i]),pd.read_csv(fnamesd02[i]) 145 | c2,c3,ns,xm,ym,zm = only_d03_corr(df,df2) 146 | corrd02.append(c2[0]);corrd03.append(c3[0]) 147 | biasd02.append(c2[1]);biasd03.append(c3[1]); 148 | nstation.append(ns) 149 | avgstn.append(xm); avgcmq2.append(ym); avgcmq3.append(zm) 150 | 151 | 152 | 153 | chems=['Aug CO','Jan CO','Aug NO2','Jan NO2','Aug O3','Jan O3','Aug SO2','Jan SO2'] 154 | 155 | final=pd.DataFrame([chems,corrd02,biasd02,corrd03,biasd03,avgstn,avgcmq2,avgcmq3,nstation]).T 156 | 157 | final.columns=['chem/date','r2 d02','bias d02','r2 d03','bias d03','avg stn','avg d02','avg d03','n station'] 158 | 159 | pd.options.display.float_format = '{:,.2f}'.format 160 | final 161 | 162 | -------------------------------------------------------------------------------- /compare_CMAQ_to_EPAstation.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash python3 2 | 3 | #---------------------------------------------------------# 4 | # Stacy Montgomery, Aug 2019 5 | # Purpose: find aqs stations within model domain, 6 | # pull & format aqs data for comparison 7 | # 8 | 9 | # Link to air tech website with year you're interested in -- NOT WORKING 10 | #linktoaqs='http://files.airnowtech.org/?prefix=airnow/2018/' 11 | # USE: 12 | #---------------------------------------------------------# 13 | 14 | # LIBRARIES 15 | #---------------------------------------------------------# 16 | from datetime import timedelta, date, datetime; import pandas as pd 17 | import numpy as np 18 | from netCDF4 import Dataset 19 | from wrf import latlon_coords, getvar 20 | import glob, os 21 | import matplotlib.pyplot as plt 22 | 23 | #import requests 24 | #from bs4 import BeautifulSoup 25 | 26 | 27 | # USER INPUT 28 | #---------------------------------------------------------# 29 | # Find stations within bounding box 30 | #llon,llat,ulon,ulat=-98.854465,39.517152,-74.289036,49.678626 #use bounds from griddesc 31 | 32 | # Date range to pull from AQS --- if commented out, defined by cmaq files avail 33 | #start_dt = date(2018, 8, 1); end_dt = date(2018, 9, 1) 34 | 35 | 36 | # Directories for cmaq + EPA 37 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 38 | dir_epa='/home/asm0384/CMAQcheck/' 39 | 40 | # to get grid, pull WRF coords 41 | runname='wrf_pure_PXLSM' 42 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' # to get grid 43 | 44 | grid='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 45 | 46 | # CMAQ RUN things 47 | domain='d03' 48 | time='hourly' 49 | year='2018' 50 | month='8' 51 | epa_code=['42401','42602','44201']; var=['SO2','NO2','O3'] #numerical identifiers and corresponding vars 52 | epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 53 | 54 | 55 | 56 | # USER DEF FUNC 57 | #---------------------------------------------------------# 58 | 59 | #------ DATERANGE 60 | # 61 | # 62 | # * dates must be in yyyymmdd format 63 | def daterange(date1, date2): 64 | for n in range(int ((date2 - date1).days)+1): 65 | yield date1 + timedelta(n) 66 | 67 | #------ VARfromIND 68 | # 69 | # 70 | def getVARfromIND(ncfile,indxy, filenames,varname): 71 | t2d01=[ncfile[z][varname][i] for z in range(len(filenames)) for i in range(24)] 72 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(filenames_d01))] for l in range(len(indxy))] 73 | return t2d01_xx 74 | 75 | #------ FIND INDEX 76 | # 77 | # 78 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 79 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 80 | # stn -- points 81 | # wrf -- list 82 | #for iz in range(1): 83 | xx=[];yy=[] 84 | for i in range(len(stn_lat)): 85 | #for i in range(1): 86 | abslat = np.abs(wrf_lat-stn_lat[i]) 87 | abslon= np.abs(wrf_lon-stn_lon[i]) 88 | c = np.maximum(abslon,abslat) 89 | latlon_idx = np.argmin(c) 90 | x, y = np.where(c == np.min(c)) 91 | #add indices of nearest wrf point station 92 | xx.append(x) 93 | yy.append(y) 94 | # 95 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 96 | #return indices list 97 | return xx, yy 98 | 99 | #------ PULL CMAQ 100 | # 101 | # 102 | def filter_EPA(file, start_dt, end_dt,llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,VAR): 103 | #read in file 104 | f=pd.read_csv(file) 105 | # Crop given bounding box 106 | df=f[(f['Latitude'] >= llat) & (f['Latitude'] <= ulat)] 107 | df=df[(df['Longitude'] >= llon) & (df['Longitude'] <= ulon)] 108 | df['Datetime GMT']=pd.to_datetime(df['Date GMT']+ ' ' + df['Time GMT']) 109 | df= df[(df['Datetime GMT'] >= pd.to_datetime(start_dt) ) & (df['Datetime GMT'] <= pd.to_datetime(end_dt))] 110 | lon,lat=df['Longitude'].unique(),df['Latitude'].unique() 111 | df.reset_index(inplace=True) 112 | return lon,lat,df 113 | #somehow make the 0s match up 114 | 115 | 116 | #------ RESAMPLE DF 117 | # Take in real data, fill in missing values with missing values but keep that date open 118 | # 119 | def resample_df(df,lat,lon,start_dt,end_dt): 120 | dff=pd.DataFrame() 121 | # get list of target dates 122 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 123 | #set index as dates 124 | df.set_index('Datetime GMT',inplace=True) 125 | # go through each locations and fill in missing dates 126 | for i in range(len(lat)): 127 | check=df[(df['Latitude']==lat[i]) & (df['Longitude']==lon[i])] 128 | #aka: if there are multiple sensors of same thing, just average 129 | if len(check['POC'].unique())>1 or len(check) > len(t_index): 130 | sample = check.resample('H').mean().reindex(t_index).fillna(float('nan'))['Sample Measurement'] 131 | df2=check[check['POC']==1].resample('H').asfreq().reindex(t_index).fillna(float('nan')) 132 | df2['Sample Measurement']=sample 133 | #print('%s in %s,%s is irregular'%(check['Site Num'][0] ,check['County Name'][0],check['State Name'][0],)) 134 | else: #just fill out values 135 | df2 = check.resample('H').asfreq().reindex(t_index).fillna(float('nan')) 136 | #averaged or not, add to final df 137 | dff=dff.append(df2) 138 | #return index with index rather than dates 139 | dff.reset_index(inplace=True) 140 | return dff 141 | 142 | 143 | # MAIN 144 | #---------------------------------------------------------# 145 | 146 | # $1 Get CMAQ file names 147 | cmaq_files=[] 148 | os.chdir(dir_cmaq) 149 | for file in glob.glob("COMBINE_ACONC_*"): 150 | cmaq_files.append(file) 151 | 152 | cmaq_files.sort() 153 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 154 | start_dt=datetime(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 155 | end_dt=datetime(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8]),23) 156 | 157 | 158 | # Get cmaq grid 159 | #cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 160 | cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)['lat']),np.asarray(Dataset(grid)['lon']) 161 | llat,ulat,llon,ulon=cmaq_lat.min(), cmaq_lat.max(), cmaq_lon.min(), cmaq_lon.max() 162 | 163 | # cmas output 164 | # fname='COMBINE_ACONC_20180810.nc' 165 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 166 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 167 | 168 | # drop last day to make loop better? 169 | 170 | # Loop through each variable and check 171 | for loop in range(len(epa_files)): 172 | lon,lat,df= filter_EPA(epa_files[loop], start_dt, end_dt, llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,var[loop]) 173 | xx,yy= find_index(lon, lat, cmaq_lon, cmaq_lat) 174 | dff= resample_df(df,lat,lon,start_dt,end_dt) 175 | dff['CMAQ']=float('nan') 176 | for numday in range(len(cmaq)): 177 | s=pd.DataFrame([[cmaq[numday][var[loop]][time][0][xx[idx]][yy[idx]] for time in range(24)] for idx in range(len(xx))]).T 178 | # 179 | for station in range(len(xx)): 180 | dff['CMAQ'][24*numday+ station*len(t_index):(24*numday+ station*len(t_index)+24)]=s[station] 181 | #dff['level_0'][(24*numday+ station*len(t_index)):(24*numday+ station*len(t_index)+24)] # check eq 182 | # 183 | dff.to_csv(dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month)); 184 | # 185 | print('Done with %s'%(var[loop])); 186 | 187 | 188 | # plot cmaq comparison 189 | epa_condense=[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month) for loop in range(len(epa_code))] 190 | so2_epa,no2_epa,o3_epa,co_epa = [pd.read_csv(epa_condense[i]) for i in range(len(epa_condense))] 191 | 192 | 193 | #1 to 1 plots 194 | o3df=o3.groupby('Site Num') 195 | o3df.plot.scatter('Sample Measurement','CMAQ',title='Site Num') 196 | -------------------------------------------------------------------------------- /validation/MODEL_EVALUATION_CMAQCheck_part1.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash python3 2 | 3 | #---------------------------------------------------------# 4 | # Stacy Montgomery, Aug 2019 5 | # Purpose: find aqs stations within model domain, 6 | # pull & format aqs data for comparison 7 | # 8 | 9 | # Link to air tech website with year you're interested in -- NOT WORKING 10 | #linktoaqs='http://files.airnowtech.org/?prefix=airnow/2018/' 11 | # USE: 12 | #---------------------------------------------------------# 13 | 14 | # LIBRARIES 15 | #---------------------------------------------------------# 16 | from datetime import timedelta, date, datetime; import pandas as pd 17 | import numpy as np 18 | from netCDF4 import Dataset 19 | from wrf import latlon_coords, getvar 20 | import glob, os 21 | import matplotlib.pyplot as plt 22 | 23 | #import requests 24 | #from bs4 import BeautifulSoup 25 | 26 | 27 | # USER INPUT 28 | #---------------------------------------------------------# 29 | # Find stations within bounding box 30 | #llon,llat,ulon,ulat=-98.854465,39.517152,-74.289036,49.678626 #use bounds from griddesc 31 | 32 | # Date range to pull from AQS --- if commented out, defined by cmaq files avail 33 | #start_dt = date(2018, 8, 1); end_dt = date(2018, 9, 1) 34 | 35 | 36 | # Directories for cmaq + EPA 37 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 38 | dir_epa='/home/asm0384/CMAQcheck/' 39 | dir_ncdc='/' 40 | 41 | # to get grid, pull WRF coords 42 | runname='wrf_pure_PXLSM' 43 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' # to get grid 44 | 45 | grid='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 46 | 47 | # CMAQ RUN things 48 | domain='d02' 49 | time='hourly' 50 | year='2018' 51 | epa_code=['42401','42602','44201']; var=['SO2','NO2','O3'] #numerical identifiers and corresponding vars 52 | epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 53 | 54 | 55 | 56 | # USER DEF FUNC 57 | #---------------------------------------------------------# 58 | 59 | #------ DATERANGE 60 | # 61 | # 62 | # * dates must be in yyyymmdd format 63 | def daterange(date1, date2): 64 | for n in range(int ((date2 - date1).days)+1): 65 | yield date1 + timedelta(n) 66 | 67 | #------ VARfromIND 68 | # 69 | # 70 | def getVARfromIND(ncfile,indxy, filenames,varname): 71 | t2d01=[ncfile[z][varname][i] for z in range(len(filenames)) for i in range(24)] 72 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(filenames_d01))] for l in range(len(indxy))] 73 | return t2d01_xx 74 | 75 | #------ FIND INDEX 76 | # 77 | # 78 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 79 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 80 | # stn -- points 81 | # wrf -- list 82 | #for iz in range(1): 83 | xx=[];yy=[] 84 | for i in range(len(stn_lat)): 85 | #for i in range(1): 86 | abslat = np.abs(wrf_lat-stn_lat[i]) 87 | abslon= np.abs(wrf_lon-stn_lon[i]) 88 | c = np.maximum(abslon,abslat) 89 | latlon_idx = np.argmin(c) 90 | x, y = np.where(c == np.min(c)) 91 | #add indices of nearest wrf point station 92 | xx.append(x) 93 | yy.append(y) 94 | # 95 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 96 | #return indices list 97 | return xx, yy 98 | 99 | #------ PULL CMAQ 100 | # 101 | # 102 | def filter_EPA(file, start_dt, end_dt,llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,VAR): 103 | #read in file 104 | f=pd.read_csv(file) 105 | # Crop given bounding box 106 | df=f[(f['Latitude'] >= llat) & (f['Latitude'] <= ulat)] 107 | df=df[(df['Longitude'] >= llon) & (df['Longitude'] <= ulon)] 108 | df['Datetime GMT']=pd.to_datetime(df['Date GMT']+ ' ' + df['Time GMT']) 109 | df= df[(df['Datetime GMT'] >= pd.to_datetime(start_dt) ) & (df['Datetime GMT'] <= pd.to_datetime(end_dt))] 110 | lon,lat=df['Longitude'].unique(),df['Latitude'].unique() 111 | df.reset_index(inplace=True) 112 | return lon,lat,df 113 | #somehow make the 0s match up 114 | 115 | 116 | latd02min, latd02max, lond02min, lond02max = wrf_latd02.min().min(), wrf_latd02.max().max(),wrf_lond02.min().min(),wrf_lond02.max().max() 117 | 118 | 119 | latd03min, latd03max, lond03min, lond03max = wrf_latd03.min().min(), wrf_latd03.max().max(),wrf_lond03.min().min(),wrf_lond03.max().max() 120 | 121 | #------ RESAMPLE DF 122 | # Take in real data, fill in missing values with missing values but keep that date open 123 | # 124 | def resample_df(df,lat,lon,start_dt,end_dt): 125 | dff=pd.DataFrame() 126 | # get list of target dates 127 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 128 | #set index as dates 129 | df.set_index('Datetime GMT',inplace=True) 130 | # go through each locations and fill in missing dates 131 | for i in range(len(lat)): 132 | check=df[(df['Latitude']==lat[i]) & (df['Longitude']==lon[i])] 133 | #aka: if there are multiple sensors of same thing, just average 134 | if len(check['POC'].unique())>1 or len(check) > len(t_index): 135 | sample = check.resample('H').mean().reindex(t_index).fillna(float('nan'))['Sample Measurement'] 136 | df2=check[check['POC']==1].resample('H').asfreq().reindex(t_index).fillna(float('nan')) 137 | df2['Sample Measurement']=sample 138 | #print('%s in %s,%s is irregular'%(check['Site Num'][0] ,check['County Name'][0],check['State Name'][0],)) 139 | else: #just fill out values 140 | df2 = check.resample('H').asfreq().reindex(t_index).fillna(float('nan')) 141 | #averaged or not, add to final df 142 | dff=dff.append(df2) 143 | #return index with index rather than dates 144 | dff.reset_index(inplace=True) 145 | return dff 146 | 147 | 148 | # MAIN 149 | #---------------------------------------------------------# 150 | 151 | # $1 Get CMAQ file names 152 | cmaq_files=[] 153 | os.chdir(dir_cmaq) 154 | for file in glob.glob("COMBINE_ACONC_*"): 155 | cmaq_files.append(file) 156 | 157 | cmaq_files.sort() 158 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 159 | start_dt=datetime(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 160 | end_dt=datetime(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8]),23) 161 | 162 | 163 | # Get cmaq grid 164 | #cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 165 | cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)['lat']),np.asarray(Dataset(grid)['lon']) 166 | llat,ulat,llon,ulon=cmaq_lat.min(), cmaq_lat.max(), cmaq_lon.min(), cmaq_lon.max() 167 | 168 | # cmas output 169 | # fname='COMBINE_ACONC_20180810.nc' 170 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 171 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 172 | 173 | # drop last day to make loop better? 174 | 175 | # Loop through each variable and check 176 | for loop in range(len(epa_files)): 177 | lon,lat,df= filter_EPA(epa_files[loop], start_dt, end_dt, llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,var[loop]) 178 | xx,yy= find_index(lon, lat, cmaq_lon, cmaq_lat) 179 | dff= resample_df(df,lat,lon,start_dt,end_dt) 180 | dff['CMAQ']=float('nan') 181 | for numday in range(len(cmaq)): 182 | s=pd.DataFrame([[cmaq[numday][var[loop]][time][0][xx[idx]][yy[idx]] for time in range(24)] for idx in range(len(xx))]).T 183 | # 184 | for station in range(len(xx)): 185 | dff['CMAQ'][24*numday+ station*len(t_index):(24*numday+ station*len(t_index)+24)]=s[station] 186 | #dff['level_0'][(24*numday+ station*len(t_index)):(24*numday+ station*len(t_index)+24)] # check eq 187 | # 188 | dff.to_csv(dir_epa+'%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain)); 189 | # 190 | print('Done with %s'%(var[loop])); 191 | 192 | 193 | 194 | 195 | @@@@@##### 196 | 197 | 198 | Here's a vid of me making a laser driver for my LED based gas sensor. The voltage from a charged LED is dropped by impending light. Incoming light is impeded by absorption from gases at this wavelength. The time for the voltage drop corresponds to the absorption of gas between LEDs, thus giving us the concentration of gas. 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /DataPreprocessing/cmaq_to_stations.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash python3 2 | 3 | #---------------------------------------------------------# 4 | # Stacy Montgomery, Aug 2019 5 | # Purpose: find aqs stations within model domain, 6 | # pull & format aqs data for comparison 7 | # 8 | 9 | # Link to air tech website with year you're interested in -- NOT WORKING 10 | #linktoaqs='http://files.airnowtech.org/?prefix=airnow/2018/' 11 | # USE: 12 | #---------------------------------------------------------# 13 | 14 | # LIBRARIES 15 | #---------------------------------------------------------# 16 | from datetime import timedelta, date,datetime; import pandas as pd 17 | import numpy as np 18 | from netCDF4 import Dataset 19 | from wrf import latlon_coords, getvar 20 | import glob, os 21 | import matplotlib.pyplot as plt 22 | 23 | #import requests 24 | #from bs4 import BeautifulSoup 25 | 26 | 27 | # USER INPUT 28 | #---------------------------------------------------------# 29 | # Find stations within bounding box 30 | #llon,llat,ulon,ulat=-98.854465,39.517152,-74.289036,49.678626 #use bounds from griddesc 31 | 32 | # Date range to pull from AQS --- if commented out, defined by cmaq files avail 33 | #start_dt = date(2018, 8, 1); end_dt = date(2018, 9, 1) 34 | 35 | 36 | # Directories for cmaq + EPA 37 | dir_epa='/home/asm0384/CMAQcheck/' 38 | grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/lat_lon_chicago_d02.nc' 39 | #grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d03/latlon_ChicagoLADCO_d03.nc' 40 | 41 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/postprocess/' 42 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/postprocess/' 43 | 44 | # to get grid, pull WRF coords 45 | #runname='wrf_pure_PXLSM' 46 | #dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' # to get grid 47 | 48 | # CMAQ RUN things 49 | domain='d02' 50 | time='hourly' 51 | year='2019' 52 | month='1' 53 | #epa_code=['42401','42602','44201','42101']; var=['SO2','NO2','O3','CO'] #numerical identifiers and corresponding vars 54 | #epa_code=['44201'];var=['O3'] 55 | epa_code=['88101'] 56 | var = ['PM25_TOT'] 57 | epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 58 | 59 | #names of lat lons in the cmaq grid 60 | la,lo='lat','lon' # for 1.3km 61 | la,lo='LAT','LON' # for 4km 62 | 63 | # USER DEF FUNC 64 | #---------------------------------------------------------# 65 | 66 | 67 | #------ DATERANGE 68 | # 69 | # 70 | # * dates must be in yyyymmdd format 71 | def daterange(date1, date2): 72 | for n in range(int ((date2 - date1).days)+1): 73 | yield date1 + timedelta(n) 74 | 75 | #------ VARfromIND 76 | # 77 | # 78 | def getVARfromIND(ncfile,indxy, filenames,varname): 79 | t2d01=[ncfile[z][varname][i] for z in range(len(filenames)) for i in range(24)] 80 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(filenames_d01))] for l in range(len(indxy))] 81 | return t2d01_xx 82 | 83 | #------ FIND INDEX 84 | # 85 | # 86 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 87 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 88 | # stn -- points 89 | # wrf -- list 90 | #for iz in range(1): 91 | xx=[];yy=[] 92 | for i in range(len(stn_lat)): 93 | #for i in range(1): 94 | abslat = np.abs(wrf_lat-stn_lat[i]) 95 | abslon= np.abs(wrf_lon-stn_lon[i]) 96 | c = np.maximum(abslon,abslat) 97 | latlon_idx = np.argmin(c) 98 | x, y = np.where(c == np.min(c)) 99 | #add indices of nearest wrf point station 100 | xx.append(x) 101 | yy.append(y) 102 | # 103 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 104 | #return indices list 105 | return xx, yy 106 | 107 | #------ PULL CMAQ 108 | # 109 | 110 | def pull_cmaq(file, start_dt, end_dt,llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,VAR): 111 | #read in file 112 | f=pd.read_csv(file) 113 | # Crop given bounding box 114 | df=f[(f['Latitude'] >= llat) & (f['Latitude'] <= ulat)] 115 | df=df[(df['Longitude'] >= llon) & (df['Longitude'] <= ulon)] 116 | df['Datetime GMT']=pd.to_datetime(df['Date GMT']+ ' ' + df['Time GMT']) 117 | df= df[(df['Datetime GMT'] >= pd.to_datetime(start_dt) ) & (df['Datetime GMT'] <= pd.to_datetime(end_dt))] 118 | lon,lat=df['Longitude'].unique(),df['Latitude'].unique() 119 | df.reset_index(inplace=True) 120 | return lon,lat,df 121 | #somehow make the 0s match up 122 | 123 | 124 | #------ RESAMPLE DF 125 | # Take in real data, fill in missing values with missing values but keep that date open 126 | # 127 | def resample_df(df,lat,lon,start_dt,end_dt): 128 | dff=pd.DataFrame() 129 | # get list of target dates 130 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 131 | #set index as dates 132 | df.set_index('Datetime GMT',inplace=True) 133 | # go through each locations and fill in missing dates 134 | for i in range(len(lat)): 135 | check=df[(df['Latitude']==lat[i]) & (df['Longitude']==lon[i])] 136 | #aka: if there are multiple sensors of same thing, just average 137 | if len(check['POC'].unique())>1 or len(check) > len(t_index): 138 | sample = check.resample('H').mean().reindex(t_index).fillna(float('nan'))['Sample Measurement'] 139 | df2=check[check['POC']==1].resample('H').asfreq().reindex(t_index).fillna(float('nan')) 140 | df2['Sample Measurement']=sample 141 | #print('%s in %s,%s is irregular'%(check['Site Num'][0] ,check['County Name'][0],check['State Name'][0],)) 142 | else: #just fill out values 143 | df2 = check.resample('H').asfreq().reindex(t_index).fillna(float('nan')) 144 | #averaged or not, add to final df 145 | dff=dff.append(df2) 146 | #return index with index rather than dates 147 | dff.reset_index(inplace=True) 148 | return dff 149 | 150 | --------------------------------------------------# 151 | 152 | # $1 Get CMAQ file names 153 | cmaq_files=[] 154 | os.chdir(dir_cmaq) 155 | for file in glob.glob("COMBINE_ACONC_*"): 156 | cmaq_files.append(file) 157 | 158 | cmaq_files.sort() 159 | cmaq_files.remove(cmaq_files[-1]) 160 | 161 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 162 | start_dt=date(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 163 | end_dt=datetime(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8]),23,0) 164 | 165 | # Get first date range, pull monitoring station range ... unecessary 166 | #dtrange=[] 167 | #for dt in daterange(start_dt, end_dt): 168 | # dtrange.append(dt.strftime("%Y%m%d")) 169 | 170 | # Get cmaq grid 171 | #grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/GRIDCRO2D_Chicago_LADCO_2018-08-20.nc' 172 | #cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 173 | 174 | cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)[la]),np.asarray(Dataset(grid)[lo]) 175 | 176 | if len(cmaq_lat.shape) == 4: 177 | cmaq_lat,cmaq_lon = cmaq_lat[0][0],cmaq_lon[0][0] 178 | 179 | print('CMAQLATSHAPE') 180 | print(cmaq_lat.shape) 181 | #cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)['LAT'])[0][0],np.asarray(Dataset(grid)['LON'])[0][0] 182 | llat,ulat,llon,ulon=cmaq_lat.min(), cmaq_lat.max(), cmaq_lon.min(), cmaq_lon.max() 183 | 184 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 185 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 186 | 187 | # Loop through each variable and check 188 | for loop in range(len(epa_files)): 189 | lon,lat,df=pull_cmaq(epa_files[loop], start_dt, end_dt, llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,var[loop]) 190 | xx,yy= find_index(lon, lat, cmaq_lon, cmaq_lat) 191 | dff= resample_df(df,lat,lon,start_dt,end_dt) 192 | dff['CMAQ']=float('nan') 193 | for numday in range(len(cmaq)): 194 | s=pd.DataFrame([[cmaq[numday][var[loop]][time][0][xx[idx]][yy[idx]] for time in range(24)] for idx in range(len(xx))]).T 195 | # 196 | for station in range(len(xx)): 197 | dff['CMAQ'][(24*(numday)+station*len(t_index)):(24*(numday)+ station*len(t_index)+24)]=s[station] 198 | # Output the var 199 | dff.to_csv(dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month)); 200 | print('Done with %s'%(var[loop])); 201 | 202 | #end 203 | 204 | 205 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /model_column_comparison.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | #model to column comparison 4 | #--------------------------------------- 5 | 6 | 7 | #------------------------------------------ 8 | # Libraries 9 | #-------------- 10 | from matplotlib import pyplot as plt ; from matplotlib import colors 11 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 12 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 13 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 14 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 15 | from netCDF4 import Dataset 16 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 17 | import matplotlib.path as mpath; import seaborn as sns 18 | #------------------------------------------ 19 | 20 | column_dir = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/column/' 21 | onlyfiles = next(os.walk(column_dir))[2] 22 | onlyfiles.sort() 23 | 24 | fig_dir = '~/figs_for_dan/' 25 | 26 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 27 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 28 | 29 | #get lat lon from grid file 30 | dir='/projects/b1045/jschnell/ForStacy/' 31 | ll='latlon_ChicagoLADCO_d03.nc' 32 | llx=Dataset(dir+ll,'r') 33 | lat,lon=llx['lat'][:],llx['lon'][:] 34 | 35 | model_columns = np.asarray([Dataset(column_dir + onlyfiles[i],'r')['NO2'] for i in range(len(onlyfiles))]) 36 | 37 | model_columns_month = np.array([model_columns[i].mean(axis=0) for i in range(len(model_columns))]).mean(axis=0) 38 | #model_columns_no2 = model_columns[i]['NO2'] 39 | 40 | model_columns_month_ish= model_columns_month*10**-20 41 | 42 | sat_columns = pd.read_csv('/projects/b1045/NO2/l3/nitrogendioxide_tropospheric_column.csv',index_col = 0) 43 | 44 | sat_columns = sat_columns*10**5 45 | model_columns_month_ish = model_columns_month_ish*10**5 46 | 47 | # Make scatter 48 | #-------------------------------- 49 | 50 | plt.scatter(sat_columns, model_columns_month_ish, alpha = 0.84, color = 'purple') 51 | plt.scatter(sat_columns, model_columns_month_ish, alpha = 0.84, color = 'purple') 52 | plt.xlim(0,0.00021*10**5) 53 | plt.ylim(0,0.00021*10**5) 54 | plt.xlabel('TropOMI Column ('+ Dataset(column_dir + onlyfiles[i],'r')['NO2'].units + '*10^15)') 55 | plt.ylabel('CMAQ Column') 56 | plt.plot([-100,100],[-100,100],c='black',alpha = 0.5) 57 | 58 | from scipy.stats import pearsonr 59 | sat_columns = np.array(sat_columns) 60 | scr, mcr = sat_columns.ravel(), model_columns_month_ish.ravel() 61 | bad = np.isnan(scr) 62 | 63 | r = round(pearsonr(mcr[~bad],scr[~bad])[0],2) 64 | 65 | plt.title('R = '+ str(r)) 66 | 67 | plt.savefig(fig_dir+'sat_to_model.png') 68 | 69 | fig,ax = plt.subplots(figsize = (6,6)) 70 | from palettable.colorbrewer.sequential import OrRd_4 71 | 72 | [plt.scatter(stn_epa_mix[i]['Sample Measurement'], pd.DataFrame(stnpixel_from_cmaq)[i], alpha = 0.99, color = OrRd_4.mpl_colors[i+1]) for i in range(len(epa_lat))] 73 | plt.xlim(0,27) 74 | plt.ylim(0,27) 75 | plt.xlabel('EPA Station (ppb)') 76 | plt.ylabel('CMAQ (ppb)') 77 | plt.plot([-100,100],[-100,100],c='black',alpha = 0.5) 78 | 79 | ab = np.array([np.array(stn_epa_mix[i]['Sample Measurement']) for i in range(len(stn_epa_mix))]).ravel() 80 | ba = np.array([np.array(pd.DataFrame(stnpixel_from_cmaq)[i]) for i in range(len(stn_epa_mix))]).ravel() 81 | 82 | bad = np.isnan(ab) 83 | 84 | r = round(pearsonr(ab[~bad],ba[~bad])[0],2) 85 | 86 | plt.title('Daily Average R = '+ str(r)) 87 | 88 | plt.savefig(fig_dir+'stn_to_model.png') 89 | 90 | # make MAPS 91 | #-------------------------------- 92 | 93 | #options 94 | crs_new = ccrs.PlateCarree() 95 | import cartopy.feature as cfeature 96 | from cartopy.feature import NaturalEarthFeature, LAND, COASTLINE 97 | 98 | vmin,vmax = 2, 20 99 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 100 | cmap = 'magma_r' 101 | xl,xu,yl,yu = lon.min()+1,lon.max()-1,lat.min()+1,lat.max()-1 102 | xl,xu,yl,yu = lon.ravel()[~bad].min(),lon.ravel()[~bad].max(),lat.ravel()[~bad].min(),lat.ravel()[~bad].max() 103 | 104 | # sat column 105 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 8)) 106 | cs = ax.pcolormesh(lon,lat, sat_columns,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 107 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 108 | cbar.set_ticks(levels) 109 | cbar.set_ticks(levels) 110 | ax.set_extent([xl,xu,yl,yu]) 111 | ax.set_title('Regridded TropOMI NO2') 112 | states = cfeature.STATES.with_scale('10m') 113 | ax.add_feature(states) 114 | 115 | plt.savefig(fig_dir+'sat_no2.png') 116 | 117 | # model column 118 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 8)) 119 | cs = ax.pcolormesh(lon,lat, model_columns_month_ish[0],transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 120 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 121 | cbar.set_ticks(levels) 122 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 123 | ax.set_title('CMAQ Column NO2') 124 | states = cfeature.STATES.with_scale('10m') 125 | ax.add_feature(states) 126 | plt.savefig(fig_dir+'model_no2.png') 127 | 128 | 129 | #difference bw model and satellite 130 | from palettable.colorbrewer.diverging import RdGy_10 131 | 132 | difference = model_columns_month_ish[0] - sat_columns 133 | vmin,vmax = difference[~np.isnan(difference)].min(),difference[~np.isnan(difference)].max() 134 | vmin,vmax = -8,8 135 | cmap = RdGy_10.mpl_colormap 136 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 137 | 138 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 8)) 139 | cs = ax.pcolormesh(lon,lat, difference,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 140 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 141 | cbar.set_ticks(levels) 142 | ax.set_extent([xl,xu,yl,yu]) 143 | states = cfeature.STATES.with_scale('10m') 144 | ax.add_feature(states) 145 | ax.set_title('Delta Column NO2') 146 | plt.savefig(fig_dir+'difference_no2.png') 147 | 148 | 149 | # timeseries_model 150 | 151 | model_columns = np.asarray([Dataset(column_dir + onlyfiles[i],'r')['NO2'] for i in range(len(onlyfiles))]) 152 | model_columns_month = np.array([model_columns[i].mean(axis=0) for i in range(len(model_columns))]) 153 | model_columns_month = model_columns_month*10**-15 154 | cmap = 'magma_r' 155 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 156 | xl,xu,yl,yu = lon.min()+1,lon.max()-1,lat.min()+1,lat.max()-1 157 | 158 | vmin,vmax = 2, 20 159 | 160 | for i in range(len(model_columns_month)): 161 | # model column 162 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(10, 6)) 163 | cs = ax.pcolormesh(lon,lat, model_columns_month[i][0],transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 164 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 165 | cbar.set_ticks(levels) 166 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 167 | ax.set_title('CMAQ Column NO2') 168 | states = cfeature.STATES.with_scale('10m') 169 | ax.add_feature(states) 170 | plt.savefig(fig_dir+'timeseries_model_column'+str(i)+'.png') 171 | plt.close() 172 | 173 | vmin,vmax = 2, 20 174 | 175 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 176 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 177 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)) 178 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 179 | 180 | for i in range(5): 181 | for j in range(len(model_columns[0])): 182 | #for i in range(1): 183 | # for j in range(1): 184 | data = model_columns[i][j][0]*10**-15 185 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(10, 6)) 186 | #ax.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 187 | cs = ax.pcolormesh(lon,lat, data,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 188 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 189 | cbar.set_ticks(levels) 190 | ax.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 191 | x=[chi_shapefile.bounds.minx.min(), chi_shapefile.bounds.maxx.max()] 192 | y=[chi_shapefile.bounds.miny.min(), chi_shapefile.bounds.maxy.max()] 193 | #ax.set_extent([x[0]-.03,x[1]+.03,y[0]-.03,y[1]+.03],crs= crs_new) 194 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 195 | ax.set_title('CMAQ Column') 196 | states = cfeature.STATES.with_scale('10m') 197 | ax.add_feature(states) 198 | plt.savefig(fig_dir+'timeseries_model_column_d'+str(i)+'_h'+str(j)+'.png') 199 | plt.close() 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /PostProcessing/plot_cmaq_may2021.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # plot_cmaq_may2021.py 4 | 5 | # --------------------------------------------------------------------- 6 | # Stacy Montgomery, NOV 2018 - DEC 2018 7 | # This program takes the cropped l2 files and regrids the data to new domain. 8 | # --------------------------------------------------------------------- 9 | # USER INPUT 10 | # --------------------------------------------------------------------- 11 | from netCDF4 import Dataset 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import pandas as pd 15 | import os 16 | import netCDF4 17 | import math 18 | from scipy.interpolate import griddata 19 | import scipy.stats as st 20 | import cartopy.feature as cfeature 21 | from cartopy import crs as ccrs; 22 | from shapely.ops import unary_union, cascaded_union 23 | from geopandas.tools import sjoin 24 | from shapely.geometry import Point, shape 25 | from cartopy import crs as ccrs; 26 | # --------------------------------------------------------------------- 27 | 28 | # dir to grid file 29 | dir='/projects/b1045/jschnell/ForStacy/' 30 | ll='latlon_ChicagoLADCO_d03.nc' 31 | 32 | dir_epa='/projects/b1045/montgomery/CMAQcheck/' 33 | 34 | dir_cmaq_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 35 | dir_cmaq_d03_wint='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 36 | 37 | #names of lat lons in the cmaq grid 38 | la,lo='lat','lon' # for 1.3km 39 | la,lo='LAT','LON' # for 4km 40 | 41 | # CMAQ RUN things 42 | domain=['d03']*3 43 | time='hourly' 44 | year='2018' 45 | month='8' 46 | ssn = 'Summer' 47 | 48 | var = ['NO2','O3','PM25_TOT']*2 49 | var_tit=[r'NO$_2$',r'O$_3$',r'PM$_{2.5,TOT}$'] 50 | #epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 51 | epa_files =[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[i],domain[i],year,month,) for i in range(len(domain))] 52 | 53 | year='2019' 54 | month='1' 55 | ssn = 'Winter' 56 | 57 | ep2 = [dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[i],domain[i],year,month,) for i in range(len(domain))] 58 | 59 | epa_files = epa_files +ep2 60 | 61 | startswith = 'COMBINE_ACONC' 62 | 63 | # --------------------------------------------------------------------- 64 | 65 | def pull_cmaq(dir_CMAQ,startswith,cmaq_var): 66 | #pull model files from given directoy 67 | onlyfiles = next(os.walk(dir_CMAQ))[2] 68 | onlyfiles.sort() # so that searching for dates are easier 69 | # pull only CONC files 70 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 71 | # get data files 72 | ncfile_CMAQ_base = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 73 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 74 | return ncfile_CMAQ_base, units_cmaq 75 | 76 | 77 | 78 | def get_min_max_cmaq(base,var,hrs, mask=False, ma = np.zeros(3)): 79 | basel=[base[i][var][hr] for i in range(len(base)) for hr in hrs] 80 | basel=np.array(basel) 81 | if mask==True: 82 | base_max = np.array([basel[i][0][~mask].max() for i in range(len(basel))]) 83 | base_min = np.array([basel[i][0][~mask].min() for i in range(len(basel))]) 84 | base_mean = np.array([basel[i][0][~mask].mean() for i in range(len(basel))]) 85 | else: 86 | base_max = np.array([basel[i][0].max() for i in range(len(basel))]) 87 | base_min = np.array([basel[i][0].min() for i in range(len(basel))]) 88 | base_mean = np.array([basel[i][0].mean() for i in range(len(basel))]) 89 | return base_max,base_min,base_mean 90 | 91 | 92 | def get_avg_cmaq(base,var,hrs, mask=False, ma = np.zeros(3)): 93 | basel=[base[i][var][hr] for i in range(len(base)) for hr in hrs] 94 | basel=np.array(basel) 95 | return np.mean(basel,axis=0)[0] 96 | 97 | def get_avg_epa(epa_file): 98 | #for t in range(1): 99 | ef = epa_file 100 | epa = pd.read_csv(ef) 101 | epa_drop = pd.DataFrame([epa.level_0.tolist(),epa['Sample Measurement'].tolist(),epa['CMAQ'].tolist(),epa['Latitude'].tolist(),epa['Longitude'].tolist()]).T 102 | epa_drop.columns = ['Datetime','Sample Measurement','CMAQ','Lat','Lon'] 103 | epa_drop['Sample Measurement'] = epa_drop['Sample Measurement'].astype(float) 104 | return epa_drop.groupby(['Lat','Lon']).mean().reset_index() 105 | 106 | 107 | def get_min_max_epa(epa_file): 108 | #for t in range(1): 109 | ef = epa_file 110 | epa = pd.read_csv(ef) 111 | epa_drop = pd.DataFrame([epa.level_0.tolist(),epa['Sample Measurement'].tolist(),epa['CMAQ'].tolist()]).T 112 | epa_drop.columns = ['Datetime','Sample Measurement','CMAQ'] 113 | epa_drop.Datetime = pd.to_datetime(epa_drop.Datetime) 114 | epa_drop = epa_drop.set_index('Datetime') 115 | # 116 | fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = [],[],[],[],[],[] 117 | for i in range(744): 118 | f=epa_drop.loc[epa_drop.index[i]] 119 | fmax_epa.append(f.max().tolist()[0]) 120 | fmin_epa.append(f.min().tolist()[0]) 121 | fmean_epa.append(f.mean().tolist()[0]) 122 | fmax_cmaq.append(f.max().tolist()[1]) 123 | fmin_cmaq.append(f.min().tolist()[1]) 124 | fmean_cmaq.append(f.mean().tolist()[1]) 125 | # Plot by max/min/avg 126 | return epa_drop.index[0:744],fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq 127 | 128 | 129 | # --------------------------------------------------------------------- 130 | #START CODE 131 | # ################### ################### ################### ################## 132 | 133 | 134 | base,t_u = pull_cmaq(dir_cmaq_d03,startswith,var[0:3]) 135 | base_wint,t_u = pull_cmaq(dir_cmaq_d03_wint,startswith,var[0:3]) 136 | 137 | datas = [get_avg_cmaq(base,var[i],hrs) for i in range(len(var[0:3]))] + [get_avg_cmaq(base_wint,var[i],hrs) for i in range(len(var[0:3]))] 138 | 139 | 140 | # adjustable plotting parts 141 | llx=Dataset(dir+ll,'r') 142 | lat,lon=llx['lat'][:],llx['lon'][:] 143 | 144 | 145 | #START PLOT 146 | # ################### ################### ################### ################## 147 | 148 | c = ['Orchid','Blue','limegreen'] 149 | c2 = ['Purple','Navy','darkgreen'] 150 | 151 | hrs = np.arange(0,24) 152 | 153 | 154 | units = [r'ppb',r'ppb',r'ug/m$^3$']*3 155 | vmins,vmaxs = [0,25,5,0,25,5],[20,45,13,20,45,13] 156 | 157 | titles = [r'Summer NO$_2$ ',r'Winter NO$_2$', 158 | r'Summer O$_3$ ',r'Winter O$_3$', 159 | r'Summer PM$_2.5$ ',r'Winter PM$_2.5$'] 160 | 161 | 162 | cmaps = ['Purples','Blues','Greens']*2 163 | 164 | 165 | figtit = 'monthly_average_with_overlay.png' 166 | #--- fig 167 | 168 | def create_fig(lon,lat,base,datas,varS,vmins,vmaxs,cmaps,units, titles,figtit,show=False,save=False): 169 | # 170 | crs_new = ccrs.PlateCarree() 171 | fig, axs = plt.subplots(nrows=3,ncols=2,subplot_kw={'projection': crs_new},figsize=(8,7)) 172 | axs = axs.T.ravel() 173 | axs[0].set_ylabel(r'NO$_2$') 174 | axs[1].set_ylabel(r'O$_3$') 175 | axs[3].set_ylabel(r'PM$_2.5$') 176 | axs[0].set_title('Summer') 177 | axs[3].set_title('Winter') 178 | # 179 | for i in range(len(axs)): 180 | print(varS[i]) 181 | epa = get_avg_epa(epa_files[i]) 182 | #if i < 3: data = get_avg_cmaq(base,varS[i],hrs) 183 | #else: data = get_avg_cmaq(base_wint,varS[i],hrs) 184 | data = datas[i] 185 | vmin = vmins[i] 186 | vmax = vmaxs[i] 187 | title = titles[i] 188 | cmap = cmaps[i] 189 | if varS[i] == 'O3': epa['Sample Measurement'] = epa['Sample Measurement']*1000+10 190 | levels = list(np.arange(vmin,vmax,(vmax-vmin)/10))+[vmax] 191 | #plot 192 | cs=axs[i].pcolormesh(lon,lat, data,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax)# 193 | cs2 = axs[i].scatter(epa.Lon,epa.Lat,c=epa['Sample Measurement'],cmap = cmap, vmin = vmin, vmax = vmax,s=40,edgecolors = 'black') 194 | # add limits 195 | # 196 | x=[lon.min(),lon.max()] 197 | y=[lat.min(),lat.max()] 198 | axs[i].set_extent([x[0]+.5,x[1]-.5,y[0]+.5,y[1]-.5],crs= crs_new) 199 | # 200 | #if i ==3 or i ==4 or i == 5: 201 | if i < 100: 202 | cbar=plt.colorbar(cs,boundaries= levels,fraction=0.028, pad=0.02,ax=axs[i]) 203 | # 204 | cbar.set_ticks(levels) 205 | cbar.set_label(units[i]) 206 | # add features 207 | states_provinces = cfeature.NaturalEarthFeature(category='cultural',name='admin_1_states_provinces_lines',edgecolor='black',facecolor='none',scale='10m',alpha = 0.3) 208 | borders = cfeature.NaturalEarthFeature(scale='50m',category='cultural',name='admin_0_countries',edgecolor='black',facecolor='none',alpha=0.6) 209 | land = cfeature.NaturalEarthFeature('physical', 'lakes', '10m', edgecolor='black', facecolor='none') 210 | axs[i].add_feature(land, edgecolor='black') 211 | axs[i].add_feature(borders, edgecolor='black') 212 | axs[i].add_feature(states_provinces, edgecolor='black') 213 | #axs[i].set_title(i) 214 | # add title 215 | #axs[i].set_title(title) 216 | plt.tight_layout() 217 | # 218 | # 219 | if save == True: plt.savefig(figtit) 220 | # 221 | if show==True: plt.show() 222 | 223 | 224 | create_fig(lon,lat,base,datas,var,vmins,vmaxs,cmaps,units, titles,figtit,show=True,save=False) 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /correlation_wrf_cmaq_smoke.py: -------------------------------------------------------------------------------- 1 | 2 | #------------------------------------------ 3 | # Libraries 4 | #-------------- 5 | from matplotlib import pyplot as plt ; from matplotlib import colors 6 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 7 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 8 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 9 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 10 | from netCDF4 import Dataset 11 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 12 | import matplotlib.path as mpath; import seaborn as sns 13 | import timeit 14 | from cartopy import crs as ccrs 15 | import cartopy 16 | 17 | #------------------------------------------ 18 | 19 | 20 | 21 | dir_WRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 22 | dir_CMAQ = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 23 | dir_GRID='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 24 | dir_EMIS = '/projects/b1045/wrf-cmaq/input/emis/Chicago_LADCO/ChicagoLADCO_d03/' 25 | emis_dir = '/projects/b1045/wrf-cmaq/input/emis/Chicago_LADCO/ChicagoLADCO_d03/' 26 | 27 | #variables of interest 28 | var=['O3','NO2','NO','CO','ISOP','SO2','FORM','PM25_TOT'] 29 | wrf_var=['T2','PSFC','RAINC','RAINNC','Q2','V10','U10'] 30 | smoke_var = ['NO2','NO','CO','ISOP','SO2', 'FORM'] 31 | 32 | # User defined functions 33 | #------------------------------------------ 34 | def common_data(list1, list2): 35 | result = False 36 | # traverse in the 1st list 37 | for x in list1: 38 | # traverse in the 2nd list 39 | for y in list2: 40 | # if one common 41 | if x == y: 42 | result = True 43 | return result 44 | return result 45 | 46 | #------------------------------------------ 47 | 48 | #load chicago shapefile 49 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 50 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 51 | 52 | #get names of files given directoy 53 | onlyfiles = next(os.walk(dir_CMAQ))[2] 54 | onlyfiles=sorted(onlyfiles) 55 | fnames_cmaq = [x for x in onlyfiles if x.startswith("COMBINE_ACONC")] 56 | fnames_wrf= ['wrfout_d01_'+str(fnames_cmaq[i]).split('_')[-1].split('.nc')[0][0:4]+'-'+str(fnames_cmaq[i]).split('_')[-1].split('.nc')[0][4:6]+'-'+str(fnames_cmaq[i]).split('_')[-1].split('.nc')[0][6:]+'_00:00:00' for i in range(len(fnames_cmaq))] 57 | 58 | fnames_cmaq = fnames_cmaq[:-1] 59 | fnames_wrf = fnames_wrf[:-1] 60 | 61 | #dates 62 | dates=[fnames_wrf[i].split('wrfout_d01_')[1].split('_')[0] for i in range(len(fnames_wrf))] 63 | dates = dates[:-1] 64 | dates2 = ['2018'+'08'+("{:02d}".format(i)) for i in range(1,32)] 65 | 66 | version = 'emissions_v0' 67 | # emissions dir 68 | #Get number of files in directory with L2 domain CSV files 69 | emis_files = next(os.walk(emis_dir))[2] 70 | emis_files = [x for x in emis_files if x.startswith("emis_mole_all")] 71 | emis_files =sorted(emis_files) # so that searching for dates are easier 72 | maskfiles = [common_data(emis_files[i].split('_'), dates2) for i in range(len(emis_files))] 73 | emis_files = np.array(emis_files)[maskfiles] 74 | 75 | 76 | #pull in model files and variables 77 | # for example: finding the difference between the 11th day and the 0th day of NO2: 78 | # cmaq_ncfile[10]['NO2'][0]-cmaq_ncfile[0]['NO2'][0] 79 | cmaq_ncfile= [Dataset(dir_CMAQ+ fnames_cmaq[i],'r') for i in range(len(fnames_cmaq))] 80 | wrf_ncfile=[Dataset(dir_WRF + fnames_wrf[i],'r') for i in range(len(fnames_wrf))] 81 | emis_ncfile=[Dataset(dir_EMIS + emis_files[i],'r') for i in range(len(emis_files))] 82 | 83 | units_cmaq = [cmaq_ncfile[0][var[i]].units for i in range(len(var))] 84 | units_wrf = [wrf_ncfile[0][wrf_var[i]].units for i in range(len(wrf_var))] 85 | units_smoke = [emis_ncfile[0][smoke_var[i]].units for i in range(len(smoke_var))] 86 | 87 | #get lat lon from grid file 88 | ll=Dataset(dir_GRID,'r') 89 | lat,lon=ll['lat'][:],ll['lon'][:] 90 | 91 | #wrflatlon 92 | wrflon, wrflat = wrf_ncfile[0]['XLONG'][0],wrf_ncfile[0]['XLAT'][0] 93 | 94 | 95 | # pull out variables 96 | #------------------------------------------ 97 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)[2]) 98 | 99 | # routine to mask mask over chicago shapefile 100 | mask=np.ones(lon.shape,dtype=bool) 101 | mask[:] = False 102 | 103 | for i in range(len(lon)): 104 | for j in range(len(lon[0])): 105 | pt = Point(lon[i][j],lat[i][j]) 106 | mask[i][j] = pt.within(union[0]) 107 | 108 | # routine to mask mask over chicago shapefile 109 | mask_wrf=np.ones(wrflon.shape,dtype=bool) 110 | mask_wrf[:] = False 111 | 112 | for i in range(len(wrflon)): 113 | for j in range(len(wrflon[0])): 114 | pt = Point(wrflon[i][j], wrflat[i][j]) 115 | mask_wrf[i][j] = pt.within(union[0]) 116 | 117 | 118 | 119 | hours = pd.date_range(dates[0]+" 00:00", dates[-2]+" 23:00",freq="60min") 120 | 121 | 122 | # pull out variables 123 | #------------------------------------------ 124 | 125 | def pull_vars(ncfile,var,mask): 126 | var_crop = [] 127 | for i in range(len(var)): 128 | if ncfile == cmaq_ncfile: crop = [ncfile[j][var[i]][h][0][mask] for h in range(24) for j in range(len(ncfile))] 129 | elif ncfile == wrf_ncfile: crop = [ncfile[j][var[i]][h][mask_wrf] for h in range(24) for j in range(len(ncfile))] 130 | elif ncfile == emis_ncfile: crop = [ncfile[j][var[i]][h][0][mask] for h in range(24) for j in range(len(ncfile))] 131 | else: print('ERROR') 132 | # 133 | var_crop.append(crop) 134 | return var_crop 135 | 136 | mask_ravel = np.array(mask).ravel() 137 | lon_ravel = np.array(lon).ravel()[np.array(mask).ravel()] 138 | lat_ravel = np.array(lat).ravel()[np.array(mask).ravel()] 139 | 140 | var_crop=pull_vars(cmaq_ncfile,var,mask) 141 | var_crop_emis=pull_vars(emis_ncfile,smoke_var,mask) 142 | var_crop_wrf=pull_vars(wrf_ncfile,wrf_var,mask_wrf) 143 | 144 | #rainc,rainnc = np.asarray(var_crop_wrf[3]), np.asarray(var_crop_wrf[2]) 145 | rain_cumulative = np.asarray(var_crop_wrf[3]) + np.asarray(var_crop_wrf[2]) 146 | 147 | rain = [[] for i in range(len(rain_cumulative))] 148 | 149 | # remove the cumulative nature of rain variables 150 | for i in range(len(rain_cumulative)): 151 | if i == 0: rain[0] = np.zeros(rain_cumulative[0].shape).tolist() 152 | else: rain[i] = (rain_cumulative[i]-rain_cumulative[i-1]).tolist() 153 | 154 | #wrf_var=['T2','PSFC','RAINC','RAINNC','Q2','V10','U10'] 155 | var_crop_wrf = np.array([var_crop_wrf[0]]+ [var_crop_wrf[1]]+ [var_crop_wrf[4]]+ [var_crop_wrf[5]]+[var_crop_wrf[6]]+[rain]) 156 | wrf_var = ['T2','PSFC','Q2','V10','U10','RAIN'] 157 | 158 | var_crop_emis_tot = [np.array(var_crop_emis[i]).ravel() for i in range(len(var_crop_emis))] 159 | var_crop_tot = [np.array(var_crop[i]).ravel() for i in range(len(var_crop))] 160 | var_crop_wrf_tot = [var_crop_wrf[i].ravel() for i in range(len(var_crop_wrf))] 161 | 162 | var_crop_wrf_tot = var_crop_wrf_tot+ np.array([(np.array(var_crop_wrf_tot[-1])**2+np.array(var_crop_wrf_tot[-2])**2)**.5]).tolist() 163 | wrf_var = wrf_var + ['Wind_TOT'] 164 | 165 | var_to_wrf = var_crop_tot + var_crop_wrf_tot 166 | var_to_emis = var_crop_tot + var_crop_emis_tot 167 | 168 | #make corr matric 169 | corr_matrix_vw = np.zeros([len(var_to_wrf), len(var_to_wrf)]); corr_matrix_ve = np.zeros([len(var_to_emis), len(var_to_emis)]) 170 | 171 | from scipy.stats import pearsonr 172 | 173 | for i in range(len(var_to_wrf)): 174 | for j in range(len(var_to_wrf)): 175 | corr_matrix_vw[i][j]= pearsonr(var_to_wrf[i], var_to_wrf[j])[0] 176 | 177 | for i in range(len(var_to_emis)): 178 | for j in range(len(var_to_emis)): 179 | corr_matrix_ve[i][j]= pearsonr(var_to_emis[i], var_to_emis[j])[0] 180 | 181 | maskvw = np.zeros_like(corr_matrix_vw_df) 182 | maskvw[np.triu_indices_from(maskvw)] = True 183 | 184 | maskve = np.zeros_like(corr_matrix_ve_df) 185 | maskve[np.triu_indices_from(maskve)] = True 186 | 187 | # Start plotting cmaq v wrf 188 | titles_vw = [var[i]+'_CMAQ' for i in range(len(var))] + [wrf_var[i]+'_WRF' for i in range(len(wrf_var))] 189 | 190 | corr_matrix_vw_df = pd.DataFrame(corr_matrix_vw) 191 | corr_matrix_ve_df = pd.DataFrame(corr_matrix_ve) 192 | corr_matrix_vw_df.columns = titles_vw 193 | corr_matrix_vw_df.index = titles_vw 194 | corr_matrix_ve_df.columns = titles_ve 195 | corr_matrix_ve_df.index = titles_ve 196 | 197 | # Make heat maps of variables 198 | fig = plt.subplots(figsize = (8,7)) 199 | sns.heatmap(corr_matrix_vw_df,center = 0,annot = True,mask = maskvw, fmt='.2f') 200 | plt.tight_layout() 201 | plt.savefig('correlation_matrix_vw.svg') 202 | plt.show() 203 | 204 | # Start plotting cmaq v wrf 205 | titles_ve = [var[i]+'_CMAQ' for i in range(len(var))] + [smoke_var[i]+'_SMK' for i in range(len(smoke_var))] 206 | corr_matrix_ve_df = pd.DataFrame(corr_matrix_ve) 207 | corr_matrix_ve_df.columns = titles_ve 208 | corr_matrix_ve_df.index = titles_ve 209 | 210 | # Make heat maps of variables 211 | fig = plt.subplots(figsize = (8,7)) 212 | sns.heatmap(corr_matrix_ve_df,center = 0,annot = True,mask = maskve, fmt='.2f') 213 | plt.tight_layout() 214 | plt.savefig('correlation_matrix_ve.svg') 215 | plt.show() 216 | -------------------------------------------------------------------------------- /PostProcessing/timeseries_epa_stn_cmaq_may2021.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Time series 4 | # Stacy Montgomery 5 | # May 2021 6 | 7 | #---------------------------------------------------------# 8 | from datetime import timedelta, date,datetime; 9 | import pandas as pd 10 | import numpy as np 11 | from netCDF4 import Dataset 12 | from wrf import latlon_coords, getvar 13 | import glob, os 14 | import matplotlib.pyplot as plt 15 | import matplotlib.dates as mdates 16 | from shapely.geometry import Point, shape, Polygon 17 | import fiona 18 | from shapely.ops import unary_union, cascaded_union 19 | from geopandas.tools import sjoin 20 | import geopandas as gpd; import geoplot; 21 | import glob; 22 | import os; 23 | #---------------------------------------------------------# 24 | 25 | dir_epa='/projects/b1045/montgomery/CMAQcheck/' 26 | 27 | dir_cmaq_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 28 | dir_cmaq_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/postprocess/' 29 | 30 | #names of lat lons in the cmaq grid 31 | la,lo='lat','lon' # for 1.3km 32 | la,lo='LAT','LON' # for 4km 33 | 34 | # CMAQ RUN things 35 | domain=['d03']*3+['d02']*3 36 | time='hourly' 37 | year='2018' 38 | month='8' 39 | ssn = 'Summer' 40 | 41 | var = ['NO2','O3','PM25_TOT']*2 42 | var_tit=[r'NO$_2$',r'O$_3$',r'PM$_{2.5,TOT}$'] 43 | #epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 44 | epa_files =[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[i],domain[i],year,month,) for i in range(len(domain))] 45 | 46 | #------ DATERANGE 47 | 48 | def pull_cmaq(dir_CMAQ,startswith,cmaq_var): 49 | #pull model files from given directoy 50 | onlyfiles = next(os.walk(dir_CMAQ))[2] 51 | onlyfiles.sort() # so that searching for dates are easier 52 | # pull only CONC files 53 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 54 | # get data files 55 | ncfile_CMAQ_base = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 56 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 57 | return ncfile_CMAQ_base, units_cmaq 58 | 59 | 60 | 61 | def mask_given_shapefile(lon,lat,shapefile): 62 | ''' 63 | Make a mask given a shapefile 64 | lon - array of grid lons 65 | lat - array of grid lats 66 | shapefile - geopandas geodataframe shapefile 67 | ''' 68 | union=gpd.GeoSeries(unary_union(shapefile.geometry)) 69 | mask=np.ones(lon.shape,dtype=bool) 70 | mask[:] = True 71 | for i in range(len(lon)): 72 | for j in range(len(lon[0])): 73 | pt = Point(lon[i][j],lat[i][j]) 74 | if pt.within(union[0]): 75 | mask[i][j] = False 76 | # 77 | return mask 78 | 79 | 80 | 81 | def get_min_max_cmaq(base,var,hrs, mask=False, ma = np.zeros(3)): 82 | basel=[base[i][var][hr] for i in range(len(base)) for hr in hrs] 83 | basel=np.array(basel) 84 | if mask==True: 85 | base_max = np.array([basel[i][0][~mask].max() for i in range(len(basel))]) 86 | base_min = np.array([basel[i][0][~mask].min() for i in range(len(basel))]) 87 | base_mean = np.array([basel[i][0][~mask].mean() for i in range(len(basel))]) 88 | else: 89 | base_max = np.array([basel[i][0].max() for i in range(len(basel))]) 90 | base_min = np.array([basel[i][0].min() for i in range(len(basel))]) 91 | base_mean = np.array([basel[i][0].mean() for i in range(len(basel))]) 92 | return base_max,base_min,base_mean 93 | 94 | 95 | def get_min_max_epa(epa_file): 96 | #for t in range(1): 97 | ef = epa_files[0] 98 | epa = pd.read_csv(ef) 99 | epa_drop = pd.DataFrame([epa.level_0.tolist(),epa['Sample Measurement'].tolist(),epa['CMAQ'].tolist()]).T 100 | epa_drop.columns = ['Datetime','Sample Measurement','CMAQ'] 101 | epa_drop.Datetime = pd.to_datetime(epa_drop.Datetime) 102 | epa_drop = epa_drop.set_index('Datetime') 103 | # 104 | fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = [],[],[],[],[],[] 105 | for i in range(744): 106 | f=epa_drop.loc[epa_drop.index[i]] 107 | fmax_epa.append(f.max().tolist()[0]) 108 | fmin_epa.append(f.min().tolist()[0]) 109 | fmean_epa.append(f.mean().tolist()[0]) 110 | fmax_cmaq.append(f.max().tolist()[1]) 111 | fmin_cmaq.append(f.min().tolist()[1]) 112 | fmean_cmaq.append(f.mean().tolist()[1]) 113 | # Plot by max/min/avg 114 | return epa_drop.index[0:744],fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq 115 | 116 | 117 | 118 | #epa_drop.groupby('Datetime').mean() 119 | 120 | 121 | 122 | 123 | #START CODE 124 | # ################### ################### ################### ################## 125 | 126 | c = ['Orchid','Blue','limegreen'] 127 | c2 = ['Purple','Navy','darkgreen'] 128 | 129 | fig, axs = plt.subplots(nrows=3,ncols=1,figsize=(7.5,9)) 130 | axs=axs.ravel() 131 | 132 | for i in range(3): 133 | ax = axs[i] 134 | dt,fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = get_min_max_epa(epa_files[i]) 135 | #station 136 | #if var[i]=='O3': fmax_epa,fmin_epa,fmean_epa = np.array(fmax_epa)*1000,np.array(fmin_epa)*1000,np.array(fmean_epa)*1000 137 | ax.plot(dt, fmean_epa, '--',color=c[i],label='Station Mean') 138 | ax.fill_between(dt,fmin_epa, fmax_epa,facecolor=c[i],alpha=0.1) 139 | #cmaq 140 | ax.plot(dt[0:744], fmean_cmaq[0:744],'--',color=c2[i],label='CMAQ Mean') 141 | ax.fill_between(dt[0:744],fmin_cmaq[0:744], fmax_cmaq[0:744],facecolor=c2[i],alpha=0.1) 142 | #extra info 143 | if var[i]== 'O3': ax.set_ylim([0,100]) 144 | else: ax.set_ylim([0,50]) 145 | ax.set_xlim(dt[0],dt[-1]) 146 | # set week major ticks 147 | fmt_wk = mdates.DayLocator(interval=7) 148 | ax.xaxis.set_major_locator(fmt_wk) 149 | # set dayminor ticks 150 | fmt_day = mdates.DayLocator() 151 | ax.xaxis.set_minor_locator(fmt_day) 152 | # format title 153 | ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) 154 | if var[i] == 'PM25_TOT': ax.set_ylabel(var[i]+r' (ug/m$^3$)') 155 | else: ax.set_ylabel(var[i]+' (ppb)') 156 | ax.legend() 157 | #ax.set_title(var_tit[i]) 158 | if i ==0: ax.set_title(ssn) 159 | 160 | plt.tight_layout() 161 | 162 | plt.savefig('timseries_epa_cmaq_%s-%s.png'%(year,month)) 163 | 164 | plt.show() 165 | 166 | 167 | # CMAQ 168 | # ################### ################### ################## 169 | 170 | 171 | #pull model files from given directoy 172 | onlyfiles = next(os.walk(dir_cmaq_d03))[2] 173 | onlyfiles.sort() # so that searching for dates are easier 174 | startswith = 'COMBINE_ACONC' 175 | 176 | # pull only CONC files 177 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 178 | fnames_CMAQ = fnames_CMAQ[:-1] 179 | 180 | #get lat lon from grid file 181 | dir='/projects/b1045/jschnell/ForStacy/' 182 | ll='latlon_ChicagoLADCO_d03.nc' 183 | llx=Dataset(dir+ll,'r') 184 | lat,lon=llx['lat'][:],llx['lon'][:] 185 | 186 | # shapes and directories == https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=State+Legislative+Districts 187 | path='/projects/b1045/montgomery/shapefiles/Chicago/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 188 | path2 ='/projects/b1045/montgomery/shapefiles/Chicago/cook/Cook_County_Border.shp' 189 | chi_shapefile = gpd.GeoDataFrame.from_file(path2) 190 | mask = mask_given_shapefile(lon,lat,chi_shapefile) 191 | 192 | #pull temp 193 | #base,t_u = pull_cmaq(dir_cmaq_d03,startswith,var[0:3]) 194 | #base_max,base_min,base_mean = get_min_max_cmaq(base,var[0],hrs) 195 | #base_max_chi,base_min_chi,base_mean_chi = get_min_max_cmaq(base,var[0],hrs,mask=True,ma = mask) 196 | 197 | 198 | base,t_u = pull_cmaq(dir_cmaq_d03,startswith,var[0:3]) 199 | hrs = np.arange(0,24) 200 | dt,fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = get_min_max_epa(epa_files[0]) 201 | 202 | c = ['Orchid','Blue','limegreen'] 203 | c2 = ['Purple','Navy','darkgreen'] 204 | 205 | fig, axs = plt.subplots(nrows=3,ncols=1,figsize=(7.5,9)) 206 | axs=axs.ravel() 207 | 208 | for i in range(3): 209 | ax = axs[i] 210 | base_max,base_min,base_mean = get_min_max_cmaq(base,var[i],hrs) 211 | base_max_chi,base_min_chi,base_mean_chi = get_min_max_cmaq(base,var[i],hrs,mask=True,ma = mask) 212 | #dt,fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = get_min_max_epa(epa_files[i]) 213 | # 214 | ax.plot(dt, base_mean[0:744], '--',color=c[i],label='Domain Mean') 215 | ax.fill_between(dt,base_min[0:744], base_max[0:744],facecolor=c[i],alpha=0.1) 216 | #cmaq 217 | ax.plot(dt[0:744], base_mean_chi[0:744],'--',color=c2[i],label='Chicago Mean') 218 | ax.fill_between(dt[0:744],base_min_chi[0:744], base_max_chi[0:744],facecolor=c2[i],alpha=0.1) 219 | #extra info 220 | if var[i]== 'O3': ax.set_ylim([0,100]) 221 | else: ax.set_ylim([0,50]) 222 | ax.set_xlim(dt[0],dt[-1]) 223 | # set week major ticks 224 | fmt_wk = mdates.DayLocator(interval=7) 225 | ax.xaxis.set_major_locator(fmt_wk) 226 | # set dayminor ticks 227 | fmt_day = mdates.DayLocator() 228 | ax.xaxis.set_minor_locator(fmt_day) 229 | # format title 230 | ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) 231 | if var[i] == 'PM25_TOT': ax.set_ylabel(var[i]+r' (ug/m$^3$)') 232 | else: ax.set_ylabel(var[i]+' (ppb)') 233 | ax.legend() 234 | #ax.set_title(var_tit[i]) 235 | if i ==0: ax.set_title(ssn) 236 | 237 | plt.tight_layout() 238 | 239 | plt.savefig('timseries_ONLY_cmaq_%s-%s.png'%(year,month)) 240 | 241 | plt.show() 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | #locator = mdates.AutoDateLocator(minticks=3, maxticks=7) 257 | #formatter = mdates.ConciseDateFormatter(locator) 258 | #ax.xaxis.set_major_locator(locator) 259 | #ax.xaxis.set_major_formatter(formatter) 260 | 261 | 262 | 263 | 264 | #PLOT BY STATIONS 265 | #epa['latlon'] = [(epa.Longitude.tolist()[i],epa.Latitude.tolist()[i]) for i in range(len(epa.Latitude))] 266 | #lalo = epa.Latitude.unique().tolist(),epa.Longitude.unique().tolist() 267 | #epa_drop = epa.dropna(axis=0,subset=['Latitude']) 268 | #epa_drop_lalo = epa_drop.Latitude.unique() 269 | #fig,ax = plt.subplots() 270 | # 271 | #for i in epa_drop.Latitude.unique(): 272 | # tmp = epa_drop[epa_drop['Latitude']==i] 273 | # ax.scatter(tmp['level_0'],tmp['Sample Measurement'],label = i) 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | -------------------------------------------------------------------------------- /DataPreprocessing/wrf_to_stations_step2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ----------------- 4 | # Step 2 5 | # ----------------- 6 | # from step 1, get indices of the LCD latlon, which is used as an input to this code. 7 | # This code pulls out WRF data into csv files in the order of the LCD station data, which is then used as input to code 3. 8 | 9 | #ERROR-- rain seems to be weird. check write out. plot rain variables 10 | 11 | # --------------------------------------------------------------------------------------------------------- 12 | # ~~~~~~ START USER INPUT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | # variables of interest 15 | minTemp = 242; maxTemp = 294; 16 | month='08' 17 | year='2018' 18 | # Location of WRF output 19 | runname='output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852' 20 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 21 | 22 | # Processed US data, from previous file 23 | #File out names 24 | comp_dataset_name = dirout+'wrfcheck_withstations_'+runname+'_'+month+year+'.csv' # name and directory to write out to 25 | comp_dataset_extra = dirout+'completeddata_mini_extras2.csv' 26 | station_out_name = dirout+'station_out_removedmissing.csv' #name of intermediate file 27 | comp_dataset_name2= dirout+'wrfcheck_withstations_complete_rain.csv' 28 | 29 | 30 | 31 | #location of wrf and filenames 32 | #dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' 33 | # Name of run 34 | runname='output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852' 35 | #BASE_PXLSM_v0 36 | # Location of WRF output 37 | 38 | dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 39 | dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 40 | listOfStationsFile = "~/lcd-stations.csv" 41 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 42 | grid='latlon.nc' 43 | 44 | 45 | #dirout='/home/asm0384/WRFcheck/'+runname+'/' 46 | Chatty= True # false if you want to remove print statements 47 | slpon= True #True #need to configure to make SLP 48 | 49 | #start the code 50 | if Chatty: print('Starting ....') 51 | 52 | # -------------------------------------------------------------------------------------------------------- 53 | def getWRFfromIND(ncfile,indxy, filenames,varname): 54 | t2d01=[ncfile[z][varname][i] for z in range(len(ncfile)) for i in range(24)] 55 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(ncfile))] for l in range(len(indxy))] 56 | return t2d01_xx 57 | 58 | def getslpfromIND(ncfile,indxy, filenames,varname): 59 | t2d01=[ncfile[z][varname][i] for i in range(24) for z in range(len(ncfile))] 60 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(ncfile))] for l in range(len(indxy))] 61 | return t2d01_xx 62 | 63 | def getRHfromIND(ncfile,indxy, filenames): 64 | pq0 = 379.90516; a2 = 17.2693882; a3 = 273.16; a4 = 35.86 65 | q2=[ncfile[z]['Q2'][i]/((pq0 / ncfile[z]['PSFC'][i]) **(a2 * (ncfile[z]['T2'][i] - a3) / (ncfile[z]['T2'][i] - a4))) for z in range(len(ncfile)) for i in range(24)] 66 | t2d01_xx= [[q2[t][indxy[l]] for t in range(24*len(ncfile))] for l in range(len(indxy))] 67 | return t2d01_xx 68 | 69 | # remove missing files 70 | def rm_missing(filenames_d01): 71 | testrm=[] 72 | for i in filenames_d01: 73 | try: 74 | test=Dataset(dirToWRF+i) 75 | except FileNotFoundError: 76 | print(i) 77 | testrm.append(i) 78 | # 79 | for i in testrm: 80 | filenames_d01.remove(i) 81 | #return 82 | return filenames_d01 83 | 84 | #t2d01=[getvar(ncfiled01[z],"slp",timeidx=i).data for i in range(24) for z in range(len(filenames_d01))] 85 | 86 | # -------------------------------------------------------------------------------------------------------- 87 | # ~~~~~~ IMPORT PACKAGES ~~~~~~~~~~~~ 88 | #Station 89 | import glob, os 90 | import pandas as pd, numpy as np, matplotlib.pyplot as plt, cartopy.crs as crs, cartopy.feature as cpf 91 | from netCDF4 import Dataset 92 | from matplotlib.cm import get_cmap 93 | from cartopy.feature import NaturalEarthFeature 94 | from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim, cartopy_ylim, latlon_coords) 95 | import time 96 | from timezonefinder import TimezoneFinder 97 | from pytz import timezone 98 | import pytz 99 | from datetime import datetime,date, timedelta 100 | import dateutil.parser as dparser 101 | 102 | tf = TimezoneFinder(in_memory=True) 103 | 104 | 105 | #------------------------------------------------------------------------------ 106 | # ~~~~~~ START MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 107 | #------------------------------ load in wrf file names ------------------------ 108 | # $1 Get WRF file names 109 | #filenames_d01=[] 110 | #os.chdir(dirToWRF) 111 | #for file in glob.glob("wrfout_d01_*"): 112 | # filenames_d01.append(file) 113 | # 114 | #filenames_d01.sort() #files are now sorted by date and time 115 | 116 | # $1 Get WRF file names 117 | filenames_d02=[] 118 | os.chdir(dirToWRF_d02) 119 | for file in glob.glob("wrfout_d01_*"): 120 | filenames_d02.append(file) 121 | 122 | filenames_d02.sort() #files are now sorted by date and time 123 | filenames_d02=filenames_d02[:-1] 124 | 125 | # $1 Get WRF file names 126 | filenames_d03=[] 127 | os.chdir(dirToWRF_d03) 128 | for file in glob.glob("wrfout_d01_*"): 129 | filenames_d03.append(file) 130 | 131 | filenames_d03.sort() #files are now sorted by date and time 132 | filenames_d03=filenames_d03[:-1] 133 | 134 | # remove missing files 135 | #filenames_d01=rm_missing(filenames_d01) 136 | #filenames_d02=rm_missing(filenames_d02) 137 | #filenames_d03=rm_missing(filenames_d03) 138 | 139 | #dates_d01=[filenames_d01[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d01))] 140 | dates_d02=[filenames_d02[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d02))] 141 | dates_d03=[filenames_d03[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d03))] 142 | 143 | #if dates_d01== dates_d02 and d 144 | ates=dates_d01 145 | #else: 146 | # print('dates are not consistent between domains! Defaulting to d01 dates, may cause errors!') 147 | dates=dates_d02 148 | 149 | #ncfiled01 = [Dataset(filenames_d01[i]) for i in range(len(filenames_d01))] 150 | ncfiled02 = [Dataset(dirToWRF_d02+filenames_d02[i]) for i in range(len(filenames_d02))] 151 | ncfiled03 = [Dataset(dirToWRF_d03+filenames_d03[i]) for i in range(len(filenames_d03))] 152 | 153 | 154 | #get indices for dataset, compress the indices for each domain 155 | STATION= pd.read_csv(comp_dataset_name) 156 | in_d02= STATION['in_d02'].tolist() 157 | in_d03 = STATION['in_d03'].tolist() 158 | yy_d02=np.compress(in_d02,STATION['yy_d02']).tolist();xx_d02= np.compress(in_d02, STATION['xx_d02']).tolist() 159 | yy_d03= np.compress(in_d03, STATION['yy_d03']).tolist();xx_d03= np.compress(in_d03, STATION['xx_d03']).tolist() 160 | 161 | indxyd02clip =[(xx_d02[t],yy_d02[t]) for t in range(len(yy_d02))] 162 | indxyd03clip =[(xx_d03[t],yy_d03[t]) for t in range(len(yy_d03))] 163 | print(indxyd02clip) 164 | #pull variables 165 | start=time.time() 166 | #t2d01 = getWRFfroimIND(ncfiled01,indxyd01, filenames_d01,'T2') 167 | t2d02 = getWRFfromIND(ncfiled02, indxyd02clip, filenames_d02,'T2') 168 | t2d03 = getWRFfromIND(ncfiled03, indxyd03clip, filenames_d03,'T2') 169 | 170 | #raind01 = getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'RAINC') 171 | raind02 = getWRFfromIND(ncfiled02, indxyd02clip, filenames_d02,'RAINC') 172 | raind03 = getWRFfromIND(ncfiled03, indxyd03clip, filenames_d03,'RAINC') 173 | 174 | #rainncd01 = getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'RAINNC') 175 | rainncd02 = getWRFfromIND(ncfiled02, indxyd02clip, filenames_d02,'RAINNC') 176 | rainncd03 = getWRFfromIND(ncfiled03, indxyd03clip, filenames_d03,'RAINNC') 177 | 178 | #rhd01 = getRHfromIND(ncfiled01,indxyd01, filenames_d01) 179 | rhd02 = getRHfromIND(ncfiled02, indxyd02clip, filenames_d02) 180 | rhd03 = getRHfromIND(ncfiled03, indxyd03clip, filenames_d03) 181 | 182 | # 10 might be wrong 183 | 184 | # 10 might be wrong 185 | #u10d01,v10d01 = getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'U10'),getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'V10') 186 | u10d02,v10d02 =getWRFfromIND(ncfiled02,indxyd02clip,filenames_d02,'U10'),getWRFfromIND(ncfiled02,indxyd02clip,filenames_d02, 'V10') 187 | u10d03,v10d03 = getWRFfromIND(ncfiled03,indxyd03clip, filenames_d03, 'U10'),getWRFfromIND(ncfiled03,indxyd03clip, filenames_d03, 'V10') 188 | 189 | 190 | if slpon==True: 191 | # slpd01 = getslpfromIND(ncfiled01,indxyd01, filenames_d01,'PSFC') 192 | slpd02 = getslpfromIND(ncfiled02, indxyd02clip, filenames_d02,'PSFC') 193 | slpd03 = getslpfromIND(ncfiled03, indxyd03clip, filenames_d03,'PSFC') 194 | 195 | end=str(time.time()-start) 196 | print('Time to pull variables from netCDF files: '+ end + 's') 197 | 198 | 199 | #q=[t2d01, t2d02, t2d03, raind01, raind02, raind03, rainncd01, rainncd02, rainncd03,rhd01,rhd02,rhd03, u10d01,v10d01,u10d02,v10d02,u10d03,v10d03] 200 | #q1=['t2d01', 't2d02', 't2d03', 'raind01', 'raind02', 'raind03', 'rainncd01', 'rainncd02', 'rainncd03'] 201 | #del t2d01, t2d02, t2d03, raind01, raind02, raind03, rainncd01, rainncd02, rainncd03 202 | q=[t2d02, t2d03,raind02, raind03, rainncd02, rainncd03, rhd02,rhd03,u10d02,u10d03,v10d02,v10d02] 203 | #name=['t2d01.csv', 't2d02.csv', 't2d03.csv', 'raind01.csv', 'raind02.csv', 'raind03.csv', 'rainncd01.csv', 'rainncd02.csv', 'rainncd03.csv',] 204 | #name=['t2d01.csv', 't2d02.csv', 't2d03.csv', 'raind01.csv', 'raind02.csv', 'raind03.csv', 'rainncd01.csv', 'rainncd02.csv', 'rainncd03.csv','rhd01.csv','rhd02.csv','rhd03.csv', 'u10d01.csv','v10d01.csv','u10d02.csv','v10d02.csv','u10d03.csv','v10d03.csv'] 205 | name=['t2d02.csv', 't2d03.csv', 'raind02.csv', 'raind03.csv', 'rainncd02.csv', 'rainncd03.csv','rhd02.csv','rhd03.csv', 'u10d02.csv','u10d03.csv', 'v10d02.csv','v10d03.csv'] 206 | 207 | for i in range(len(q)): 208 | df= pd.DataFrame(q[i]) 209 | df.to_csv(dirout+name[i]) 210 | 211 | 212 | if slpon==True: 213 | #q1=[slpd01, slpd02, slpd03] 214 | #name1=['slpd01.csv', 'slpd02.csv', 'slpd03.csv'] 215 | q1=[slpd02, slpd03] 216 | name1=[ 'slpd02.csv', 'slpd03.csv'] 217 | for i in range(len(q1)): 218 | df= pd.DataFrame(q1[i]) 219 | df.to_csv(dirout+name1[i]) 220 | 221 | 222 | print("Done with step 2") 223 | 224 | 225 | -------------------------------------------------------------------------------- /PostProcessing/o3_profile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # o3_column_june2021.py 4 | 5 | 6 | # --------------------------------------------------------------------- 7 | # Stacy Montgomery, NOV 2018 - DEC 2018 8 | # Plot o3 column over Chicago to watch how it transitions 9 | # --------------------------------------------------------------------- 10 | # USER INPUT 11 | # --------------------------------------------------------------------- 12 | from netCDF4 import Dataset 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | import pandas as pd 16 | import os 17 | import netCDF4 18 | import math 19 | from scipy.interpolate import griddata 20 | import scipy.stats as st 21 | import cartopy.feature as cfeature 22 | from cartopy import crs as ccrs; 23 | from shapely.ops import unary_union, cascaded_union 24 | from geopandas.tools import sjoin 25 | from shapely.geometry import Point, shape 26 | from cartopy import crs as ccrs; 27 | import geopandas as gpd 28 | import moviepy.editor as mpy 29 | import os 30 | import glob 31 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 32 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 33 | import matplotlib.path as mpath; 34 | from cartopy.io.shapereader import Reader 35 | 36 | import matplotlib.colors as colors 37 | 38 | # --------------------------------------------------------------------- 39 | 40 | # dir to grid file 41 | dir='/projects/b1045/jschnell/ForStacy/' 42 | ll='latlon_ChicagoLADCO_d03.nc' 43 | 44 | dir='/home/asm0384/' 45 | ll = 'lat_lon_chicago_d02.nc' 46 | 47 | dir_cmaq_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 48 | dir_cmaq_d03_wint='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 49 | 50 | dir_cmaq_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 51 | dir_cmaq_d02_wint='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/' 52 | 53 | 54 | #names of lat lons in the cmaq grid 55 | la,lo='lat','lon' # for 1.3km 56 | 57 | # 58 | year='2018' 59 | month='8' 60 | ssn = 'Summer' 61 | 62 | startswith = 'CCTM_CONC' 63 | 64 | # shapes and directories == https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=State+Legislative+Districts 65 | path='/projects/b1045/montgomery/shapefiles/Chicago/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 66 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 67 | crs_new = ccrs.PlateCarree()# get shape outside 68 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)) 69 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 70 | 71 | # --------------------------------------------------------------------- 72 | 73 | def pull_cmaq(dir_CMAQ,startswith,cmaq_var): 74 | #pull model files from given directoy 75 | onlyfiles = next(os.walk(dir_CMAQ))[2] 76 | onlyfiles.sort() # so that searching for dates are easier 77 | # pull only CONC files 78 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 79 | print(fnames_CMAQ) 80 | # get data files 81 | ncfile_CMAQ_base = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 82 | return ncfile_CMAQ_base 83 | 84 | 85 | 86 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 87 | # stn -- list (points) 88 | # wrf -- list (grid) 89 | xx=[];yy=[] 90 | for i in range(len(stn_lat)): 91 | abslat = np.abs(wrf_lat-stn_lat[i]) 92 | abslon= np.abs(wrf_lon-stn_lon[i]) 93 | c = np.maximum(abslon,abslat) 94 | latlon_idx = np.argmin(c) 95 | x, y = np.where(c == np.min(c)) 96 | #add indices of nearest wrf point station 97 | xx.append(x) 98 | yy.append(y) 99 | #return indices list 100 | return xx, yy 101 | 102 | 103 | def mask_given_shapefile(lon,lat,shapefile): 104 | ''' 105 | Make a mask given a shapefile 106 | lon - array of grid lons 107 | lat - array of grid lats 108 | shapefile - geopandas geodataframe shapefile 109 | ''' 110 | union=gpd.GeoSeries(unary_union(shapefile.geometry)) 111 | mask=np.ones(lon.shape,dtype=bool) 112 | mask[:] = False 113 | for i in range(len(lon)): 114 | for j in range(len(lon[0])): 115 | pt = Point(lon[i][j],lat[i][j]) 116 | mask[i][j] = pt.within(union[0]) 117 | # 118 | return mask 119 | 120 | # --------------------------------------------------------------------- 121 | 122 | 123 | 124 | # I think day 10 is best 125 | # what day is best day for pbl fomation 126 | #fig,ax = plt.subplots(10,3,figsize=(11,11)) 127 | #count = 0 128 | 129 | #for day in range(21,30): 130 | # sli= [np.array([np.array(base[day]['O3'][t][l][xl]).T[yl:yu+3] for l in range(35)]) for t in range(24)] 131 | # utc = 6 132 | # sli_morn = np.mean(sli[7+utc:10+utc],axis=0) 133 | # sli_mid = np.mean(sli[11+utc:14+utc],axis=0) 134 | # sli_after = np.mean(sli[15+utc:18+utc],axis=0) 135 | # #fig,ax = plt.subplots(1,3) 136 | # ax[count][0].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,len(sli[0])),sli_morn,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 137 | # ax[count][1].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,len(sli[0])),sli_mid,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 138 | # ax[count][2].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,len(sli[0])),sli_after,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 139 | # # count = count+1 140 | 141 | #plt.tight_layout() 142 | #plt.savefig('pbl_o3_21-30.png') 143 | 144 | # Make average Chicago slice~ 145 | # chicago box: upper lat lon 146 | #lolo = -87.939930; lola= 41.644543 147 | #ulo = -87.524137; ula = 42.023039 148 | #xu,yu = find_index([ulo],[ula],lon,lat) 149 | #xl,yl = find_index([lolo],[lola],lon,lat) 150 | #xu,yu,xl,yl = xu[0]+4,yu[0]+4,xl[0]-4,yl[0]-4 151 | # 152 | # adjustable plotting parts 153 | llx=Dataset(dir+ll,'r') 154 | #lat,lon=llx['lat'][:],llx['lon'][:] 155 | lat,lon=llx['LAT'][0][0],llx['LON'][0][0] 156 | 157 | 158 | #mask = mask_given_shapefile(lon,lat,chi_shapefile) 159 | base = pull_cmaq(dir_cmaq_d02,startswith,"O3") 160 | 161 | # Pull single row from data 162 | la = 41.8 163 | lolo = -87.939930; ulo = -87.524137; 164 | xu,yu = find_index([ulo],[la],lon,lat) 165 | xl,yl = find_index([lolo],[la],lon,lat) 166 | 167 | xu,yu,xl,yl = xu[0][0],yu[0][0],xl[0][0]+2,yl[0][0] 168 | 169 | # # check where we're plotting 170 | crs_new = ccrs.PlateCarree() 171 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new},figsize=(8, 6)) 172 | #axs.scatter(lon[xl:xu].T[yl:yu],lat[xl:xu].T[yl:yu]) 173 | #axs.scatter(lon[xl].T[yl:yu+3],lat[xl].T[yl:yu+3]) 174 | #axs.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 175 | chi_shapefile.plot(ax=axs,facecolor="None") 176 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 177 | axs.plot(lon[xl].T[yl:yu+3],lat[xl].T[yl:yu+3]) 178 | plt.show() 179 | 180 | #morning = [7,8,9,10] 181 | #midday = [12,13,14] 182 | #after = [16,17,18] 183 | 184 | 185 | # make slices of 2 rep days 186 | # ozone profiles 187 | utc = 6 188 | day = 9 189 | 190 | 191 | base = pull_cmaq(dir_cmaq_d02,startswith,"O3") 192 | sli= [np.array([np.array(base[day]['NO2'][t][l][xl]).T[yl:yu+3] for l in range(35)]) for t in range(24)] 193 | sli_morn = np.mean(sli[7+utc:10+utc],axis=0)*1000 194 | sli_mid = np.mean(sli[11+utc:14+utc],axis=0)*1000 195 | sli_after = np.mean(sli[16+utc:17+utc],axis=0)*1000 196 | 197 | del base 198 | del sli 199 | 200 | wbase = pull_cmaq(dir_cmaq_d02_wint,startswith,"O3") 201 | wbase = wbase[10:] 202 | wint_sli= [np.array([np.array(wbase[day]["NO2"][t][l][xl]).T[yl:yu+3] for l in range(35)]) for t in range(24)] 203 | wsli_morn = np.mean(wint_sli[7+utc:10+utc],axis=0)*1000 204 | wsli_mid = np.mean(wint_sli[11+utc:14+utc],axis=0)*1000 205 | wsli_after = np.mean(wint_sli[16+utc:17+utc],axis=0)*1000 206 | 207 | del wbase 208 | del wint_sli 209 | 210 | 211 | vmin = 0 212 | vmax = 30 213 | 214 | 215 | cmap = 'Purples' 216 | fig,ax = plt.subplots(2,3,figsize = (10,7)) 217 | ax=ax.ravel() 218 | #ax[0].pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli_morn,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 219 | #ax[1].pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli_mid,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 220 | #cs = ax[2].pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli_after,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 221 | im =ax[0].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),sli_morn,vmin = vmin, vmax = vmax,cmap=cmap) 222 | im =ax[1].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),sli_mid,vmin = vmin, vmax = vmax,cmap=cmap) 223 | im = ax[2].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),sli_after,vmin = vmin, vmax = vmax,cmap=cmap) 224 | #cb = plt.colorbar(cs) 225 | ax[0].set_title('(a) Summer 7 - 10 AM'); ax[1].set_title('(b) Summer 11 AM - 2 PM'); ax[2].set_title('(c) Summer 4 - 7 PM'); 226 | 227 | im = ax[3].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),wsli_morn,vmin = vmin, vmax = vmax,cmap=cmap) 228 | im = ax[4].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),wsli_mid,vmin = vmin, vmax = vmax,cmap=cmap) 229 | im = ax[5].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),wsli_after,vmin = vmin, vmax = vmax,cmap=cmap) 230 | ax[3].set_title('(d) Winter 7 - 10 AM'); ax[4].set_title('(e) Winter 11 AM - 2 PM'); ax[5].set_title('(f) Winter 4 - 7 PM'); 231 | 232 | # make lake line 233 | [ax[i].axvline(x=-87.6,alpha=0.8,c = 'k', linestyle="dotted") for i in range(len(ax))] # line showing lakeshore 234 | [ax[i].set_ylim(0,25) for i in range(len(ax))] # line showing lakeshore 235 | fig.colorbar(im, ax=ax.tolist()) 236 | #plt.show() 237 | 238 | plt.savefig('no2_profile_d02.png',transparent=True) 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | # make gif 248 | for day in range(7): 249 | sli= [np.array([np.array(base[day]['O3'][t][l][xl:xu]).T[yl:yu].mean(axis=-1) for l in range(35)]) for t in range(24)] 250 | for t in range(len(sli)): 251 | plt.figure() 252 | plt.pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli[t],vmin=0,vmax=.08) 253 | plt.title('Day %i, Hour %i'%(day,t)) 254 | plt.xlabel('Longitude') 255 | plt.ylabel('Layer') 256 | plt.savefig('Chi_o3_day_%i_hour_%i.png'%(day,t)) 257 | plt.close() 258 | 259 | 260 | # 261 | gif_name = 'o3_column' 262 | fps = 6 263 | file_list = ['Chi_o3_day_%i_hour_%i.png'%(day,t) for day in range(7) for t in range(24)] 264 | clip = mpy.ImageSequenceClip(file_list, fps=fps) 265 | clip.write_gif('{}.gif'.format(gif_name), fps=fps) 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /Validation/station_validation.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # model validation table for CHEMICALS 4 | # model validation table for ground-based 5 | # Uses wrf and cmaq output 6 | # Epa data from AQS yearly summaries 7 | # NCDC stations are loaded in 8 | # created normalized and non-normalized testing statistics ... 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import scipy.stats as st 13 | import wrf 14 | from netCDF4 import Dataset 15 | import glob,os 16 | import matplotlib.pyplot as plt 17 | import scipy.stats as st 18 | 19 | #input 20 | #dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/' 21 | #dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 22 | 23 | dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 24 | dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 25 | dir = '/home/asm0384/CMAQcheck/' 26 | 27 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 28 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv','NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 29 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 30 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv','O3_d02_2019_1_EPA_CMAQ_Combine.csv', 31 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv', 32 | 'PM25_TOT_d02_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d02_2019_1_EPA_CMAQ_Combine.csv'] 33 | 34 | # functions 35 | def stats(data,prediction): 36 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 37 | mu_d,mu_p = np.mean(x),np.mean(y) 38 | bias = np.sum(x-y)/len(x) 39 | rmse = np.sqrt(np.mean((y-x)**2)) 40 | r,p = st.pearsonr(x,y) 41 | return mu_d,mu_p,bias,rmse,r,p 42 | 43 | # functions 44 | def stats_normalized(data,prediction): 45 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 46 | mu_d,mu_p = np.mean(x),np.mean(y) 47 | nmb = np.sum(y-x)/np.sum(x)*100 48 | nme = np.sum(np.abs(y-x))/np.sum(x)*100 49 | r,p = st.pearsonr(x,y) 50 | return mu_d,mu_p,nmb,nme,r,p 51 | 52 | def pull_winds(dirwrf,fnames,xx,yy): 53 | fws,fwd = [],[] 54 | for q in range(len(fnames)): 55 | wrfout = wrf.g_uvmet.get_uvmet10_wspd_wdir(Dataset(dirwrf + fnames[q]),wrf.ALL_TIMES) 56 | winds = [[wrfout.data[0][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 57 | winddir = [[wrfout.data[1][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 58 | fws.append(winds) 59 | fwd.append(winddir) 60 | # return 61 | return fws,fwd 62 | 63 | 64 | # start 65 | out = [] 66 | out2 = [] 67 | indnames = ['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 68 | 69 | for i in range(len(fnames)): 70 | f = pd.read_csv(dir+fnames[i]) 71 | if i>3 and i<8: 72 | s = stats(f['Sample Measurement']*1000,f['CMAQ']) 73 | s2 = stats_normalized(f['Sample Measurement']*1000,f['CMAQ']) 74 | else: 75 | s = stats(f['Sample Measurement'],f['CMAQ']) 76 | s2 = stats_normalized(f['Sample Measurement'],f['CMAQ']) 77 | out.append(s) 78 | out2.append(s2) 79 | #if len(f[f['level_0']=='2018-08-01 00:00:00']) >0: print(indnames[i]+'| number of stations = %i'%len(f[f['level_0']=='2018-08-01 00:00:00'])) 80 | #if len(f[f['level_0']=='2019-01-02 00:00:00']) >0: print(indnames[i]+'winter| number of stations = %i'%len(f[f['level_0']=='2019-01-02 00:00:00'])) 81 | print('%s| number of stations = %.1f'%(indnames[i],len(f['Longitude'].unique())-1)) 82 | 83 | out = pd.DataFrame(out) 84 | out.columns=['mu_d','mu_p','bias','rmse','r','p'] 85 | 86 | out.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 87 | 88 | out 89 | 90 | out2 = pd.DataFrame(out2) 91 | out2.columns=['mu_d','mu_p','MB','NME','r','p'] 92 | out2.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 93 | 94 | out2.to_csv('~/chemicals_normalized.csv') 95 | 96 | 97 | 98 | # model validation name for meteorology 99 | #getting wrf windspeed/directions: 100 | # NEED TO DO FOR WINTER 101 | # knots to m/s - knots/1.9438444924406 102 | sim = 'output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852' 103 | 104 | windstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_Wind.csv',index_col=0) 105 | windDirstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_WindDir.csv',index_col=0) 106 | times = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/completeddata_mini_extras2.csv') 107 | #check winter and summer times for station index 108 | 109 | xx_d02,yy_d02 = np.array(windstn['xx_d02']),np.array(windDirstn['yy_d02']) 110 | xx_d03,yy_d03 = np.array(windstn[windstn['in_d03']==True]['xx_d03']),np.array(windstn[windstn['in_d03']==True]['yy_d03']) 111 | 112 | # 10*24+1:-24-9 113 | # :744 114 | 115 | fws_stn_d03 = np.array(windstn[windstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 116 | fwd_stn_d03 = np.array(windDirstn[windDirstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 117 | 118 | #fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[:744],dtype='float32') 119 | #fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[:744],dtype='float32') 120 | fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 121 | fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 122 | 123 | 124 | filenames_d02=[] 125 | os.chdir(dirToWRF_d02) 126 | for file in glob.glob("wrfout_d01_*"): 127 | filenames_d02.append(file) 128 | 129 | filenames_d02.sort() 130 | 131 | # $1 Get WRF file names 132 | filenames_d03=[] 133 | os.chdir(dirToWRF_d03) 134 | for file in glob.glob("wrfout_d01_*"): 135 | filenames_d03.append(file) 136 | 137 | filenames_d03.sort() 138 | 139 | # pull wind and dir 140 | fws_d02,fwd_d02 = pull_winds(dirToWRF_d02,filenames_d02[10:-1],xx_d02,yy_d02) 141 | fws_d03,fwd_d03 = pull_winds(dirToWRF_d03,filenames_d03[10:-1],xx_d03,yy_d03) 142 | 143 | 144 | # make array and reshape 145 | fws_d03=np.asarray(fws_d03) 146 | fws_d03 = np.array([fws_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 147 | fwd_d03 = np.array([fwd_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 148 | 149 | fws_d02=np.asarray(fws_d02) 150 | fws_d02 = np.array([fws_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 151 | fwd_d02 = np.array([fwd_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 152 | 153 | #d03 154 | 155 | b=fws_d03.ravel() 156 | a=fws_stn_d03.ravel()/1.9438444924406 157 | stwspd_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 158 | 159 | b=fwd_d03.ravel() 160 | a=fwd_stn_d03.ravel() 161 | stwdir_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 162 | 163 | # d02 164 | 165 | b=fws_d02.ravel() 166 | a=fws_stn_d02.ravel()/1.9438444924406 167 | stwspd_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 168 | 169 | b=fwd_d02.T.ravel() 170 | a=fwd_stn_d02.ravel() 171 | stwdir_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 172 | 173 | dfout=pd.DataFrame([stwspd_d02,stwdir_d02,stwspd_d03,stwdir_d03]) 174 | dfout.index = ['speed_d02','dir_d02','speed_d03','dir_d03'] 175 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 176 | 177 | 178 | dfout.to_csv('~/windmetrics_summer_normalized.csv') 179 | 180 | ##----------- 181 | # get temperature and RH shit from combine aconc 182 | # pull station data again 183 | tmpstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_082018.csv',index_col=0) 184 | rhstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_RH.csv',index_col=0) 185 | 186 | xx_d02,yy_d02 = np.array(tmpstn['xx_d02']),np.array(tmpstn['yy_d02']) 187 | xx_d03,yy_d03 = np.array(tmpstn[tmpstn['in_d03']==True]['xx_d03']),np.array(tmpstn[tmpstn['in_d03']==True]['yy_d03']) 188 | 189 | # check completedatamini for the times associated with the indices 190 | wint_ind = ':744' 191 | sum_ind = '11*24+1:-9' 192 | 193 | temp_stn_d03 = np.array(tmpstn[tmpstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 194 | rh_stn_d03 = np.array(rhstn[rhstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 195 | 196 | temp_stn_d02 = np.array(tmpstn[tmpstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 197 | rh_stn_d02 = np.array(rhstn[rhstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 198 | 199 | # pull aconc files 200 | filenames_d02=[] 201 | os.chdir(dirToWRF_d02+'/postprocess/') 202 | for file in glob.glob("COMBINE_ACONC*"): 203 | filenames_d02.append(file) 204 | 205 | filenames_d02.sort() 206 | 207 | # $1 Get WRF file names 208 | filenames_d03=[] 209 | os.chdir(dirToWRF_d03+'/postprocess/') 210 | for file in glob.glob("COMBINE_ACONC*"): 211 | filenames_d03.append(file) 212 | 213 | filenames_d03.sort() 214 | 215 | 216 | def get_temp_rh(dirToWRF_d02,filenames_d02,var,xx,yy): 217 | d2=[] 218 | for q in range(len(filenames_d02)): 219 | nc = Dataset(dirToWRF_d02 +'/postprocess/'+ filenames_d02[q]) 220 | d=[[nc[var][hour][0][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 221 | d2.append(d) 222 | # 223 | d2=np.asarray(d2) 224 | d2 = np.array([d2[i][x] for i in range(len(filenames_d02)) for x in range(24)]) 225 | # 226 | return d2 227 | 228 | temp_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'SFC_TMP',xx_d02,yy_d02) 229 | temp_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],"SFC_TMP",xx_d03,yy_d03) 230 | rh_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'RH',xx_d02,yy_d02) 231 | rh_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],'RH',xx_d03,yy_d03) 232 | 233 | 234 | b=temp_d02.ravel() 235 | a=(temp_stn_d02.ravel()-32)*5/9 236 | st_temp_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 237 | 238 | b=temp_d03.ravel() 239 | a=(temp_stn_d03.ravel()-32)*5/9 240 | st_temp_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 241 | 242 | 243 | b=rh_d02.ravel() 244 | a=rh_stn_d02.ravel() 245 | st_rh_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 246 | 247 | b=rh_d03.ravel() 248 | a=rh_stn_d03.ravel() 249 | st_rh_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 250 | 251 | 252 | dfout=pd.DataFrame([st_temp_d02,st_rh_d02,st_temp_d03,st_rh_d03]) 253 | dfout.index = ['temp_d02','rh_d02','temp_d03','rh_d03'] 254 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 255 | 256 | 257 | dfout.to_csv('~/temp_rh_summermetrics_normalized.csv') 258 | 259 | 260 | -------------------------------------------------------------------------------- /three_panel_epa_gif.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #libraries 4 | from matplotlib import pyplot as plt ; from matplotlib import colors 5 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 6 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 7 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 8 | import geopandas as gpd; import glob; import os; from datetime import timedelta, date; 9 | from netCDF4 import Dataset; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 10 | import matplotlib.path as mpath; import seaborn as sns; import timeit; from cartopy import crs as ccrs 11 | 12 | import matplotlib.gridspec as gridspec 13 | from scipy.stats import pearsonr 14 | from cartopy.feature import NaturalEarthFeature as cfeature 15 | 16 | 17 | dir_EPA = '/home/asm0384/ChicagoStudy/inputs/EPA_hourly_station_data/' 18 | 19 | 20 | #---------------------------------------------------------------------------------------- 21 | # User input 22 | #---------------------------------------------------------------------------------------- 23 | 24 | gmt_offset = 7 25 | 26 | # directory to model files 27 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 28 | dir_SMOKE='/projects/b1045/jschnell/ForAmy/smoke_out/base/' 29 | dir_WRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 30 | 31 | #directory to grid file 32 | dir_GRID='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 33 | 34 | # dir to lat lon 35 | dir='/projects/b1045/jschnell/ForStacy/' 36 | ll='latlon_ChicagoLADCO_d03.nc' 37 | 38 | # CMAQ RUN things 39 | domain='d03' 40 | time='hourly' 41 | year='2018' 42 | month='8' 43 | 44 | #directory to chicago shapefile 45 | dir_shapefile='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 46 | 47 | # this will use just the epa var 48 | cmaq_var=['O3','NO2','NO','CO','ISOP','SO2','FORM','PM25_TOT'] 49 | smoke_var=['NO2','NO','CO','ISOP','SO2','FORM'] 50 | epa_code=['42401','42602','44201','42101']; var=['SO2','NO2','O3','CO'] 51 | 52 | 53 | # pull epa 54 | dir_epa='/home/asm0384/CMAQcheck/' 55 | 56 | epa_condense=[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month) for loop in range(len(epa_code))] 57 | so2_epa,no2_epa,o3_epa,co_epa = [pd.read_csv(epa_condense[i]) for i in range(len(epa_condense))] 58 | 59 | # set up shape of cmaq indexing 60 | shape = (32,24,1,288,315) 61 | 62 | #------------------------------------------------------------------------------------------- 63 | # User defined functions 64 | #------------------------------------------------------------------------------------------- 65 | 66 | def pull_CMAQ(dir_CMAQ_BASE,startswith,cmaq_var,version): 67 | #pull files from given directoy 68 | #for i in range(1): 69 | onlyfiles = next(os.walk(dir_CMAQ_BASE))[2] 70 | onlyfiles.sort() # so that searching for dates are easier 71 | fnames_CMAQ_BASE = [x for x in onlyfiles if x.startswith(startswith)] 72 | ncfile_CMAQ_base = [Dataset(dir_CMAQ_BASE+ fnames_CMAQ_BASE[i],'r') for i in range(len(fnames_CMAQ_BASE))] 73 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 74 | #full day conc 75 | cmaq_avgs_BASE = []; cmaq_avgs_daily_BASE = []; cmaq_avgs_hourly_BASE = []; all_hours =[] 76 | # make averages for cmaq base 77 | for i in range(len(cmaq_var)): 78 | #for i in range(1): 79 | tmp = np.asarray([ncfile_CMAQ_base[j][cmaq_var[i]] for j in range(len(ncfile_CMAQ_base))]) 80 | hourly = np.average(tmp,axis=0) # hour by hour concs 81 | daily = np.average(tmp,axis=1) # daily average concs 82 | # 83 | monthly = np.average(daily,axis=0) 84 | #if writeoutcsv == True: pd.DataFrame(monthly[0]).to_csv(cmaq_var[i]+'_'+version+'_2018_aug.csv', header=False,index=False) 85 | cmaq_avgs_BASE.append(monthly[0]) 86 | cmaq_avgs_daily_BASE.append(daily) 87 | cmaq_avgs_hourly_BASE.append(hourly) 88 | all_hours.append(tmp) 89 | #return 90 | if Chatty: print('Done with ' +cmaq_var[i]) 91 | #return 92 | return cmaq_avgs_BASE, cmaq_avgs_daily_BASE, cmaq_avgs_hourly_BASE, all_hours, units_cmaq 93 | 94 | 95 | def adjust_spines(ax,spines): 96 | for loc, spine in ax.spines.items(): 97 | if loc in spines: 98 | spine.set_position(('outward', 10)) # outward by 10 points 99 | else: 100 | spine.set_color('none') # don't draw spine 101 | # turn off ticks where there is no spine 102 | if 'left' in spines: 103 | ax.yaxis.set_ticks_position('left') 104 | else: 105 | # no yaxis ticks 106 | ax.yaxis.set_ticks([]) 107 | if 'bottom' in spines: 108 | ax.xaxis.set_ticks_position('bottom') 109 | else: 110 | # no xaxis ticks 111 | ax.xaxis.set_ticks([]) 112 | 113 | 114 | def add_gmt_offset(list_of_hours,gmt_offset): 115 | update_list = [] 116 | for i in range(len(list_of_hours)): 117 | if list_of_hours[i] + gmt_offset > 23: 118 | update_list.append(list_of_hours[i] + gmt_offset - 24) 119 | elif list_of_hours[i] + gmt_offset < 0: 120 | update_list.append(list_of_hours[i] + gmt_offset + 24) 121 | else: update_list.append(list_of_hours[i] + gmt_offset) 122 | # return 123 | return update_list 124 | 125 | 126 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 127 | # stn -- points in a list (list, can be a list of just 1) 128 | # wrf -- gridded wrf lat/lon (np.array) 129 | #for iz in range(1): 130 | xx=[];yy=[] 131 | for i in range(len(stn_lat)): 132 | abslat = np.abs(wrf_lat-stn_lat[i]) 133 | abslon= np.abs(wrf_lon-stn_lon[i]) 134 | c = np.maximum(abslon,abslat) 135 | latlon_idx = np.argmin(c) 136 | x, y = np.where(c == np.min(c)) 137 | #add indices of nearest wrf point station 138 | xx.append(x) 139 | yy.append(y) 140 | # 141 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 142 | #return indices list 143 | return xx, yy 144 | 145 | 146 | 147 | #------------------------------------------------------------------------------------------- 148 | # 149 | #------------------------------------------------------------------------------------------- 150 | 151 | 152 | # get dates 153 | startswith = 'COMBINE_ACONC_' 154 | onlyfiles = next(os.walk(dir_CMAQ))[2] 155 | onlyfiles.sort() # so that searching for dates are easier 156 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 157 | dates=[fnames_CMAQ[i].split(startswith)[1].split('.nc')[0] for i in range(len(fnames_CMAQ))] 158 | 159 | # get lat lon 160 | llx=Dataset(dir+ll,'r') 161 | lat,lon=llx['lat'][:],llx['lon'][:] 162 | 163 | 164 | # ============================================ 165 | # make fancy plot to plot full days 166 | # ============================================ 167 | ''' 168 | no2_epa = chemical data over domain with nearest cmaq pixel. dataframe. 169 | 170 | ''' 171 | 172 | def tri_plot(epa, ncfile_CMAQ, var, v, picdir, printout=False): 173 | #data prep 174 | epa['level_0']=pd.to_datetime(epa['level_0']) 175 | epa['month-day'] = pd.to_datetime(epa['level_0']).dt.to_period('D') 176 | #epa=epa.groupby('month-day').mean() 177 | vmin=round(np.percentile(ncfile_CMAQ[0][var[v]][0][0].ravel(),0.01)) 178 | vmax=round(np.percentile(ncfile_CMAQ[0][var[v]][0][0].ravel(),99.99)) 179 | # start plotting 180 | cmap = 'magma_r' 181 | crs_new = ccrs.PlateCarree() 182 | for d in range(shape[0]): 183 | for h in range(shape[1]): 184 | # set up fig 185 | fig = plt.figure(figsize=(10,8)) 186 | #fig.execute_constrained_layout() 187 | widths = [2, 2] 188 | heights = [5, 2] 189 | gs = fig.add_gridspec(ncols=2, nrows=2, width_ratios=widths,height_ratios=heights) 190 | # set up plot 191 | # 192 | # PLOT 1 193 | # make map plot on top 194 | tmp = epa[epa['level_0']==epa['level_0'][h+d*24]] 195 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 196 | ax = fig.add_subplot(gs[0, :],projection= crs_new) 197 | cs = ax.pcolor(lon,lat, ncfile_CMAQ[d][var[v]][h][0],transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 198 | ax.scatter(tmp['Longitude'],tmp['Latitude'],c= tmp['Sample Measurement'], cmap = cmap, vmin = vmin, vmax = vmax,s=75,edgecolors = 'black') 199 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.75,label='ppbV') 200 | cbar.set_ticks(levels) 201 | states_provinces = cfeature(category='cultural',name='admin_1_states_provinces_lines',scale='50m',facecolor='none') 202 | land = cfeature('physical', 'lakes', '10m',edgecolor='black',facecolor='none') 203 | ax.add_feature(land, edgecolor='black') 204 | ax.add_feature(states_provinces, edgecolor='black',alpha = 0.5) 205 | b = .8 206 | xl,xu,yl,yu = lon.min()+b,lon.max()-b,lat.min()+b,lat.max()-b 207 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 208 | plt.title(var[v]+' on '+str(epa['level_0'][h+d*24])) 209 | # PLOT 2 210 | # make 1:1 plot 211 | ax1 = fig.add_subplot(gs[1, 1]) 212 | tmp = epa[epa['level_0']==epa['level_0'][h+d*24]] 213 | # 214 | for label in range(len(tmp['County Name'])): 215 | l = tmp.index[label] 216 | if tmp['Sample Measurement'][l] == np.nan: print('movin') 217 | else: ax1.scatter(tmp['Sample Measurement'][l],tmp['CMAQ'][l],label= tmp['County Name'][l],color = plt.get_cmap('Blues',len(tmp))(label)) 218 | # 219 | plt.xlabel('Sample Measurement (ppbv)'); plt.ylabel('CMAQ (ppbv)') 220 | #plt.legend( loc='upper center', bbox_to_anchor=(.5, 1.5), ncol=4, prop={'size': 6},) 221 | scr, mcr = tmp['Sample Measurement'], tmp['CMAQ'] 222 | bad = np.isnan(scr) 223 | r = round(pearsonr(mcr[~bad],scr[~bad])[0],2) 224 | plt.title(f'Station vs. CMAQ Pixel: r = {r}') 225 | plt.xlim([tmp['CMAQ'].min()*.8,tmp['CMAQ'].max()*1.2]); plt.ylim([tmp['CMAQ'].min()*.8,tmp['CMAQ'].max()*1.2]) 226 | ax1.plot([-1000,1000],[-1000,1000],c='black',alpha = 0.75) 227 | # PLOT 3 228 | ##make diurnal plot 229 | #for i in range(1): 230 | ax2 = fig.add_subplot(gs[1, 0]) 231 | tmp2=epa.groupby('level_0').mean()['Sample Measurement'] 232 | tmp2.plot.line(linestyle='--',color= plt.get_cmap('Blues',8)(1),ax=ax2,label='EPA') 233 | tmp2=epa.groupby('level_0').mean()['CMAQ'] 234 | tmp2.index.name = 'Dates' 235 | tmp2.plot.line(color=plt.get_cmap('Blues',8)(5),ax=ax2) 236 | ax2.scatter(tmp2.index[24*d+h],tmp2[24*d+h], marker='*',color='pink',s=200) 237 | ax2.set_xlim(tmp2.index[h]+timedelta(days=d-1), tmp2.index[h]+timedelta(days=d+1)) 238 | # 239 | plt.legend( loc='upper center', ncol=4, prop={'size': 8},) 240 | # 241 | plt.savefig(picdir+var[v]+'_'+'day'+str(d)+'_hour'+str(h)+'.png', orientation='landscape') 242 | plt.close() 243 | if printout== True: print(f'Done with day {d} hour {h}') 244 | 245 | 246 | startswith = 'COMBINE_ACONC_' 247 | 248 | onlyfiles = next(os.walk(dir_CMAQ))[2] 249 | onlyfiles.sort() # so that searching for dates are easier 250 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 251 | ncfile_CMAQ = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 252 | 253 | 254 | picdir = '/home/asm0384/gifs/' 255 | 256 | tri_plot(so2_epa, ncfile_CMAQ, var, 0, picdir, False) 257 | 258 | 259 | v=1; epa = no2_epa 260 | tri_plot(epa, ncfile_CMAQ, var, v, picdir, False) 261 | 262 | 263 | v=2; epa = o3_epa 264 | tri_plot(epa, ncfile_CMAQ, var, v, picdir, False) 265 | 266 | v=3; epa = co_epa 267 | tri_plot(epa, ncfile_CMAQ, var, v, picdir, False) 268 | -------------------------------------------------------------------------------- /validation/model_validation_statistics.py: -------------------------------------------------------------------------------- 1 | # model validation table for CHEMICALS 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import scipy.stats as st 6 | import wrf 7 | from netCDF4 import Dataset 8 | import glob,os 9 | import matplotlib.pyplot as plt 10 | import scipy.stats as st 11 | 12 | #input 13 | #dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/' 14 | #dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 15 | 16 | dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 17 | dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 18 | dir = '/projects/b1045/montgomery/' 19 | 20 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 21 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv','NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 22 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 23 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv','O3_d02_2019_1_EPA_CMAQ_Combine.csv', 24 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv', 25 | 'PM25_TOT_d02_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d02_2019_1_EPA_CMAQ_Combine.csv'] 26 | 27 | # functions 28 | def stats(data,prediction): 29 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 30 | mu_d,mu_p = np.mean(x),np.mean(y) 31 | bias = np.sum(x-y)/len(x) 32 | rmse = np.sqrt(np.mean((y-x)**2)) 33 | r,p = st.pearsonr(x,y) 34 | return mu_d,mu_p,bias,rmse,r,p 35 | 36 | # functions 37 | def stats_normalized(data,prediction): 38 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 39 | mu_d,mu_p = np.mean(x),np.mean(y) 40 | nmb = np.sum(y-x)/np.sum(x)*100 41 | nme = np.sum(np.abs(y-x))/np.sum(x)*100 42 | r,p = st.pearsonr(x,y) 43 | return mu_d,mu_p,nmb,nme,r,p 44 | 45 | def pull_winds(dirwrf,fnames,xx,yy): 46 | fws,fwd = [],[] 47 | for q in range(len(fnames)): 48 | wrfout = wrf.g_uvmet.get_uvmet10_wspd_wdir(Dataset(dirwrf + fnames[q]),wrf.ALL_TIMES) 49 | winds = [[wrfout.data[0][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 50 | winddir = [[wrfout.data[1][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 51 | fws.append(winds) 52 | fwd.append(winddir) 53 | # return 54 | return fws,fwd 55 | 56 | 57 | # start 58 | out = [] 59 | out2 = [] 60 | indnames = ['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 61 | 62 | for i in range(len(fnames)): 63 | f = pd.read_csv(dir+'/CMAQcheck/'+fnames[i]) 64 | if i>3 and i<8: 65 | s = stats(f['Sample Measurement']*1000,f['CMAQ']) 66 | s2 = stats_normalized(f['Sample Measurement']*1000,f['CMAQ']) 67 | else: 68 | s = stats(f['Sample Measurement'],f['CMAQ']) 69 | s2 = stats_normalized(f['Sample Measurement'],f['CMAQ']) 70 | out.append(s) 71 | out2.append(s2) 72 | #if len(f[f['level_0']=='2018-08-01 00:00:00']) >0: print(indnames[i]+'| number of stations = %i'%len(f[f['level_0']=='2018-08-01 00:00:00'])) 73 | #if len(f[f['level_0']=='2019-01-02 00:00:00']) >0: print(indnames[i]+'winter| number of stations = %i'%len(f[f['level_0']=='2019-01-02 00:00:00'])) 74 | print('%s| number of stations = %.1f'%(indnames[i],len(f['Longitude'].unique())-1)) 75 | 76 | out = pd.DataFrame(out) 77 | out.columns=['mu_d','mu_p','bias','rmse','r','p'] 78 | 79 | out.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 80 | 81 | out 82 | 83 | out2 = pd.DataFrame(out2) 84 | out2.columns=['mu_d','mu_p','MB','NME','r','p'] 85 | out2.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 86 | 87 | out2.to_csv('~/chemicals_normalized.csv') 88 | 89 | 90 | 91 | # model validation name for meteorology 92 | #getting wrf windspeed/directions: 93 | # NEED TO DO FOR WINTER 94 | # knots to m/s - knots/1.9438444924406 95 | sim = 'output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852' 96 | 97 | windstn = pd.read_csv(dir+'/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_Wind.csv',index_col=0) 98 | windDirstn = pd.read_csv(dir+'WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_WindDir.csv',index_col=0) 99 | times = pd.read_csv(dir+'WRFcheck/'+sim+'/completeddata_mini_extras2.csv') 100 | #check winter and summer times for station index 101 | 102 | xx_d02,yy_d02 = np.array(windstn['xx_d02']),np.array(windDirstn['yy_d02']) 103 | xx_d03,yy_d03 = np.array(windstn[windstn['in_d03']==True]['xx_d03']),np.array(windstn[windstn['in_d03']==True]['yy_d03']) 104 | 105 | # 10*24+1:-24-9 106 | # :744 107 | 108 | fws_stn_d03 = np.array(windstn[windstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 109 | fwd_stn_d03 = np.array(windDirstn[windDirstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 110 | 111 | #fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[:744],dtype='float32') 112 | #fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[:744],dtype='float32') 113 | fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 114 | fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 115 | 116 | 117 | filenames_d02=[] 118 | os.chdir(dirToWRF_d02) 119 | for file in glob.glob("wrfout_d01_*"): 120 | filenames_d02.append(file) 121 | 122 | filenames_d02.sort() 123 | 124 | # $1 Get WRF file names 125 | filenames_d03=[] 126 | os.chdir(dirToWRF_d03) 127 | for file in glob.glob("wrfout_d01_*"): 128 | filenames_d03.append(file) 129 | 130 | filenames_d03.sort() 131 | 132 | # pull wind and dir 133 | fws_d02,fwd_d02 = pull_winds(dirToWRF_d02,filenames_d02[10:-1],xx_d02,yy_d02) 134 | fws_d03,fwd_d03 = pull_winds(dirToWRF_d03,filenames_d03[10:-1],xx_d03,yy_d03) 135 | 136 | 137 | # make array and reshape 138 | fws_d03= np.asarray(fws_d03) 139 | fws_d03 = np.array([fws_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 140 | fwd_d03 = np.array([fwd_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 141 | 142 | fws_d02=np.asarray(fws_d02) 143 | fws_d02 = np.array([fws_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 144 | fwd_d02 = np.array([fwd_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 145 | 146 | #d03 147 | 148 | ld03= windstn[windstn['in_d03']==True] 149 | ld03 = ld03[['lat','lon']] 150 | ld03.reset_index(inplace=True,drop=True) 151 | 152 | ld02= windstn[windstn['in_d02']==True] 153 | ld02 = ld02[['lat','lon']] 154 | ld02.reset_index(inplace=True,drop=True) 155 | 156 | 157 | 158 | #write out files 159 | if write ==True: 160 | pd.DataFrame(fws_d03/1.9438444924406).append(ld03.T).to_csv('~/wrf_winds_jan_d03.csv') 161 | pd.DataFrame(fws_d02/1.9438444924406).append(ld02.T).to_csv('~/wrf_winds_jan_d02.csv') 162 | pd.DataFrame(fwd_d03).append(ld03.T).to_csv('~/wrf_winddir_jan_d03.csv') 163 | pd.DataFrame(fwd_d02).append(ld02.T).to_csv('~/wrf_winddir_jan_d02.csv') 164 | 165 | 166 | b=fws_d03.ravel() 167 | a=fws_stn_d03.ravel()/1.9438444924406 168 | stwspd_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 169 | 170 | b=fwd_d03.ravel() 171 | a=fwd_stn_d03.ravel() 172 | stwdir_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 173 | 174 | # d02 175 | 176 | b=fws_d02.ravel() 177 | a=fws_stn_d02.ravel()/1.9438444924406 178 | stwspd_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 179 | 180 | b=fwd_d02.T.ravel() 181 | a=fwd_stn_d02.ravel() 182 | stwdir_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 183 | 184 | dfout=pd.DataFrame([stwspd_d02,stwdir_d02,stwspd_d03,stwdir_d03]) 185 | dfout.index = ['speed_d02','dir_d02','speed_d03','dir_d03'] 186 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 187 | 188 | 189 | dfout.to_csv('~/windmetrics_summer_normalized.csv') 190 | 191 | ##----------- 192 | # get temperature and RH shit from combine aconc 193 | # pull station data again 194 | tmpstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_012019.csv',index_col=0) 195 | rhstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_RH.csv',index_col=0) 196 | 197 | xx_d02,yy_d02 = np.array(tmpstn['xx_d02']),np.array(tmpstn['yy_d02']) 198 | xx_d03,yy_d03 = np.array(tmpstn[tmpstn['in_d03']==True]['xx_d03']),np.array(tmpstn[tmpstn['in_d03']==True]['yy_d03']) 199 | 200 | # check completedatamini for the times associated with the indices 201 | wint_ind = ':744' 202 | sum_ind = '11*24+1:-9' 203 | 204 | temp_stn_d03 = np.array(tmpstn[tmpstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 205 | rh_stn_d03 = np.array(rhstn[rhstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 206 | 207 | temp_stn_d02 = np.array(tmpstn[tmpstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 208 | rh_stn_d02 = np.array(rhstn[rhstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 209 | 210 | # pull aconc files 211 | filenames_d02=[] 212 | os.chdir(dirToWRF_d02+'/postprocess/') 213 | for file in glob.glob("COMBINE_ACONC*"): 214 | filenames_d02.append(file) 215 | 216 | filenames_d02.sort() 217 | 218 | # $1 Get WRF file names 219 | filenames_d03=[] 220 | os.chdir(dirToWRF_d03+'/postprocess/') 221 | for file in glob.glob("COMBINE_ACONC*"): 222 | filenames_d03.append(file) 223 | 224 | filenames_d03.sort() 225 | 226 | 227 | def get_temp_rh(dirToWRF_d02,filenames_d02,var,xx,yy): 228 | d2=[] 229 | for q in range(len(filenames_d02)): 230 | nc = Dataset(dirToWRF_d02 +'/postprocess/'+ filenames_d02[q]) 231 | d=[[nc[var][hour][0][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 232 | d2.append(d) 233 | # 234 | d2=np.asarray(d2) 235 | d2 = np.array([d2[i][x] for i in range(len(filenames_d02)) for x in range(24)]) 236 | # 237 | return d2 238 | 239 | temp_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'SFC_TMP',xx_d02,yy_d02) 240 | temp_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],"SFC_TMP",xx_d03,yy_d03) 241 | rh_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'RH',xx_d02,yy_d02) 242 | rh_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],'RH',xx_d03,yy_d03) 243 | 244 | #d03 245 | 246 | ld03= tmpstn[tmpstn['in_d03']==True] 247 | ld03 = ld03[['lat','lon']] 248 | ld03.reset_index(inplace=True,drop=True) 249 | 250 | ld02= rhstn[rhstn['in_d02']==True] 251 | ld02 = ld02[['lat','lon']] 252 | ld02.reset_index(inplace=True,drop=True) 253 | 254 | 255 | #write out files 256 | if write ==True: 257 | pd.DataFrame(temp_d03).append(ld03.T).to_csv('~/wrf_t2_jan_d03.csv') 258 | pd.DataFrame(temp_d02).append(ld02.T).to_csv('~/wrf_t2_jan_d02.csv') 259 | pd.DataFrame(rh_d03).append(ld03.T).to_csv('~/wrf_rh_jan_d03.csv') 260 | pd.DataFrame(rh_d02).append(ld02.T).to_csv('~/wrf_rh_jan_d02.csv') 261 | 262 | # do stats 263 | 264 | b=temp_d02.ravel() 265 | a=(temp_stn_d02.ravel()-32)*5/9 266 | st_temp_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 267 | 268 | b=temp_d03.ravel() 269 | a=(temp_stn_d03.ravel()-32)*5/9 270 | st_temp_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 271 | 272 | 273 | b=rh_d02.ravel() 274 | a=rh_stn_d02.ravel() 275 | st_rh_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 276 | 277 | b=rh_d03.ravel() 278 | a=rh_stn_d03.ravel() 279 | st_rh_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 280 | 281 | 282 | dfout=pd.DataFrame([st_temp_d02,st_rh_d02,st_temp_d03,st_rh_d03]) 283 | dfout.index = ['temp_d02','rh_d02','temp_d03','rh_d03'] 284 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 285 | 286 | 287 | dfout.to_csv('~/temp_rh_summermetrics_normalized.csv') 288 | 289 | 290 | # now group by each station and get correlation 291 | 292 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 293 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 294 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv'] 295 | 296 | level_2 = ['NO2_Sum','NO2_Wint','O3_Sum','O3_Wint','PM25_Sum','PM25_Wint'] 297 | 298 | for i in range(len(fnames)): 299 | fname = fnames[i] 300 | f = pd.read_csv(dir+'/CMAQcheck/'+fname) 301 | fl = f.groupby(['Latitude','Longitude'])[['CMAQ','Sample Measurement']].corr().iloc[0::2,-1] 302 | fl = fl.reset_index() 303 | fl['level_2']=level_2[i] 304 | print(fl) 305 | if i==0: final = fl 306 | else: final = final.append(fl) 307 | # using 308 | 309 | final.to_csv('~/Station_correlations_EPA.csv') 310 | 311 | 312 | 313 | 314 | -------------------------------------------------------------------------------- /chi_map_cropped.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------ 2 | # Fun figure for AGU CVD 3 | # Stacy Montgomery, Sept. 2019 4 | # 5 | # I made some AQ figures, the interesting part is "mpath" 6 | # and using the "exterior" of the shapefile to crop the figure 7 | #------------------------------------------ 8 | 9 | #------------------------------------------ 10 | # Libraries 11 | #-------------- 12 | from matplotlib import pyplot as plt 13 | from mpl_toolkits import basemap as bm 14 | from matplotlib import colors 15 | import numpy as np 16 | import numpy.ma as ma 17 | from matplotlib.patches import Path, PathPatch 18 | import pandas as pd 19 | from shapely.geometry import Point, shape, Polygon 20 | import fiona 21 | from shapely.ops import unary_union, cascaded_union 22 | from geopandas.tools import sjoin 23 | import geopandas as gpd 24 | import geoplot 25 | import glob 26 | import os 27 | from datetime import timedelta, date; 28 | from netCDF4 import Dataset 29 | import scipy.ndimage 30 | from cartopy import crs as ccrs 31 | from cartopy.io.shapereader import Reader 32 | import matplotlib.path as mpath 33 | import seaborn as sns 34 | 35 | #------------------------------------------ 36 | # Find index of points on a gridded array 37 | # stn_lon,stn_lat = list of lat lon points --> lat_list, lon_list = [x1,x2][y1,y2] 38 | # wrf_lon, wrf_lat = np.array of gridded lat lon --> grid_x= np.array([x1,x2,x3],[x4,x5,x6]) 39 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 40 | # stn -- points in a list (list, can be a list of just 1) 41 | # wrf -- gridded wrf lat/lon (np.array) 42 | #for iz in range(1): 43 | xx=[];yy=[] 44 | for i in range(len(stn_lat)): 45 | abslat = np.abs(wrf_lat-stn_lat[i]) 46 | abslon= np.abs(wrf_lon-stn_lon[i]) 47 | c = np.maximum(abslon,abslat) 48 | latlon_idx = np.argmin(c) 49 | x, y = np.where(c == np.min(c)) 50 | #add indices of nearest wrf point station 51 | xx.append(x) 52 | yy.append(y) 53 | # 54 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 55 | #return indices list 56 | return xx, yy 57 | 58 | #------------------------------------------ 59 | # USER INPUT 60 | fout_dir_l3='/home/asm0384/tempfiles/practice/NO2_l3_big/' 61 | plot_file='L3_averaged_Chicago_L2_Chicago_2018-8-1_through_L2_Chicago_2018-8-30_made_1567447847_nx_1250_ny_1000.csv' 62 | varname= 'no2' 63 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 64 | 65 | # Start pulling and cropping data 66 | chi = gpd.GeoDataFrame.from_file(path) 67 | 68 | #data frame with all data 69 | finalgrid = pd.read_csv(fout_dir_l3+plot_file, index_col =0) 70 | varname ='nitrogendioxide_tropospheric_column' 71 | 72 | #Pull information from title 73 | filename= plot_file 74 | ymd= plot_file.split('_made_') 75 | nxny=ymd[1].split('_nx_')[1].split('_ny_') 76 | nx=int(nxny[0]) 77 | ny=int(nxny[1].split('.csv')[0]) 78 | startdate=ymd[0].split('L3_averaged_Chicago_L2_Chicago_')[1].split('_through')[0] 79 | enddate=ymd[0].split('L3_averaged_Chicago_L2_Chicago_')[1].split('_through_L2_Chicago_')[1] 80 | 81 | finalgrid.describe() 82 | 83 | # NOW CROP OVER CHICAGO 84 | # Initialize grid 85 | grid_nlat=np.zeros((ny,nx)); grid_nlon=np.zeros((ny,nx)); grid_no2=np.zeros((ny,nx)) 86 | 87 | # Return back to grid form 88 | for i in range(ny): 89 | for j in range(nx): 90 | l=i*nx+j 91 | grid_nlat[i][j]=finalgrid['nlats'][l] 92 | grid_nlon[i][j]=finalgrid['nlons'][l] 93 | grid_no2[i][j]=finalgrid[varname][l] 94 | 95 | # Check 96 | #plt.scatter(finalgrid['nlons'],finalgrid['nlats'],c=finalgrid['nitrogendioxide_tropospheric_column']) 97 | #plt.show() 98 | 99 | # Make box around chicago to cut data -- specific for satellite, check to make sure the arrays are increasing in size 100 | x1,y1=find_index([min(chi.bounds.minx)],[min(chi.bounds.miny)], np.array(grid_nlon), np.array (grid_nlat)) 101 | x2,y2=find_index([max(chi.bounds.maxx)],[max(chi.bounds.maxy+.05)], np.array(grid_nlon), np.array (grid_nlat)) 102 | x3,y3=find_index([min(chi.bounds.minx)],[max(chi.bounds.maxy)], np.array(grid_nlon), np.array (grid_nlat)) 103 | x4,y4=find_index([max(chi.bounds.maxx)+.05],[min(chi.bounds.miny)], np.array(grid_nlon), np.array (grid_nlat)) 104 | 105 | #set up zeros array given the bound of chicago 106 | diffy =max(y1,y2,y3,y4)[0]-min(y1,y2,y3,y4)[0] 107 | diffx=max(x1,x2,x3,x4)[0]-min(x1,x2,x3,x4)[0] 108 | 109 | zlon,zlat,z=np.zeros([diffx, diffy]), np.zeros([diffx, diffy]), np.zeros([diffx, diffy]) 110 | 111 | # fill out zeros array from the gridded data 112 | for i in range(diffx): 113 | for j in range(diffy): 114 | z[i][j]= grid_no2[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 115 | zlat[i][j]= grid_nlat[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 116 | zlon[i][j]= grid_nlon[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 117 | 118 | # Check 119 | #ax= chi.plot() 120 | #plt.scatter(zlon,zlat,c=z) 121 | 122 | #plt.show() 123 | 124 | # Make the contour plot 125 | # make finer 126 | import scipy.ndimage 127 | 128 | from cartopy import crs as ccrs 129 | from cartopy.io.shapereader import Reader 130 | import matplotlib.path as mpath 131 | import seaborn as sns 132 | 133 | crs_new = ccrs. AlbersEqualArea(central_longitude=(chi.bounds.mean().minx+chi.bounds.mean().maxx)/2) 134 | 135 | #get data at higher resolution for contouring 136 | lat,lon,data=scipy.ndimage.zoom(zlat, 3),scipy.ndimage.zoom(zlon, 3),scipy.ndimage.zoom(z, 3) 137 | data=data*10e4 138 | 139 | # merge polygons using unary union and get the outside values 140 | # NOTE -- the union makes a multipolygon, but if you reference the largest of the polygons you actually get the outside 141 | union=gpd.GeoSeries(unary_union(chi.geometry)) 142 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 143 | 144 | # make fig object 145 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new},figsize=(5, 5)) 146 | 147 | #set up data for plotting via levels 148 | vmax=pd.DataFrame(data).max().max()+1.5 149 | vmin= int(pd.DataFrame(data).min().min())+2 150 | levels = np.linspace(vmin, int(vmax), int(vmax)+10) 151 | 152 | #locate outside 153 | #plt.scatter(list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]) 154 | 155 | # set boundary as outer extent by making a matplotlib path object and adding that geometry 156 | # i think setting the boundary before you plot the data actually crops the data to the shape, so set ax first 157 | axs.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 158 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 159 | 160 | #plot the gridded data by using contourf 161 | cs=plt.contourf(lon,lat,data,cmap= "inferno_r", transform=crs_new, levels=levels) 162 | 163 | # add landmarks with scatterplot 164 | midway= 41.7868, -87.7522 165 | ohare = 41.9742, -87.9073 166 | loop = 41.8786, -87.6251 167 | plt.scatter(pd.DataFrame([midway,ohare,loop])[1],pd.DataFrame([midway,ohare,loop])[0],marker = '*',color='white') 168 | 169 | # set axes extents from shapefile 170 | x=[min(chi.bounds.minx), max(chi.bounds.maxx)] 171 | y=[min(chi.bounds.miny), max(chi.bounds.maxy)] 172 | axs.set_extent([x[0]-.03,x[1]+.03,y[0]-.03,y[1]+.03],crs= crs_new) 173 | axs.set_title('1 PM TropOMI NO$_{2}$ Column Density') 174 | 175 | #add colorbar and label 176 | cbar=plt.colorbar(cs,boundaries=np.arange(vmin,11)) 177 | cbar.ax.set_ylabel('10$^{-2}$ molecules m$^{2}$') 178 | cbar.set_ticks(np.arange(vmin, int(vmax),1)) 179 | 180 | # save and show 181 | plt.savefig('/home/asm0384/tropomi_no2_neighbs_1_star.pdf',format='pdf') 182 | plt.show() 183 | 184 | 185 | #------------------------------------------------------------------------------------ 186 | # CMAQ Processing 187 | #-------------------------------- 188 | 189 | print('-----------') 190 | print('Starting CMAQ PROCESSING....') 191 | print('-----------') 192 | 193 | # Directories for cmaq + EPA 194 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_BC_4km_v0/postprocess/' 195 | 196 | # CMAQ things 197 | domain='d02' 198 | time='hourly' 199 | year='2018' 200 | #epa_code=['42401','42602','44201']; var=['SO2','NO2','O3'] #numerical identifiers and corresponding vars 201 | epa_code=['42602']; var=['NO2'] 202 | 203 | # Get CMAQ file names 204 | cmaq_files=[] 205 | os.chdir(dir_cmaq) 206 | for file in glob.glob("COMBINE_ACONC_*"): 207 | cmaq_files.append(file) 208 | 209 | # Find dates from cmaq 210 | cmaq_files.sort(); 211 | cmaq_files=cmaq_files[0:-2] #get rid of september 212 | 213 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 214 | start_dt=date(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 215 | end_dt=date(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8])) 216 | 217 | #pull data 218 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 219 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 220 | dates_ft=[str(date(int(dates[i][0:4]),int(dates[i][4:6]),int(dates[i][6:8]))) for i in range(len(dates))] 221 | 222 | #get monthly avg of CMAQ data 223 | monthly_avg_no2=[cmaq[i]['NO2'][h] for i in range(len(cmaq)) for h in range(24)] 224 | monthly_avg_no2= sum(monthly_avg_no2)/(len(cmaq)*24) 225 | 226 | # get 1 pm avg no2 227 | pm_avg_no2=[cmaq[i]['NO2'][13] for i in range(len(cmaq))] 228 | pm_avg_no2=sum(monthly_avg_no2)/(len(cmaq)) 229 | 230 | monthly_avg_no2= pm_avg_no2 #stupid 231 | 232 | # get 1 pm avg o3 233 | #monthly_avg_o3=[cmaq[i]['O3'][h] for i in range(len(cmaq)) for h in range(24)] 234 | #monthly_avg_o3= sum(monthly_avg_no2)/(len(cmaq)*24) 235 | 236 | #Pull cmaq grid 237 | grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/GRIDCRO2D_Chicago_LADCO_2018-08-20.nc' 238 | cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 239 | 240 | #check the extent and that everything looks right 241 | #plt.scatter(cmaq_lon, cmaq_lat, c= monthly_avg_no2[0]) 242 | 243 | # Find indices of the greatest outside points of the data 244 | x1,y1=find_index([min(chi.bounds.minx)],[min(chi.bounds.miny)], np.array(cmaq_lon), np.array (cmaq_lat)) 245 | x2,y2=find_index([max(chi.bounds.maxx)],[max(chi.bounds.maxy)], np.array(cmaq_lon), np.array (cmaq_lat)) 246 | x3,y3=find_index([min(chi.bounds.minx)],[max(chi.bounds.maxy)], np.array(cmaq_lon), np.array (cmaq_lat)) 247 | x4,y4=find_index([max(chi.bounds.maxx)+.05],[min(chi.bounds.miny)], np.array(cmaq_lon), np.array (cmaq_lat)) 248 | 249 | # make an array the size of the bounds of the shapefile 250 | diffy =max(y1,y2,y3,y4)[0]-min(y1,y2,y3,y4)[0] 251 | diffx=max(x1,x2,x3,x4)[0]-min(x1,x2,x3,x4)[0] 252 | zlon,zlat,z=np.zeros([diffx, diffy]), np.zeros([diffx, diffy]), np.zeros([diffx, diffy]) 253 | 254 | for i in range(diffx): 255 | for j in range(diffy): 256 | z[i][j]= monthly_avg_no2[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 257 | zlat[i][j]= cmaq_lat[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 258 | zlon[i][j]= cmaq_lon[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 259 | 260 | # Make Contour plot 261 | # make finer 262 | 263 | 264 | crs_new = ccrs. AlbersEqualArea(central_longitude=(chi.bounds.mean().minx+chi.bounds.mean().maxx)/2) 265 | 266 | #get data at higher resolution for contouring 267 | lat,lon,data=scipy.ndimage.zoom(zlat, 3),scipy.ndimage.zoom(zlon, 3),scipy.ndimage.zoom(z, 3) 268 | 269 | #merge polygons and get the outside valules 270 | b=gpd.GeoSeries(unary_union(chi.geometry)) 271 | v=pd.DataFrame([list(b[0][2].exterior.xy)[0], list(b[0][2].exterior.xy)[1]]) 272 | 273 | # make fig object 274 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new}, 275 | figsize=(5, 5)) 276 | 277 | #set up data for plotting via levels 278 | vmax=pd.DataFrame(data).max().max() 279 | vmin= pd.DataFrame(data).min().min()-.007 280 | vmin=.3 281 | levels = np.arange(vmin, vmax+.1, .10) 282 | 283 | #locate outside 284 | #plt.scatter(list(b[0][2].exterior.xy)[0], list(b[0][2].exterior.xy)[1]) 285 | 286 | #set boundary as outer extent 287 | axs.set_boundary(mpath.Path(v.T,closed=True), transform= crs_new, use_as_clip_path=True) 288 | 289 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 290 | cs=plt.contourf(lon,lat,data,cmap= "inferno_r", transform=crs_new, levels=levels) 291 | 292 | x=[min(chi.bounds.minx), max(chi.bounds.maxx)] 293 | y=[min(chi.bounds.miny), max(chi.bounds.maxy)] 294 | 295 | axs.set_extent([x[0]-.03,x[1]+.03,y[0]-.03,y[1]+.03],crs= crs_new) 296 | axs.set_title('1 PM CMAQ NO$_{2}$ Ground Level') 297 | 298 | cbar=plt.colorbar(cs,boundaries=np.arange(vmin,11)) 299 | cbar.ax.set_ylabel('ppbV') 300 | cbar.set_ticks(np.arange(vmin, vmax,.2)) 301 | 302 | midway= 41.7868, -87.7522 303 | ohare = 41.9742, -87.9073 304 | loop = 41.8786, -87.6251 305 | 306 | plt.scatter(pd.DataFrame([midway,ohare,loop])[1],pd.DataFrame([midway,ohare,loop])[0],marker = '*',color='white') 307 | 308 | plt.savefig('/home/asm0384/cmaq_no2_neighbs_1pm.pdf',format='pdf') 309 | 310 | plt.show() 311 | 312 | 313 | #------------------------------------------------------------------------------------ 314 | # INCOME Processing 315 | # This could be done better, I will in the future sort my own chloropleth, the geoplot funciton is 316 | # not broad enough, but it's quick enough to work for me 317 | #-------------------------------- 318 | import geoplot.crs as gcrs 319 | 320 | fname='/home/asm0384/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv' 321 | income=pd.read_csv(fname) 322 | 323 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 324 | 325 | chi = gpd.GeoDataFrame.from_file(path) 326 | 327 | income['COMMUNITY AREA NAME'][75]="OHARE" 328 | 329 | income['community']=[income['COMMUNITY AREA NAME'][i].upper() for i in range(len(income))] 330 | 331 | 332 | dfmerge=pd.merge(chi,income,on='community') 333 | 334 | # dropna cloropleth graph ... 335 | c=geoplot.choropleth(dfmerge, hue = dfmerge['HARDSHIP INDEX'], 336 | cmap='Blues', figsize=(5, 5), k=None, legend=True) 337 | #legend_values=np.arange(10000,90000,10000)) 338 | 339 | 340 | plt.title('Hardship Index') 341 | 342 | # scatter landmarks 343 | midway= 41.7868, -87.7522 344 | ohare = 41.9742, -87.9073 345 | loop = 41.8786, -87.6251 346 | 347 | #oops doesnt work ... add in post processing ... 348 | plt.scatter(pd.DataFrame([midway,ohare,loop])[1],pd.DataFrame([midway,ohare,loop])[0],marker = '*',color='white') 349 | 350 | plt.savefig('/home/asm0384/HI_neighbs_hot_1.5.pdf',format='pdf') 351 | 352 | plt.show() 353 | -------------------------------------------------------------------------------- /plot_CMAQ_diff.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | #------------------------------------------ 4 | # Libraries 5 | #-------------- 6 | from matplotlib import pyplot as plt ; from matplotlib import colors 7 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 8 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 9 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 10 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 11 | from netCDF4 import Dataset 12 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 13 | import matplotlib.path as mpath; import seaborn as sns 14 | #------------------------------------------ 15 | 16 | # USER INPUT 17 | 18 | # shapes and directories == https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=State+Legislative+Districts 19 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 20 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 21 | 22 | # dir to grid file 23 | dir='/projects/b1045/jschnell/ForStacy/' 24 | ll='latlon_ChicagoLADCO_d03.nc' 25 | 26 | # dir to model files 27 | dir_CMAQ = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 28 | 29 | 30 | dir_CMAQ_BASE = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' # experimental choice 31 | 32 | dir_WRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 33 | 34 | dir_EPA = '/home/asm0384/ChicagoStudy/inputs/EPA_hourly_station_data/' 35 | 36 | #write out monthly files to csv? 37 | # read in monthly files from csv? 38 | writeoutcsv = True 39 | writeincsv = False 40 | show = True 41 | 42 | #variables of interest 43 | cmaq_var=['O3','NO2','NO','NOX','CO','ISOP','SO2','FORM','PM25_TOT','PM10'] 44 | cmaq_var=['O3','NO2','NO','CO','ISOP','SO2','FORM'] 45 | 46 | startswith = "COMBINE_ACONC_" 47 | 48 | # --------------------------------------------------------------------- 49 | # USER DEF FUNCTIONS 50 | # --------------------------------------------------------------------- 51 | 52 | #pull in cmaq 53 | startswith = "COMBINE_ACONC" 54 | 55 | 56 | #writeoutcsv = false 57 | def pull_CMAQ(dir_CMAQ_BASE,startswith,cmaq_var,version): 58 | #pull files from given directoy 59 | onlyfiles = next(os.walk(dir_CMAQ_BASE))[2] 60 | onlyfiles.sort() # so that searching for dates are easier 61 | fnames_CMAQ_BASE = [x for x in onlyfiles if x.startswith(startswith)] 62 | numfiles=(len(fnames_CMAQ)) 63 | ncfile_CMAQ_base = [Dataset(dir_CMAQ_BASE+ fnames_CMAQ_BASE[i],'r') for i in range(len(fnames_CMAQ_BASE))] 64 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 65 | #full day conc 66 | cmaq_avgs_BASE = [] 67 | cmaq_avgs_daily_BASE = [] 68 | cmaq_avgs_hourly_BASE = [] 69 | # make averages for cmaq base 70 | for i in range(len(cmaq_var)): 71 | tmp = np.asarray([ncfile_CMAQ_base[j][cmaq_var[i]] for j in range(len(ncfile_CMAQ_base))]) 72 | hourly = np.average(tmp,axis=0) # hour by hour concs 73 | daily = np.average(tmp,axis=1) # daily average concs 74 | # 75 | monthly = np.average(daily,axis=0) 76 | if writeoutcsv == True: pd.DataFrame(monthly[0]).to_csv(cmaq_var[i]+'_'+version+'_BASE_2018_aug.csv', header=False,index=False) 77 | cmaq_avgs_BASE.append(monthly[0]) 78 | cmaq_avgs_daily_BASE.append(daily) 79 | cmaq_avgs_hourly_BASE.append(hourly) 80 | #return 81 | print('Done with ' +cmaq_var[i]) 82 | #return 83 | return cmaq_avgs_BASE, cmaq_avgs_daily_BASE, cmaq_avgs_hourly_BASE, units_cmaq 84 | 85 | 86 | #writeoutcsv = false 87 | 88 | 89 | 90 | #plot cmaq 91 | 92 | #plotting loop 93 | def plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped): 94 | for i in range(0,len(monthly_tot)): 95 | #for i in range(1): 96 | # set var for plot 97 | var= var_tot[i] 98 | data= np.asarray(monthly_tot[i]) 99 | if var == 'RAINC': pass 100 | else: 101 | if i= pd.to_datetime(dates)[0]) & (tmp['datetime'] <= pd.to_datetime(dates)[-1]) 212 | tmp = tmp.loc[mask] 213 | epa_lat,epa_lon= tmp['Latitude'].unique(), tmp['Longitude'].unique() 214 | tmp_avg = tmp.groupby(['Longitude','Latitude','datetime'])['Arithmetic Mean'].mean().reset_index() 215 | tmp_avg.to_csv(dir_EPA + cmaq_var[i]+ '_'+ version+ '_daily_avg_by_ChiMonitor_Aug2018.csv') 216 | tmp_avg = tmp.groupby(['Longitude','Latitude'])['Arithmetic Mean'].mean().reset_index() 217 | tmp_avg.to_csv(dir_EPA + cmaq_var[i]+ '_' + version +'_by_ChiMonitor_Aug2018.csv') 218 | epa_avgs_latlon.append(tmp_avg) 219 | except: 220 | print('No EPA file for ' + cmaq_var[i]) 221 | epa_avgs_latlon.append(np.nan) 222 | #epa_avgs.append(np.nan) 223 | 224 | # ppm to ppb 225 | epa_avgs_latlon[4]['Arithmetic Mean'] = epa_avgs_latlon[4]['Arithmetic Mean']*1000 226 | epa_avgs_latlon[0]['Arithmetic Mean'] = epa_avgs_latlon[0]['Arithmetic Mean']*1000 227 | 228 | 229 | #---------- PULL IN CMAQ 230 | 231 | #pull in cmaq 232 | 233 | startswith = "COMBINE_ACONC" 234 | version = '_aug2018_monthly_nobusdiff' 235 | base_monthly, base_daily, base_hourly, units = pull_CMAQ(dir_CMAQ_BASE,startswith,cmaq_var,version) 236 | 237 | #pull in cmaq 238 | startswith = "COMBINE_ACONC" 239 | version2 = '_aug2018_monthly_nomunidiff' 240 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 241 | nomuni_monthly, nomuni_daily, nomuni_hourly, units = pull_CMAQ(dir_CMAQ,startswith,cmaq_var,version2) 242 | 243 | #pull in cmaq 244 | 245 | #pull in cmaq 246 | startswith = "COMBINE_ACONC" 247 | version3 = '_aug2018_monthly_nobusdiff' 248 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noBUS_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 249 | nobus_monthly, nobus_daily, nobus_hourly, units = pull_CMAQ(dir_CMAQ,startswith,cmaq_var,version3) 250 | 251 | 252 | 253 | #pull in cmaq 254 | startswith = "COMBINE_ACONC" 255 | version4 = '_aug2018_monthly_noschooldiff' 256 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noSCHOOL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 257 | noschool_monthly, noschool_daily, noschool_hourly, units = pull_CMAQ(dir_CMAQ,startswith,cmaq_var,version4) 258 | 259 | 260 | #---------- START PLOTTING 261 | import cartopy.feature as cfeature 262 | 263 | # projection 264 | crs_new = ccrs.PlateCarree()# get shape outside 265 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)) 266 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 267 | 268 | 269 | # version and title STUFF 270 | title_2 = " %diff no MUNI, Aug. 2018" 271 | var_tot = cmaq_var cmap = 'magma_r' 272 | version = '_nomuniDIFF_aug2018_' 273 | #monthly_tot = [nomuni_monthly[i]-base_monthly[i] for i in range(len(base_monthly))] 274 | monthly_tot = [(nomuni_monthly[i]-base_monthly[i])/base_monthly[i]*100 for i in range(len(base_monthly))] 275 | 276 | # SET RANGES 277 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.999),2) for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],0.01),2) for i in range(len(monthly_tot))] 278 | 279 | # DO WE EPA SCATTER 280 | add_epa = False #True 281 | div = True 282 | if div==True: cmap = 'RdBu_r' 283 | shaped= True 284 | show=False 285 | 286 | version = '_nomuni_PERCENTDIFF_aug2018_' 287 | 288 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 289 | 290 | #================ no bus percent diff 291 | # version and title STUFF 292 | title_2 = " %diff no BUS, Aug. 2018" 293 | var_tot = cmaq_var cmap = 'magma_r' 294 | version = '_nobus_PERCENTDIFF_aug2018_' 295 | #monthly_tot = [nobus_monthly[i]-base_monthly[i] for i in range(len(base_monthly))] 296 | monthly_tot = [(nobus_monthly[i]-base_monthly[i])/base_monthly[i]*100 for i in range(len(base_monthly))] 297 | 298 | # SET RANGES 299 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.999),2) for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],0.01),2) for i in range(len(monthly_tot))] 300 | 301 | # DO WE EPA SCATTER 302 | add_epa = False #True 303 | div = True 304 | if div==True: cmap = 'RdBu_r' 305 | 306 | shaped= True 307 | show=False 308 | 309 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 310 | 311 | 312 | #================ no SCHOOL percent diff 313 | 314 | 315 | # version and title STUFF 316 | title_2 = " %diff no SCHOOL, Aug. 2018" 317 | var_tot = cmaq_var cmap = 'magma_r' 318 | version = '_noSCHOOL_PERCENTDIFF_aug2018_' 319 | #monthly_tot = [noschool_monthly[i]-base_monthly[i] for i in range(len(base_monthly))] 320 | monthly_tot = [(noschool_monthly[i]-base_monthly[i])/base_monthly[i]*100 for i in range(len(base_monthly))] 321 | 322 | # SET RANGES 323 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.999),2) for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],0.01),2) for i in range(len(monthly_tot))] 324 | 325 | # DO WE EPA SCATTER 326 | add_epa = False #True 327 | div = True 328 | if div==True: cmap = 'RdBu_r' 329 | 330 | shaped= True 331 | show=False 332 | 333 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 334 | 335 | 336 | 337 | #---- base case 338 | monthly_tot = base_monthly #[nbase_monthly[i] for i in range(len(base_monthly))] 339 | version = '_SPOT_' 340 | title_2 = " , Aug. 2018" 341 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.99),5)*1000 for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],98),5)*1000 for i in range(len(monthly_tot))] 342 | vmaxs[0],vmins[0] =40,30 343 | shaped = False 344 | add_epa = True 345 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 346 | 347 | -------------------------------------------------------------------------------- /validation/compareHourlyWrfToClimateStations.py: -------------------------------------------------------------------------------- 1 | 2 | # Stacy Montgomery, April 2019 3 | #Single day WRF output 4 | 5 | # Future work == $[num] 6 | # $1: -- separate out so it can do d01, d02, d03 7 | 8 | # Notes -- NOTE[num] 9 | 10 | # Data for comparison 11 | # LCD data from noaa: https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/ 12 | # LCD station names -- metadata of stations -- must make CSV: https://www.ncdc.noaa.gov/homr/file/lcd-stations.txt 13 | # Currently the UTC offset calculator is for negative offsets, simple loop fix to do positive offsets 14 | 15 | # ~~~~~~ IMPORT PACKAGES ~~~~~~~~~~~~ 16 | #Station 17 | import glob, os 18 | import pandas as pd, numpy as np, matplotlib.pyplot as plt, cartopy.crs as crs, cartopy.feature as cpf 19 | from netCDF4 import Dataset 20 | from matplotlib.cm import get_cmap 21 | from cartopy.feature import NaturalEarthFeature 22 | from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim, cartopy_ylim, latlon_coords) 23 | import time 24 | from timezonefinder import TimezoneFinder 25 | from pytz import timezone 26 | import pytz 27 | from datetime import datetime,date, timedelta 28 | import dateutil.parser as dparser 29 | 30 | tf = TimezoneFinder(in_memory=True) 31 | 32 | # ~~~~~~ CUSTOM FUNCTIONS ~~~~~~~~~~~~ 33 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 34 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 35 | # stn -- points 36 | # wrf -- list 37 | xx=[];yy=[] 38 | for i in range(len(stn_lat)): 39 | abslat = np.abs(wrf_lat-stn_lat[i]) 40 | abslon= np.abs(wrf_lon-stn_lon[i]) 41 | c = np.maximum(abslon,abslat) 42 | latlon_idx = np.argmin(c) 43 | x, y = np.where(c == np.min(c)) 44 | #add indices of nearest wrf point station 45 | xx.append(x) 46 | yy.append(y) 47 | #return indices list 48 | return xx, yy 49 | 50 | # modified from https://stackoverflow.com/questions/16685384/finding-the-indices-of-matching-elements-in-list-in-python 51 | def find(lst, a): 52 | return [i for i, x in enumerate(lst) if x==a] 53 | 54 | # modified from ----- 55 | utc = pytz.utc 56 | def offset(lat,lon): 57 | #returns a location's time zone offset from UTC in minutes. 58 | today = datetime.now() 59 | tz_target = timezone(tf.certain_timezone_at(lat=lat, lng=lon)) 60 | # ATTENTION: tz_target could be None! handle error case 61 | today_target = tz_target.localize(today) 62 | today_utc = utc.localize(today) 63 | return (today_utc - today_target).total_seconds() / 3600 64 | 65 | 66 | # pull in real data, apply UTC, and average and remove hourly values 67 | def getRealData(LCD): 68 | date_noTime=[]; time_noDate=[] 69 | date_noTime= [LCD['DATE'][z].split('T')[0] for z in range(len(LCD['DATE']))] 70 | time_noDate=[LCD['DATE'][z].split('T')[1] for z in range(len(LCD['DATE']))] 71 | UTC_offset=offset(lon=LCD['LONGITUDE'][0], lat=LCD['LATITUDE'][0]) 72 | #get day before and after for UTC offset sake 73 | date_onedaybefore=(dparser.parse(dates[0])-timedelta(days=1)).isoformat().split('T')[0] 74 | date_onedayafter=(dparser.parse(dates[-1])+timedelta(days=1)).isoformat().split('T')[0] 75 | start_ind_dataset = find(date_noTime, date_onedaybefore)[0] 76 | end_ind_dataset= find(date_noTime, date_onedayafter)[-1] 77 | if Chatty: print('-> Adding UTC offset to timestamp and averaging repeated values') 78 | # UTC offset calculator 79 | # Get the time and round up or round down, also add the UTC offset such that correct time is in UTC 80 | correctedTime=[]; correctedRain=[]; correctedTemp =[];correctedDate=[] 81 | for i in range(len(LCD[start_ind_dataset: end_ind_dataset])): 82 | datetimeLCD=dparser.parse(LCD['DATE'][start_ind_dataset+i]) 83 | datetimeLCD_UTC = datetimeLCD + timedelta(hours=UTC_offset) 84 | try: 85 | rainz = float(LCD['HourlyPrecipitation'][start_ind_dataset+i]) 86 | except ValueError: 87 | rainz =float('nan') 88 | try: 89 | tempz= float(LCD['HourlyDryBulbTemperature'][start_ind_dataset+i]) 90 | except ValueError: 91 | tempz=float('nan') 92 | if datetimeLCD_UTC.minute >= 30: 93 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 94 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 95 | correctedRain.append(rainz) 96 | correctedTemp.append(tempz) 97 | elif datetimeLCD_UTC.minute < 30: 98 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 99 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 100 | correctedRain.append(rainz) 101 | correctedTemp.append(tempz) 102 | else: 103 | correctedTime.append((datetimeLCD_UTC).isoformat().split('T')[1]) 104 | correctedDate.append((datetimeLCD_UTC).isoformat().split('T')[0]) 105 | correctedRain.append(rainz) 106 | correctedTemp.append(tempz) 107 | #Now filter LCD so that it only uses UTC date times 108 | start_ind_dataset2 = find(correctedDate, dates[0])[0] 109 | end_ind_dataset2 = find(correctedDate, dates[-1])[-1] 110 | correctedRain=correctedRain[start_ind_dataset2: end_ind_dataset2] 111 | correctedTemp= correctedTemp[start_ind_dataset2: end_ind_dataset2] 112 | correctedTime = correctedTime[start_ind_dataset2: end_ind_dataset2] 113 | correctedDate = correctedDate[start_ind_dataset2: end_ind_dataset2] 114 | #Now nan-average repeating values 115 | correctedRain_noRepeats=[]; correctedTemp_noRepeats =[]; timeCorrected_noRepeats=[]; i=0; dateCorrected_noRepeats=[] 116 | while i < len(correctedTime): 117 | j=0; tmpRain=[];tmpTemp=[] 118 | try: 119 | while i+j < len(correctedTime)-1 and correctedTime[i] == correctedTime[i+j]: 120 | tmpTemp.append(correctedTemp[i+j]) 121 | tmpRain.append(correctedRain[i+j]) 122 | j=j+1 123 | timeCorrected_noRepeats.append(correctedTime[i]) 124 | dateCorrected_noRepeats.append(correctedDate[i]) 125 | if j == 0 and i0 and i Finished averaging duplicate values in station %s dataset'% str(station)) 139 | #VERY quick check to see if all data is available, if not, flag it for later 140 | missing_dates=[];missing_hours=[] 141 | if len(dates) == len(list(set(dateCorrected_noRepeats))): 142 | if Chatty: print('-> No missing dates at station %s' %(stationList[station],)) 143 | else: 144 | if Chatty: print('-> Missing dates at %s' %(stationList[station],)) 145 | missing_dates.append(stationList[station]) 146 | #next 147 | if len(list(set(timeCorrected_noRepeats))) == 24: 148 | if Chatty: print('-> No missing hours at station %s' %(stationList[station],)) 149 | else: 150 | if Chatty: print('-> Missing hours at %s' %(stationList[station],)) 151 | missing_hours.append(stationList[station]) 152 | #return 153 | return correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats 154 | 155 | 156 | # Easy bounding box checker 157 | def checkbounds(x,y,x1,y1,x2,y2): 158 | if (xx1 and y>y1): 159 | return True 160 | else: 161 | return False 162 | 163 | 164 | def findStations(): 165 | # Get station names -- NOTE1: LCD station names has no header... may cause index errors if format is different! 166 | stationList=[]; tmp=[] 167 | listOfStations= pd.read_csv(listOfStationsFile, header=None) 168 | listOfStations = listOfStations[np.isfinite(listOfStations[5])] #remove missing station data 169 | listOfStations =listOfStations.reset_index() #be able to index the stations properly 170 | tmp= [format(listOfStations[0][i],'06') for i in range(len(listOfStations))] #add leading zeroes to stations 171 | listOfStations['format'] = tmp; del tmp # add string names to list of stations 172 | stationList=[str(int(listOfStations[5][i]))+listOfStations['format'][i]+".csv" for i in range(len(listOfStations))] 173 | stn_lat =listOfStations[15].to_list(); stn_lon =listOfStations[16].to_list() 174 | stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy() 175 | lenOriginalStations=len(stn_lat) 176 | #check domain 177 | #plt.scatter(stn_lon , stn_lat) 178 | #xd03=[lond03min, lond03min, lond03max, lond03max] 179 | #yd03=[latd03min, latd03max, latd03min, latd03max] 180 | #plt.scatter(xd03, yd03) 181 | stnListCpy = [x for x in stationList] 182 | in_d01=[] 183 | #Check bounds and remove from non d01 domains 184 | for z in range(lenOriginalStations): 185 | x,y= stn_lat[z],stn_lon[z] 186 | if checkbounds(x,y,latd01min, lond01min, latd01max, lond01max): 187 | in_d01.append(True) 188 | else: 189 | stnListCpy.remove(stationList[z]) 190 | stn_latCopy.remove(stn_lat[z]) 191 | stn_lonCopy.remove(stn_lon[z]) 192 | #get rid of duplicates 193 | stationList = [x for x in stnListCpy] 194 | stn_lat = [x for x in stn_latCopy] 195 | stn_lon = [x for x in stn_lonCopy] 196 | del stnListCpy, stn_latCopy, stn_lonCopy 197 | # [in]Sanity check 198 | #plt.scatter(stn_lon , stn_lat,c= in_d03) 199 | #xd03=[lond03min, lond03min, lond03max, lond03max] 200 | #yd03=[latd03min, latd03max, latd03min, latd03max] 201 | #plt.scatter(xd03, yd03) 202 | #plt.show() 203 | #Check if stations exist and are in domain bounds, if not remove the station 204 | import requests 205 | stnListCpy = [x for x in stationList]; stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy() 206 | for station in range(len(stationList)): 207 | LCD = requests.get(NOAAdataLink + stationList[station]) 208 | #LCD.connect() 209 | if LCD.status_code > 200: 210 | if Chatty: print("-> Link does not exist for %s, removing station" %(stationList[station],)) 211 | stnListCpy.remove(stationList[station]) 212 | stn_latCopy.remove(stn_lat[station]) 213 | stn_lonCopy.remove(stn_lon[station]) 214 | #Remove copies again 215 | stationList = [x for x in stnListCpy] 216 | stn_lat = [x for x in stn_latCopy] 217 | stn_lon = [x for x in stn_lonCopy] 218 | del stnListCpy, stn_latCopy, stn_lonCopy 219 | # now check to see which of these are within d02, d03 domains 220 | in_d02=[False for z in range(len(stn_lat))]; in_d03=[False for z in range(len(stn_lat))] 221 | for z in range(len(stationList)): 222 | x,y= stn_lat[z],stn_lon[z] 223 | if (checkbounds(x,y,latd02min, lond02min, latd02max, lond02max)): 224 | in_d02[z]=True 225 | if (checkbounds(x,y,latd03min, lond03min, latd03max, lond03max)): 226 | in_d03[z]=True 227 | # !!!!!!!!!!---------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 228 | # write out station list so we don't need to do this again: 229 | # !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 230 | station_out=pd.DataFrame(stationList) 231 | station_out.columns = ['stn'] 232 | station_out['lat']= stn_lat 233 | station_out['lon']= stn_lon 234 | station_out['in_d02']= in_d02 235 | station_out['in_d02']=in_d03 236 | station_out.to_csv('./station_out_removedmissing.csv') 237 | 238 | 239 | # ~~~~~~ START USER INPUT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 | monthNum=[i for i in range(12)] 241 | daysOfMonths=[31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] 242 | 243 | # variables of interest 244 | minTemp = 242; maxTemp = 294; 245 | 246 | # US Data 247 | NOAAdataLink="https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/" 248 | listOfStationsFile="~/lcd-stations.csv" #metadata of stations 249 | dirToWRF="/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_NoahLSM/" 250 | listOfStationsFile = "~/lcd-stations.csv" 251 | 252 | Chatty= True # false if you want to remove print statements 253 | written= True 254 | 255 | if Chatty: print('Starting ....') 256 | 257 | # ~~~~~~ START MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 258 | #------------------------------ load in wrf file names ---------- 259 | # $1 Get WRF file names 260 | filenames_d01=[] 261 | os.chdir(dirToWRF) 262 | for file in glob.glob("wrfout_d01_*"): 263 | filenames_d01.append(file) 264 | 265 | filenames_d01.sort() #files are now sorted by date and time 266 | 267 | # $1 Get WRF file names 268 | filenames_d02=[] 269 | os.chdir(dirToWRF) 270 | for file in glob.glob("wrfout_d02_*"): 271 | filenames_d02.append(file) 272 | 273 | filenames_d02.sort() #files are now sorted by date and time 274 | 275 | # $1 Get WRF file names 276 | filenames_d03=[] 277 | os.chdir(dirToWRF) 278 | for file in glob.glob("wrfout_d03_*"): 279 | filenames_d03.append(file) 280 | 281 | filenames_d03.sort() #files are now sorted by date and time 282 | 283 | dates=[filenames_d01[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d01))] 284 | 285 | runname='wrf_pure_PXLSM_v0' 286 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_PXLSM_v0/' 287 | listOfStationsFile = "~/lcd-stations.csv" 288 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 289 | 290 | comp_dataset_name = dirout+'station_data_complete_'+runname+'.csv' # name and directory to write out to 291 | comp_dataset_extra = dirout+'completeddata_mini_extras2.csv' 292 | station_out_name = dirout+'station_out_removedmissing.csv' #name of intermediate file 293 | comp_dataset_name2= dirout+'station_complete_rain.csv' 294 | 295 | 296 | 297 | # pull indices for d0# 298 | #assuming all files with d0# are in the same grid 299 | wrf_latd01, wrf_lond01 = latlon_coords(getvar(Dataset(filenames_d01[1]),"RAINNC")) 300 | wrf_latd02, wrf_lond02 = latlon_coords(getvar(Dataset(filenames_d02[1]),"RAINNC")) 301 | wrf_latd03, wrf_lond03 = latlon_coords(getvar(Dataset(filenames_d03[1]),"RAINNC")) 302 | 303 | #get corners of wrf files 304 | latd01min, latd01max, lond01min, lond01max = wrf_latd01.to_pandas().min().min(), wrf_latd01.to_pandas().max().max(),wrf_lond01.to_pandas().min().min(),wrf_lond01.to_pandas().max().max() 305 | latd02min, latd02max, lond02min, lond02max = wrf_latd02.to_pandas().min().min(), wrf_latd02.to_pandas().max().max(),wrf_lond02.to_pandas().min().min(),wrf_lond02.to_pandas().max().max() 306 | latd03min, latd03max, lond03min, lond03max = wrf_latd03.to_pandas().min().min(), wrf_latd03.to_pandas().max().max(),wrf_lond03.to_pandas().min().min(),wrf_lond03.to_pandas().max().max() 307 | 308 | 309 | #------------------------------ station parsing so we get lat lons ---------------- 310 | #------------------------ check to see if you must do this again --------- 311 | 312 | #if written out already 313 | if written: 314 | station_out=pd.read_csv('./station_out_removedmissing.csv') 315 | stn_lat= station_out['lat'] 316 | stn_lon= station_out['lon'] 317 | stationList =station_out['stn'] 318 | in_d02= station_out['in_d02'] 319 | in_d03= station_out['in_d02'] 320 | else: 321 | findStations() 322 | station_out=pd.read_csv('./station_out_removedmissing.csv') 323 | stn_lat= station_out['lat'] 324 | stn_lon= station_out['lon'] 325 | stationList =station_out['stn'] 326 | in_d02= station_out['in_d02'] 327 | in_d03= station_out['in_d02'] 328 | 329 | # ------------------------------------------------------------------# ------------------------------------------------------------------ 330 | 331 | # ----------------------- get Station data ------------------------------------------- 332 | # get indices for wrf given stn lat lon 333 | xx_d01=[]; xx_d02=[]; xx_d03=[]; yy_d01=[]; yy_d02=[]; yy_d03=[] 334 | # pull indices for d0# 335 | # assuming all files with d0# are in the same grid 336 | xx_d01,yy_d01=find_index(stn_lon, stn_lat, wrf_lond01, wrf_latd01) 337 | xx_d02,yy_d02=find_index(stn_lon, stn_lat, wrf_lond02, wrf_latd02) 338 | xx_d03,yy_d03=find_index(stn_lon, stn_lat, wrf_lond03, wrf_latd03) 339 | 340 | # Start pulling station data to compare 341 | # Output is a list of values for each station 342 | 343 | if Chatty: print('-'*70+'\n Starting processing station data \n' + '-'*70) 344 | 345 | # Pull out station data ... each rain[box] is a long list of 346 | rain_real=[[] for t in range(len(yy_d01))] 347 | temp_real=[[] for t in range(len(yy_d01))] 348 | 349 | start_out=time.time() 350 | for station in range(len(yy_d01)): 351 | start=time.time() 352 | LCD = pd.read_csv(NOAAdataLink + stationList[station]) 353 | #new loop 354 | if Chatty: print('-'*70) 355 | #letemknow 356 | correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats = getRealData(LCD) 357 | #make variable with all station data so we can compare to wrfout 358 | if Chatty: print('-> Completed loop %s (%s) in %.2f seconds' %(str(station),stationList[station],(time.time()-start))) 359 | rain_real[station]=correctedRain_noRepeats 360 | temp_real[station]=correctedTemp_noRepeats 361 | if int(len(yy_d01)/4)==station: 362 | print('25% complete, %.2f' %(time.time()-start_out,)) 363 | elif int(len(yy_d01)/2)==station: 364 | print('50% complete, %.2f' %(time.time()-start_out,)) 365 | elif int(3*len(yy_d01)/4)==station: 366 | print('75% complete, %.2f' %(time.time()-start_out,)) 367 | 368 | xx_d01_list=[xx_d01[i][0] for i in range(len(yy_d01))]; yy_d01_list=[yy_d01[i][0] for i in range(len(yy_d01))] 369 | xx_d02_list=[xx_d02[i][0] for i in range(len(yy_d02))]; yy_d02_list=[yy_d02[i][0] for i in range(len(yy_d02))] 370 | xx_d03_list=[xx_d03[i][0] for i in range(len(yy_d03))]; yy_d03_list=[yy_d03[i][0] for i in range(len(yy_d03))] 371 | 372 | #compare station data to wrf station data 373 | writeout_real = pd.DataFrame(temp_real) 374 | #writeout_real.columns = ['xx_d01'] 375 | writeout_real['xx_d01']= xx_d01_list 376 | writeout_real['yy_d01']= yy_d01_list 377 | writeout_real['lat']=stn_lat 378 | writeout_real['lon']=stn_lon 379 | writeout_real['in_d02']= in_d02 380 | writeout_real['in_d03']= in_d03 381 | writeout_real['dates']="%s"% dateCorrected_noRepeats 382 | writeout_real['times']="%s"% timeCorrected_noRepeats 383 | 384 | writeout_real.to_csv('./completed_dataset.csv') 385 | 386 | 387 | 388 | -------------------------------------------------------------------------------- /DataPreprocessing/compareHourlyWrfToClimateStations.py: -------------------------------------------------------------------------------- 1 | 2 | # Stacy Montgomery, April 2019 3 | #Single day WRF output 4 | 5 | # Future work == $[num] 6 | # $1: -- separate out so it can do d01, d02, d03 7 | 8 | # Notes -- NOTE[num] 9 | 10 | # Data for comparison 11 | # LCD data from noaa: https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/ 12 | # LCD station names -- metadata of stations -- must make CSV: https://www.ncdc.noaa.gov/homr/file/lcd-stations.txt 13 | # Currently the UTC offset calculator is for negative offsets, simple loop fix to do positive offsets 14 | 15 | # ~~~~~~ IMPORT PACKAGES ~~~~~~~~~~~~ 16 | #Station 17 | import glob, os 18 | import pandas as pd, numpy as np, matplotlib.pyplot as plt, cartopy.crs as crs, cartopy.feature as cpf 19 | from netCDF4 import Dataset 20 | from matplotlib.cm import get_cmap 21 | from cartopy.feature import NaturalEarthFeature 22 | from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim, cartopy_ylim, latlon_coords) 23 | import time 24 | from timezonefinder import TimezoneFinder 25 | from pytz import timezone 26 | import pytz 27 | from datetime import datetime,date, timedelta 28 | import dateutil.parser as dparser 29 | 30 | tf = TimezoneFinder(in_memory=True) 31 | 32 | # ~~~~~~ CUSTOM FUNCTIONS ~~~~~~~~~~~~ 33 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 34 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 35 | # stn -- points 36 | # wrf -- list 37 | xx=[];yy=[] 38 | for i in range(len(stn_lat)): 39 | abslat = np.abs(wrf_lat-stn_lat[i]) 40 | abslon= np.abs(wrf_lon-stn_lon[i]) 41 | c = np.maximum(abslon,abslat) 42 | latlon_idx = np.argmin(c) 43 | x, y = np.where(c == np.min(c)) 44 | #add indices of nearest wrf point station 45 | xx.append(x) 46 | yy.append(y) 47 | #return indices list 48 | return xx, yy 49 | 50 | # modified from https://stackoverflow.com/questions/16685384/finding-the-indices-of-matching-elements-in-list-in-python 51 | def find(lst, a): 52 | return [i for i, x in enumerate(lst) if x==a] 53 | 54 | # modified from ----- 55 | utc = pytz.utc 56 | def offset(lat,lon): 57 | #returns a location's time zone offset from UTC in minutes. 58 | today = datetime.now() 59 | tz_target = timezone(tf.certain_timezone_at(lat=lat, lng=lon)) 60 | # ATTENTION: tz_target could be None! handle error case 61 | today_target = tz_target.localize(today) 62 | today_utc = utc.localize(today) 63 | return (today_utc - today_target).total_seconds() / 3600 64 | 65 | 66 | # pull in real data, apply UTC, and average and remove hourly values 67 | def getRealData(LCD): 68 | date_noTime=[]; time_noDate=[] 69 | date_noTime= [LCD['DATE'][z].split('T')[0] for z in range(len(LCD['DATE']))] 70 | time_noDate=[LCD['DATE'][z].split('T')[1] for z in range(len(LCD['DATE']))] 71 | UTC_offset=offset(lon=LCD['LONGITUDE'][0], lat=LCD['LATITUDE'][0]) 72 | #get day before and after for UTC offset sake 73 | date_onedaybefore=(dparser.parse(dates[0])-timedelta(days=1)).isoformat().split('T')[0] 74 | date_onedayafter=(dparser.parse(dates[-1])+timedelta(days=1)).isoformat().split('T')[0] 75 | start_ind_dataset = find(date_noTime, date_onedaybefore)[0] 76 | end_ind_dataset= find(date_noTime, date_onedayafter)[-1] 77 | if Chatty: print('-> Adding UTC offset to timestamp and averaging repeated values') 78 | # UTC offset calculator 79 | # Get the time and round up or round down, also add the UTC offset such that correct time is in UTC 80 | correctedTime=[]; correctedRain=[]; correctedTemp =[];correctedDate=[] 81 | for i in range(len(LCD[start_ind_dataset: end_ind_dataset])): 82 | datetimeLCD=dparser.parse(LCD['DATE'][start_ind_dataset+i]) 83 | datetimeLCD_UTC = datetimeLCD + timedelta(hours=UTC_offset) 84 | try: 85 | rainz = float(LCD['HourlyPrecipitation'][start_ind_dataset+i]) 86 | except ValueError: 87 | rainz =float('nan') 88 | try: 89 | tempz= float(LCD['HourlyDryBulbTemperature'][start_ind_dataset+i]) 90 | except ValueError: 91 | tempz=float('nan') 92 | if datetimeLCD_UTC.minute >= 30: 93 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 94 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 95 | correctedRain.append(rainz) 96 | correctedTemp.append(tempz) 97 | elif datetimeLCD_UTC.minute < 30: 98 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 99 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 100 | correctedRain.append(rainz) 101 | correctedTemp.append(tempz) 102 | else: 103 | correctedTime.append((datetimeLCD_UTC).isoformat().split('T')[1]) 104 | correctedDate.append((datetimeLCD_UTC).isoformat().split('T')[0]) 105 | correctedRain.append(rainz) 106 | correctedTemp.append(tempz) 107 | #Now filter LCD so that it only uses UTC date times 108 | start_ind_dataset2 = find(correctedDate, dates[0])[0] 109 | end_ind_dataset2 = find(correctedDate, dates[-1])[-1] 110 | correctedRain=correctedRain[start_ind_dataset2: end_ind_dataset2] 111 | correctedTemp= correctedTemp[start_ind_dataset2: end_ind_dataset2] 112 | correctedTime = correctedTime[start_ind_dataset2: end_ind_dataset2] 113 | correctedDate = correctedDate[start_ind_dataset2: end_ind_dataset2] 114 | #Now nan-average repeating values 115 | correctedRain_noRepeats=[]; correctedTemp_noRepeats =[]; timeCorrected_noRepeats=[]; i=0; dateCorrected_noRepeats=[] 116 | while i < len(correctedTime): 117 | j=0; tmpRain=[];tmpTemp=[] 118 | try: 119 | while i+j < len(correctedTime)-1 and correctedTime[i] == correctedTime[i+j]: 120 | tmpTemp.append(correctedTemp[i+j]) 121 | tmpRain.append(correctedRain[i+j]) 122 | j=j+1 123 | timeCorrected_noRepeats.append(correctedTime[i]) 124 | dateCorrected_noRepeats.append(correctedDate[i]) 125 | if j == 0 and i0 and i Finished averaging duplicate values in station %s dataset'% str(station)) 139 | #VERY quick check to see if all data is available, if not, flag it for later 140 | missing_dates=[];missing_hours=[] 141 | if len(dates) == len(list(set(dateCorrected_noRepeats))): 142 | if Chatty: print('-> No missing dates at station %s' %(stationList[station],)) 143 | else: 144 | if Chatty: print('-> Missing dates at %s' %(stationList[station],)) 145 | missing_dates.append(stationList[station]) 146 | #next 147 | if len(list(set(timeCorrected_noRepeats))) == 24: 148 | if Chatty: print('-> No missing hours at station %s' %(stationList[station],)) 149 | else: 150 | if Chatty: print('-> Missing hours at %s' %(stationList[station],)) 151 | missing_hours.append(stationList[station]) 152 | #return 153 | return correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats 154 | 155 | 156 | # Easy bounding box checker 157 | def checkbounds(x,y,x1,y1,x2,y2): 158 | if (xx1 and y>y1): 159 | return True 160 | else: 161 | return False 162 | 163 | 164 | def findStations(): 165 | # Get station names -- NOTE1: LCD station names has no header... may cause index errors if format is different! 166 | stationList=[]; tmp=[] 167 | listOfStations= pd.read_csv(listOfStationsFile, header=None) 168 | listOfStations = listOfStations[np.isfinite(listOfStations[5])] #remove missing station data 169 | listOfStations =listOfStations.reset_index() #be able to index the stations properly 170 | tmp= [format(listOfStations[0][i],'06') for i in range(len(listOfStations))] #add leading zeroes to stations 171 | listOfStations['format'] = tmp; del tmp # add string names to list of stations 172 | stationList=[str(int(listOfStations[5][i]))+listOfStations['format'][i]+".csv" for i in range(len(listOfStations))] 173 | stn_lat =listOfStations[15].to_list(); stn_lon =listOfStations[16].to_list() 174 | stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy() 175 | lenOriginalStations=len(stn_lat) 176 | #check domain 177 | #plt.scatter(stn_lon , stn_lat) 178 | #xd03=[lond03min, lond03min, lond03max, lond03max] 179 | #yd03=[latd03min, latd03max, latd03min, latd03max] 180 | #plt.scatter(xd03, yd03) 181 | stnListCpy = [x for x in stationList] 182 | in_d01=[] 183 | #Check bounds and remove from non d01 domains 184 | for z in range(lenOriginalStations): 185 | x,y= stn_lat[z],stn_lon[z] 186 | if checkbounds(x,y,latd01min, lond01min, latd01max, lond01max): 187 | in_d01.append(True) 188 | else: 189 | stnListCpy.remove(stationList[z]) 190 | stn_latCopy.remove(stn_lat[z]) 191 | stn_lonCopy.remove(stn_lon[z]) 192 | #get rid of duplicates 193 | stationList = [x for x in stnListCpy] 194 | stn_lat = [x for x in stn_latCopy] 195 | stn_lon = [x for x in stn_lonCopy] 196 | del stnListCpy, stn_latCopy, stn_lonCopy 197 | # [in]Sanity check 198 | #plt.scatter(stn_lon , stn_lat,c= in_d03) 199 | #xd03=[lond03min, lond03min, lond03max, lond03max] 200 | #yd03=[latd03min, latd03max, latd03min, latd03max] 201 | #plt.scatter(xd03, yd03) 202 | #plt.show() 203 | #Check if stations exist and are in domain bounds, if not remove the station 204 | import requests 205 | stnListCpy = [x for x in stationList]; stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy() 206 | for station in range(len(stationList)): 207 | LCD = requests.get(NOAAdataLink + stationList[station]) 208 | #LCD.connect() 209 | if LCD.status_code > 200: 210 | if Chatty: print("-> Link does not exist for %s, removing station" %(stationList[station],)) 211 | stnListCpy.remove(stationList[station]) 212 | stn_latCopy.remove(stn_lat[station]) 213 | stn_lonCopy.remove(stn_lon[station]) 214 | #Remove copies again 215 | stationList = [x for x in stnListCpy] 216 | stn_lat = [x for x in stn_latCopy] 217 | stn_lon = [x for x in stn_lonCopy] 218 | del stnListCpy, stn_latCopy, stn_lonCopy 219 | # now check to see which of these are within d02, d03 domains 220 | in_d02=[False for z in range(len(stn_lat))]; in_d03=[False for z in range(len(stn_lat))] 221 | for z in range(len(stationList)): 222 | x,y= stn_lat[z],stn_lon[z] 223 | if (checkbounds(x,y,latd02min, lond02min, latd02max, lond02max)): 224 | in_d02[z]=True 225 | if (checkbounds(x,y,latd03min, lond03min, latd03max, lond03max)): 226 | in_d03[z]=True 227 | # !!!!!!!!!!---------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 228 | # write out station list so we don't need to do this again: 229 | # !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 230 | station_out=pd.DataFrame(stationList) 231 | station_out.columns = ['stn'] 232 | station_out['lat']= stn_lat 233 | station_out['lon']= stn_lon 234 | station_out['in_d02']= in_d02 235 | station_out['in_d02']=in_d03 236 | station_out.to_csv('./station_out_removedmissing.csv') 237 | 238 | 239 | # ~~~~~~ START USER INPUT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 | monthNum=[i for i in range(12)] 241 | daysOfMonths=[31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] 242 | 243 | # variables of interest 244 | minTemp = 242; maxTemp = 294; 245 | 246 | # US Data 247 | NOAAdataLink="https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/" 248 | listOfStationsFile="~/lcd-stations.csv" #metadata of stations 249 | dirToWRF="/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_NoahLSM/" 250 | listOfStationsFile = "~/lcd-stations.csv" 251 | 252 | Chatty= True # false if you want to remove print statements 253 | written= True 254 | 255 | if Chatty: print('Starting ....') 256 | 257 | # ~~~~~~ START MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 258 | #------------------------------ load in wrf file names ---------- 259 | # $1 Get WRF file names 260 | filenames_d01=[] 261 | os.chdir(dirToWRF) 262 | for file in glob.glob("wrfout_d01_*"): 263 | filenames_d01.append(file) 264 | 265 | filenames_d01.sort() #files are now sorted by date and time 266 | 267 | # $1 Get WRF file names 268 | filenames_d02=[] 269 | os.chdir(dirToWRF) 270 | for file in glob.glob("wrfout_d02_*"): 271 | filenames_d02.append(file) 272 | 273 | filenames_d02.sort() #files are now sorted by date and time 274 | 275 | # $1 Get WRF file names 276 | filenames_d03=[] 277 | os.chdir(dirToWRF) 278 | for file in glob.glob("wrfout_d03_*"): 279 | filenames_d03.append(file) 280 | 281 | filenames_d03.sort() #files are now sorted by date and time 282 | 283 | dates=[filenames_d01[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d01))] 284 | 285 | runname='wrf_pure_PXLSM_v0' 286 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_PXLSM_v0/' 287 | listOfStationsFile = "~/lcd-stations.csv" 288 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 289 | 290 | comp_dataset_name = dirout+'station_data_complete_'+runname+'.csv' # name and directory to write out to 291 | comp_dataset_extra = dirout+'completeddata_mini_extras2.csv' 292 | station_out_name = dirout+'station_out_removedmissing.csv' #name of intermediate file 293 | comp_dataset_name2= dirout+'station_complete_rain.csv' 294 | 295 | 296 | 297 | # pull indices for d0# 298 | #assuming all files with d0# are in the same grid 299 | wrf_latd01, wrf_lond01 = latlon_coords(getvar(Dataset(filenames_d01[1]),"RAINNC")) 300 | wrf_latd02, wrf_lond02 = latlon_coords(getvar(Dataset(filenames_d02[1]),"RAINNC")) 301 | wrf_latd03, wrf_lond03 = latlon_coords(getvar(Dataset(filenames_d03[1]),"RAINNC")) 302 | 303 | #get corners of wrf files 304 | latd01min, latd01max, lond01min, lond01max = wrf_latd01.to_pandas().min().min(), wrf_latd01.to_pandas().max().max(),wrf_lond01.to_pandas().min().min(),wrf_lond01.to_pandas().max().max() 305 | latd02min, latd02max, lond02min, lond02max = wrf_latd02.to_pandas().min().min(), wrf_latd02.to_pandas().max().max(),wrf_lond02.to_pandas().min().min(),wrf_lond02.to_pandas().max().max() 306 | latd03min, latd03max, lond03min, lond03max = wrf_latd03.to_pandas().min().min(), wrf_latd03.to_pandas().max().max(),wrf_lond03.to_pandas().min().min(),wrf_lond03.to_pandas().max().max() 307 | 308 | 309 | #------------------------------ station parsing so we get lat lons ---------------- 310 | #------------------------ check to see if you must do this again --------- 311 | 312 | #if written out already 313 | if written: 314 | station_out=pd.read_csv('./station_out_removedmissing.csv') 315 | stn_lat= station_out['lat'] 316 | stn_lon= station_out['lon'] 317 | stationList =station_out['stn'] 318 | in_d02= station_out['in_d02'] 319 | in_d03= station_out['in_d02'] 320 | else: 321 | findStations() 322 | station_out=pd.read_csv('./station_out_removedmissing.csv') 323 | stn_lat= station_out['lat'] 324 | stn_lon= station_out['lon'] 325 | stationList =station_out['stn'] 326 | in_d02= station_out['in_d02'] 327 | in_d03= station_out['in_d02'] 328 | 329 | # ------------------------------------------------------------------# ------------------------------------------------------------------ 330 | 331 | # ----------------------- get Station data ------------------------------------------- 332 | # get indices for wrf given stn lat lon 333 | xx_d01=[]; xx_d02=[]; xx_d03=[]; yy_d01=[]; yy_d02=[]; yy_d03=[] 334 | # pull indices for d0# 335 | # assuming all files with d0# are in the same grid 336 | xx_d01,yy_d01=find_index(stn_lon, stn_lat, wrf_lond01, wrf_latd01) 337 | xx_d02,yy_d02=find_index(stn_lon, stn_lat, wrf_lond02, wrf_latd02) 338 | xx_d03,yy_d03=find_index(stn_lon, stn_lat, wrf_lond03, wrf_latd03) 339 | 340 | # Start pulling station data to compare 341 | # Output is a list of values for each station 342 | 343 | if Chatty: print('-'*70+'\n Starting processing station data \n' + '-'*70) 344 | 345 | # Pull out station data ... each rain[box] is a long list of 346 | rain_real=[[] for t in range(len(yy_d01))] 347 | temp_real=[[] for t in range(len(yy_d01))] 348 | 349 | start_out=time.time() 350 | for station in range(len(yy_d01)): 351 | start=time.time() 352 | LCD = pd.read_csv(NOAAdataLink + stationList[station]) 353 | #new loop 354 | if Chatty: print('-'*70) 355 | #letemknow 356 | correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats = getRealData(LCD) 357 | #make variable with all station data so we can compare to wrfout 358 | if Chatty: print('-> Completed loop %s (%s) in %.2f seconds' %(str(station),stationList[station],(time.time()-start))) 359 | rain_real[station]=correctedRain_noRepeats 360 | temp_real[station]=correctedTemp_noRepeats 361 | if int(len(yy_d01)/4)==station: 362 | print('25% complete, %.2f' %(time.time()-start_out,)) 363 | elif int(len(yy_d01)/2)==station: 364 | print('50% complete, %.2f' %(time.time()-start_out,)) 365 | elif int(3*len(yy_d01)/4)==station: 366 | print('75% complete, %.2f' %(time.time()-start_out,)) 367 | 368 | xx_d01_list=[xx_d01[i][0] for i in range(len(yy_d01))]; yy_d01_list=[yy_d01[i][0] for i in range(len(yy_d01))] 369 | xx_d02_list=[xx_d02[i][0] for i in range(len(yy_d02))]; yy_d02_list=[yy_d02[i][0] for i in range(len(yy_d02))] 370 | xx_d03_list=[xx_d03[i][0] for i in range(len(yy_d03))]; yy_d03_list=[yy_d03[i][0] for i in range(len(yy_d03))] 371 | 372 | #compare station data to wrf station data 373 | writeout_real = pd.DataFrame(temp_real) 374 | #writeout_real.columns = ['xx_d01'] 375 | writeout_real['xx_d01']= xx_d01_list 376 | writeout_real['yy_d01']= yy_d01_list 377 | writeout_real['lat']=stn_lat 378 | writeout_real['lon']=stn_lon 379 | writeout_real['in_d02']= in_d02 380 | writeout_real['in_d03']= in_d03 381 | writeout_real['dates']="%s"% dateCorrected_noRepeats 382 | writeout_real['times']="%s"% timeCorrected_noRepeats 383 | 384 | writeout_real.to_csv('./completed_dataset.csv') 385 | 386 | 387 | 388 | --------------------------------------------------------------------------------