├── Screen Shot 2019-09-06 at 5.25.15 PM.png ├── CommAreas ├── README.md ├── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.dbf ├── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp ├── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shx └── geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.prj ├── make_gif.py ├── mask_given_shapefile.py ├── README.md ├── validation ├── table1_validation_model_d02d02.py ├── MODEL_EVALUATION_r2_mse_bias_for_d02_d03.py ├── MODEL_EVALUATION_CMAQCheck_part1.py ├── model_validation_statistics.py └── compareHourlyWrfToClimateStations.py ├── DataPreprocessing ├── run_vertint.csh ├── cmaq_to_stations.py ├── wrf_to_stations_step2.py └── compareHourlyWrfToClimateStations.py ├── Validation ├── VCD_comparison.py └── station_validation.py ├── convert_netcdf_to_geotif.py ├── emissions_chg_plot.py ├── plot_cmaq.py ├── compare_CMAQ_to_EPAstation.py ├── model_column_comparison.py ├── PostProcessing ├── plot_cmaq_may2021.py ├── timeseries_epa_stn_cmaq_may2021.py └── o3_profile.py ├── correlation_wrf_cmaq_smoke.py ├── three_panel_epa_gif.py ├── chi_map_cropped.py └── plot_CMAQ_diff.py /Screen Shot 2019-09-06 at 5.25.15 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/Screen Shot 2019-09-06 at 5.25.15 PM.png -------------------------------------------------------------------------------- /CommAreas/README.md: -------------------------------------------------------------------------------- 1 | # shapefiles taken from 2 | https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6 3 | 4 | -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.dbf -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacymonty/Chi_AQ/HEAD/CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shx -------------------------------------------------------------------------------- /CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]] -------------------------------------------------------------------------------- /make_gif.py: -------------------------------------------------------------------------------- 1 | #libraires 2 | import moviepy.editor as mpy 3 | import os 4 | import glob 5 | 6 | 7 | filestartswith='NO2' 8 | dir='~/' 9 | 10 | # 11 | gif_name = 'NO2' 12 | fps = 10 13 | file_list = ['NO2_day%i_hour%i.png'%(j,k) for j in 
range(5) for k in range(24)]
14 | clip = mpy.ImageSequenceClip(file_list, fps=fps)
15 | clip.write_gif('{}.gif'.format(gif_name), fps=fps)
16 |
--------------------------------------------------------------------------------
/mask_given_shapefile.py:
--------------------------------------------------------------------------------
1 | def mask_given_shapefile(lon,lat,shapefile):
2 | '''
3 | Make a mask given a shapefile
4 | lon - array of grid lons
5 | lat - array of grid lats
6 | shapefile - geopandas geodataframe shapefile (requires geopandas as gpd, numpy as np, shapely Point and unary_union to be imported by the calling script)
7 | '''
8 | union=gpd.GeoSeries(unary_union(shapefile.geometry))
9 | mask=np.ones(lon.shape,dtype=bool)
10 | mask[:] = False
11 | for i in range(len(lon)):
12 | for j in range(len(lon[0])):
13 | pt = Point(lon[i][j],lat[i][j])
14 | mask[i][j] = pt.within(union[0])
15 | #
16 | return mask
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Chi_AQ
2 | Use this repository to plot Chicago air quality using model and satellite data.
3 | Still in development, for sure. Options to customize plotting are still set inside chi_map_cropped.py.
4 |
5 | Output of chi_map_cropped.py looks like this (with some minor adjustments in photoshop):
6 | ![Image of Chicago Air Quality given shapefile](https://github.com/stacymonty/Chi_AQ/blob/master/Screen%20Shot%202019-09-06%20at%205.25.15%20PM.png)
7 |
8 | The outer line crops the image of the gridded data to the merged Chicago shapefile.
9 | It doesn't actually crop the data to the shapefile -- future work.
10 |
--------------------------------------------------------------------------------
/validation/table1_validation_model_d02d02.py:
--------------------------------------------------------------------------------
1 | # model validation table for CHEMICALS
2 |
3 | import pandas as pd
4 | import numpy as np
5 | import scipy.stats as st
6 |
7 |
8 | def stats(data,prediction):
9 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs
10 | mu_d,mu_p = np.mean(x),np.mean(y)
11 | bias = np.sum(x-y)/len(x) # mean of (observation - model)
12 | rmse = np.sqrt(np.mean((y-x)**2))
13 | r,p = st.pearsonr(x,y)
14 | return mu_d,mu_p,bias,rmse,r,p
15 |
16 |
17 |
18 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv',
19 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv','NO2_d02_2019_1_EPA_CMAQ_Combine.csv',
20 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv',
21 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv','O3_d02_2019_1_EPA_CMAQ_Combine.csv',
22 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv',
23 | 'PM25_TOT_d02_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d02_2019_1_EPA_CMAQ_Combine.csv']
24 |
25 | out = []
26 |
27 | for i in range(len(fnames)):
28 | f = pd.read_csv(fnames[i])
29 | if i>3 and i<8:
30 | s = stats(f['Sample Measurement']*1000,f['CMAQ'])
31 | else:
32 | s = stats(f['Sample Measurement'],f['CMAQ'])
33 | out.append(s)
34 |
35 | out = pd.DataFrame(out)
36 | out.columns=['mu_d','mu_p','bias','rmse','r','p']
37 |
38 | out.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',]
39 |
40 | out
--------------------------------------------------------------------------------
/DataPreprocessing/run_vertint.csh:
--------------------------------------------------------------------------------
1 | #!/bin/csh
2 
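mask_given_shapefile.py above defines only the function, so the geopandas, numpy, and shapely names it uses (gpd, np, Point, unary_union) have to come from the script that calls it. A minimal usage sketch, assuming the function is pasted into the same script as these imports; the grid filename is the d03 lat/lon file referenced elsewhere in this repo (adjust the path) and the shapefile is the one shipped under CommAreas/:

import numpy as np
import geopandas as gpd
from netCDF4 import Dataset
from shapely.geometry import Point
from shapely.ops import unary_union

# example paths -- the grid file lives wherever your copy of latlon_ChicagoLADCO_d03.nc is
grid = Dataset('latlon_ChicagoLADCO_d03.nc', 'r')
lon, lat = np.array(grid['lon']), np.array(grid['lat'])          # 2-D curvilinear grid
chi = gpd.GeoDataFrame.from_file('CommAreas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp')

mask = mask_given_shapefile(lon, lat, chi)   # boolean array, True inside the merged community areas
# e.g. average a gridded NO2 field of the same shape over the city only:
# chicago_mean = no2[mask].mean()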
| 3 | #For creating VCD from CMAQ output -- needs mcip files and cmaq cctm files 4 | 5 | # For future runs: check indir, fname_start, begdate + enddate 6 | # Make sure indir/column and indir/mcip exists 7 | # 8 | # Location of top directory CONC and MCIP files 9 | setenv indir /projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852 10 | # # Location to create vert files 11 | setenv outdir $indir/column 12 | # # Location of CONC files 13 | setenv concdir $indir 14 | # # Location of METCRO3D files 15 | setenv mcipdir $indir/mcip 16 | # How the netcdffile is named at the front 17 | setenv fname_start CCTM_CONC_v3852_ 18 | 19 | # 20 | # #Starting and ending times (inclusive) of times 21 | setenv begdate_g "2019-01-01" # YYYYMMDD 22 | setenv enddate_g "2019-02-01" # YYYYMMDD 23 | # 24 | # # beg date julian 25 | setenv begdate_j `date -ud "${begdate_g}" +%Y%j` 26 | # # end date julian 27 | setenv enddate_j `date -ud "${enddate_g}" +%Y%j` 28 | # # curr date (updated in loop) julian 29 | setenv curdate_j $begdate_j 30 | # # curr date (updated in loop) gregorian 31 | setenv curdate_g $begdate_g 32 | # # curr date (updated in loop) gregorian 33 | setenv curdate_g_f `date -ud "${curdate_g}" +%Y%m%d` 34 | # 35 | # # Main loop 36 | while ( $curdate_j <= $enddate_j) 37 | # 38 | # # Set name of input file 39 | setenv infile $indir/$fname_start$curdate_g_f".nc" 40 | # # Name of output file 41 | setenv outfile $outdir/$fname_start$curdate_g_f"_column.nc" 42 | # # Name of Metfile 43 | setenv metfile $mcipdir/"METCRO3D_Chicago_LADCO_"$curdate_g".nc" 44 | 45 | vertintegral<< TEST_DONE 46 | infile 47 | 48 | 49 | 50 | metfile 51 | outfile 52 | TEST_DONE 53 | 54 | 55 | setenv curdate_g `date -ud "${curdate_g}+1days" +%Y-%m-%d` 56 | setenv curdate_j `date -ud "${curdate_g}" +%Y%j` 57 | setenv curdate_g_f `date -ud "${curdate_g}" +%Y%m%d` 58 | 59 | #TEST_DONE 60 | 61 | echo "----------------------------- " 62 | echo $curdate_g 63 | 64 | end 65 | -------------------------------------------------------------------------------- /Validation/VCD_comparison.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # --------------------------------------------------------------------- 4 | # Stacy Montgomery, Jan 2021 5 | # 6 | # Use after you crop the L2 7 | 8 | # --------------------------------------------------------------------- 9 | # USER INPUT 10 | # --------------------------------------------------------------------- 11 | from netCDF4 import Dataset 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import pandas as pd 15 | import os 16 | import netCDF4 17 | import math 18 | from scipy.interpolate import griddata 19 | import scipy.stats as st 20 | 21 | # Projections -- this will be used in naming files later 22 | domain = 'Chicago' 23 | # grid file 24 | grid='/home/asm0384/ChicagoStudy/inputs/grid/latlon_ChicagoLADCO_d03.nc' 25 | lon,lat = np.array(Dataset(grid,'r')['lon']),np.array(Dataset(grid,'r')['lat']) 26 | 27 | var='NO2' 28 | 29 | #Directory to where L2 TropOMI files are stored 30 | dir='/projects/b1045/TropOMI/'+var+'/l2_cut/' 31 | 32 | #from netcdf file, what do you want 33 | varname='nitrogendioxide_tropospheric_column' 34 | varprecision='qa_value' 35 | tagdir = '~/tag/' 36 | 37 | filestartswith = 'S5P_OFFL_L2__NO2____' # 'S5P_OFFL_L2__O3' 38 | 39 | summer_regrid = pd.read_csv('~/rbdinterp_linear_smooth_201808.csv',index_col=0) 40 | summer_regrid2 = 
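The while loop in run_vertint.csh above only steps the calendar forward one day at a time and assembles the CONC, METCRO3D, and output column filenames before handing them to vertintegral. A rough Python sketch of that filename logic, with the directory and prefixes copied from the script (vertintegral itself is an external utility and is not reproduced here):

from datetime import date, timedelta

indir = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852'
fname_start = 'CCTM_CONC_v3852_'
begdate, enddate = date(2019, 1, 1), date(2019, 2, 1)

d = begdate
while d <= enddate:                       # inclusive, like the julian-date comparison in the csh loop
    ymd = d.strftime('%Y%m%d')            # matches $curdate_g_f, e.g. 20190101
    ymd_dash = d.strftime('%Y-%m-%d')     # matches $curdate_g, e.g. 2019-01-01
    infile  = f'{indir}/{fname_start}{ymd}.nc'
    outfile = f'{indir}/column/{fname_start}{ymd}_column.nc'
    metfile = f'{indir}/mcip/METCRO3D_Chicago_LADCO_{ymd_dash}.nc'
    # the csh script then feeds infile/metfile/outfile to vertintegral via the heredoc
    d += timedelta(days=1)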
pd.read_csv('~/rbdinterp_linear_smooth_201808_pt2.csv',index_col=0) 41 | 42 | wint_avg_trop= np.asarray(pd.read_csv('~/rbdinterp_linear_smooth_201901_NO2.csv',index_col=0))*1000 43 | summer_avg_trop = np.asarray((summer_regrid2+summer_regrid)/2)*1000 44 | 45 | 46 | # pull in column 47 | 48 | dwint = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 49 | dsum = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 50 | 51 | filestartswith = 'CCTM_CONC_v385' # 52 | fs = next(os.walk(dwint))[2] 53 | fs = [x for x in fs if x.startswith(filestartswith)] 54 | f_wint=sorted(fs) 55 | fs = next(os.walk(dsum))[2] 56 | fs = [x for x in fs if x.startswith(filestartswith)] 57 | f_sum=sorted(fs) 58 | 59 | time = 13 60 | summer_cmaq_trop = np.asarray([Dataset(dsum+f_sum[i])['NO2'][time][0:31].sum(axis=0) for i in range(len(f_sum))]) 61 | winter_cmaq_trop = np.asarray([Dataset(dwint+f_wint[i])['NO2'][time][0:31].sum(axis=0) for i in range(len(f_wint))]) 62 | 63 | summer_cmaq_avg_trop = summer_cmaq_trop.mean(axis=0) 64 | winter_cmaq_avg_trop =winter_cmaq_trop.mean(axis=0) 65 | 66 | 67 | 68 | # functions 69 | def stats_normalized(data,prediction): 70 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 71 | mu_d,mu_p = np.mean(x),np.mean(y) 72 | nmb = np.sum(y-x)/np.sum(x)*100 73 | nme = np.sum(np.abs(y-x))/np.sum(x)*100 74 | r,p = st.pearsonr(x,y) 75 | return mu_d,mu_p,nmb,nme,r,p 76 | 77 | tropstats = pd.DataFrame(np.asarray([stats_normalized(summer_avg_trop,summer_cmaq_avg_trop),stats_normalized(wint_avg_trop,winter_cmaq_avg_trop)])) 78 | tropstats.columns = ['mu_d','mu_p','bias','rmse','r','p'] 79 | tropstats.index = ['Summer 2018','Winter 2019'] 80 | tropstats.to_csv('~/TroposphereStats.csv') 81 | 82 | -------------------------------------------------------------------------------- /convert_netcdf_to_geotif.py: -------------------------------------------------------------------------------- 1 | # CONVERT CMAQ NETCDF OUTPUT FILE TO RASTER / GEOTIF / SHAPEFILE 2 | 3 | import rioxarray 4 | import xarray 5 | import numpy as np 6 | 7 | d = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/o3/' 8 | 9 | dir='/projects/b1045/jschnell/ForStacy/' 10 | ll='latlon_ChicagoLADCO_d03.nc' 11 | ll = xarray.open_dataset(dir+ ll) 12 | 13 | fnames = ['COMBINE_ACONC_201808.nc', 'dailymaxozone_201808.nc', 'NO2_201808.nc ', 'O3_201808.nc', 'pm25_201808.nc', 'COMBINE_ACONC_201901.nc', 'dailymaxozone_201901.nc', 'NO2_201901.nc', 'O3_201901.nc', 'pm25_201901.nc'] 14 | 15 | 16 | # ------- DOING AUG O3 17 | 18 | fnames_out = ['dailymaxozone_201808'+str(i+1).zfill(2)+'.tif' for i in range(31)] #set up names for files out 19 | 20 | for i in range(31): # number of days in these files 21 | xds = xarray.open_dataset(d+ 'dailymaxozone_201808.nc') 22 | # I'm writing out each time step as its own file name and only taking the first layer 23 | # hence taking the ith time step and 0th layer 24 | data = xds["O3"][i][0] 25 | # Key here is literally spelling out the indices and the coordinates with x, y 26 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 27 | # and this is how you write out the file 28 | foo.T.rio.to_raster(fnames_out[i]) 29 | 30 | # ------- DOING JAN O3 31 | 32 | fnames_out = ['dailymaxozone_201901'+str(i+1).zfill(2)+'.tif' for i in 
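stats_normalized() above returns normalized mean bias and normalized mean error in percent (even though tropstats above labels those columns 'bias' and 'rmse'). A small self-check with synthetic arrays, assuming the function is defined exactly as above:

import numpy as np
import scipy.stats as st

# synthetic "observed" and "modeled" fields, with a NaN to exercise the masking
obs = np.array([1.0, 2.0, np.nan, 4.0])
mod = np.array([1.5, 1.5,  3.0,   5.0])

mu_d, mu_p, nmb, nme, r, p = stats_normalized(obs, mod)
# NMB = 100 * sum(mod - obs) / sum(obs) over the non-NaN pairs = 100*(0.5 - 0.5 + 1.0)/7 ~ 14.3 %
# NME = 100 * sum(|mod - obs|) / sum(obs)                      = 100*(0.5 + 0.5 + 1.0)/7 ~ 28.6 %
print(round(nmb, 1), round(nme, 1), round(r, 2))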
range(31)] 33 | 34 | for i in range(31): # number of days in these files 35 | xds = xarray.open_dataset(d+ 'dailymaxozone_201901.nc') 36 | data = xds["O3"][i][0] 37 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 38 | foo.T.rio.to_raster(fnames_out[i]) 39 | 40 | # ------- DOING JAN O3 41 | 42 | fnames_out = ['dailymaxozone_201901'+str(i+1).zfill(2)+'.tif' for i in range(31)] 43 | 44 | for i in range(31): # number of days in these files 45 | xds = xarray.open_dataset(d+ 'dailymaxozone_201901.nc') 46 | data = xds["O3"][0][0] # theres only 1 timestep here 47 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 48 | foo.T.rio.to_raster(fnames_out[i]) 49 | 50 | # ------- AVERAGE O3 51 | 52 | fnames_in = ['O3_201808.nc','O3_201901.nc' ] 53 | fnames_out = ['O3_201808.tif','O3_201901.tif' ] 54 | 55 | for i in range(len(fnames_in)): # number of days in these files 56 | xds = xarray.open_dataset(d+ fnames_in[i]) 57 | data = xds["O3"][0][0] 58 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 59 | foo.T.rio.to_raster(fnames_out[i]) 60 | 61 | 62 | # ------- DOING NO2 63 | 64 | fnames_in = ['NO2_201808.nc','NO2_201901.nc' ] 65 | fnames_out = ['NO2_201808.tif','NO2_201901.tif' ] 66 | 67 | for i in range(len(fnames_in)): # number of days in these files 68 | xds = xarray.open_dataset(d+ fnames_in[i]) 69 | data = xds["NO2"][0][0] 70 | foo=xarray.DataArray(data, coords={"x": np.arange(0,len(ll.lat)),"y": np.arange(0,len(ll.lat[0])),"latitude": (["x","y"],ll.lat),"longitude": (["x","y"],ll.lon)},dims=["x","y"]) 71 | foo.T.rio.to_raster(fnames_out[i]) 72 | -------------------------------------------------------------------------------- /emissions_chg_plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------------------ 4 | # Libraries 5 | #-------------- 6 | from matplotlib import pyplot as plt ; from matplotlib import colors 7 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 8 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 9 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 10 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 11 | from netCDF4 import Dataset 12 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 13 | import matplotlib.path as mpath; import seaborn as sns 14 | 15 | # files 16 | dir='/projects/b1045/jschnell/ForStacy/' 17 | ll='latlon_ChicagoLADCO_d03.nc' 18 | emis='emis_20180801_noSchoolnoBusnoRefuse_minus_base.nc' 19 | emis='emis_20180801_noSchool_minus_base.nc' 20 | ll=Dataset(dir+ll,'r') 21 | lat,lon=ll['lat'][:],ll['lon'][:] 22 | 23 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 24 | 25 | # Start pulling and cropping data 26 | chi = gpd.GeoDataFrame.from_file(path) 27 | 28 | 29 | #pull in files and variables 30 | ncfile= Dataset(dir+emis,'r') 31 | df_lat,df_lon=pd.DataFrame(lat),pd.DataFrame(lon) 32 | no2= 
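The DataArrays written above carry the 2-D latitude/longitude arrays only as auxiliary coordinates, use plain row/column indices for x/y, and never get a CRS, so the resulting GeoTIFFs come out in pixel coordinates rather than a geographic reference. A hedged sketch of a georeferenced export for the simpler case of a regular 1-D lat/lon grid (the EPSG code is an assumption; a curvilinear grid like this one would first need regridding or ground control points):

import numpy as np
import xarray
import rioxarray  # registers the .rio accessor

# hypothetical regular grid roughly covering the Chicago domain
lon1d = np.linspace(-89.0, -87.0, 200)
lat1d = np.linspace(41.0, 43.0, 240)
field = np.random.rand(240, 200)

da = xarray.DataArray(field, coords={"y": lat1d, "x": lon1d}, dims=["y", "x"])
da = da.rio.set_spatial_dims(x_dim="x", y_dim="y")
da = da.rio.write_crs("EPSG:4326")        # assumption: plain WGS84 lat/lon
da.rio.to_raster("example_geolocated.tif")

# reopen to confirm the CRS and transform survived the round trip
check = rioxarray.open_rasterio("example_geolocated.tif")
print(check.rio.crs)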
pd.DataFrame(Dataset(dir+emis,'r')['NO2'][13][0][:])*10e2 33 | df=pd.DataFrame(no2[:]) 34 | 35 | #find all rows and columns where the change is 0 and drop them 36 | no2_drop=df.loc[~(df==0).all(axis=1)] 37 | 38 | # given where no2 values are 0, filter out the lat lons 39 | # ie. drop the outside parts that are 0 change in the array 40 | data= np.array(df.loc[~(df==0).all(axis=1)]) 41 | lat= np.array(df_lat.loc[~(df==0).all(axis=1)]) 42 | lon= np.array(df_lon.loc[~(df==0).all(axis=1)]) 43 | 44 | 45 | # files 46 | emis1='emis_20180801_noSchoolnoBusnoRefuse_minus_base.nc' 47 | 48 | #pull in files and variables 49 | ncfile1= Dataset(dir+emis1,'r') 50 | 51 | no21= pd.DataFrame(Dataset(dir+emis1,'r')['NO2'][13][0][:])*10e2 52 | df1=pd.DataFrame(no21[:]) 53 | 54 | # drop outside parts that are 0 in the array 55 | data1= np.array(df1.loc[~(df1==0).all(axis=1)]) 56 | lat1= np.array(df_lat.loc[~(df1==0).all(axis=1)]) 57 | lon1= np.array(df_lon.loc[~(df1==0).all(axis=1)]) 58 | data=data-data1 59 | 60 | crs_new = ccrs. AlbersEqualArea(central_longitude=(chi.bounds.mean().minx+chi.bounds.mean().maxx)/2) 61 | 62 | 63 | # get shape outside 64 | union=gpd.GeoSeries(unary_union(chi.geometry)) 65 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 66 | 67 | # make fig object 68 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 6)) 69 | 70 | #set up data for plotting via levels 71 | vmax=pd.DataFrame(data).max().max() 72 | vmin= int(pd.DataFrame(data).min().min()) 73 | vmax=-.5 74 | vmin=-1.5 75 | levels = np.linspace(vmin, int(vmax), 15) 76 | 77 | 78 | # get rid of values outside the levels we are contouring to 79 | data[pd.DataFrame(data)vmax]=vmax 97 | 98 | # set boundary as outer extent by making a matplotlib path object and adding that geometry 99 | # i think setting the boundary before you plot the data actually crops the data to the shape, so set ax first 100 | axs.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 101 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 102 | #bold dis 45 103 | axs.add_geometries(gpd.geoseries.GeoSeries(chi_shapefile[chi_shapefile['NAMELSAD']=='State House District 45'].geometry), crs=crs_new,facecolor='none', edgecolor='black', linewidth=2.0) 104 | 105 | #plot the gridded data by using contourf 106 | cs=plt.pcolormesh(lon,lat,data,cmap= "magma_r", transform=crs_new,vmin=vmin,vmax=vmax) 107 | # add landmarks with scatterplot 108 | midway= -87.7522,41.7868 109 | ohare = -87.9073, 41.9842 110 | loop = -87.6251,41.8786 111 | axs.annotate(xy=midway,s="Midway",color='white') 112 | axs.annotate(xy=ohare,s="OHare",color='white') 113 | axs.annotate(xy=loop,s="Loop",color='white') 114 | 115 | # annotate dist: gpd.geoseries.GeoSeries(chi_shapefile[chi_shapefile['NAMELSAD']=='State House District 45'].geometry).centroid 116 | axs.annotate(xy=(-88.10863773846053, 41.90002038299817), s="Dist 45", color='white') 117 | 118 | # set axes extents from shapefile 119 | yl=41.65;yu=42.3 120 | xu=-87.47;xl=-88.3 121 | axs.set_extent([xl,xu,yl,yu],crs= crs_new) 122 | 123 | # title 124 | axs.set_title(var+' at Daytime, Aug. 
2018') 125 | 126 | #add colorbar and label 127 | cbar=plt.colorbar(cs,boundaries=levels) 128 | #cbar.ax.set_ylabel('100 * ' +ncfile[0][var].units) 129 | cbar.set_ticks(levels) 130 | 131 | # add state lines 132 | import cartopy.feature as cfeature 133 | states_provinces = cfeature.NaturalEarthFeature( 134 | category='cultural', 135 | name='admin_1_states_provinces_lines', 136 | scale='50m',facecolor='none') 137 | 138 | axs.add_feature(cfeature.STATES, edgecolor='black') 139 | 140 | #add chi neighbs 141 | #rep_districts_shapefile.plot(ax=axs, transform= crs_new,facecolor='None',edgecolor='grey',alpha=0.5) 142 | 143 | #add epa monitors 144 | #where are EPA monitors in CHI area 145 | #latttt=[41.920009, 42.062053, 41.755832, 41.855243,41.984332, 41.801180, 41.751400] 146 | #lonbbb=[-87.672995,-87.675254,-87.545350,-87.752470,-87.792002,-87.832349, -87.713488] 147 | #axs.scatter(lonbbb, latttt, marker = '*', color = 'white', s = 30) 148 | 149 | plt.savefig(var+'_10lvl_daytime_dist45.png') 150 | 151 | plt.show() 152 | 153 | -------------------------------------------------------------------------------- /validation/MODEL_EVALUATION_r2_mse_bias_for_d02_d03.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from scipy.stats import pearsonr 3 | import numpy as np 4 | from sklearn.metrics import mean_squared_error 5 | 6 | 7 | def corr(x,y): 8 | x,y=np.asarray(x),np.asarray(y) 9 | nas = np.logical_or(np.isnan(x), np.isnan(y)) 10 | x,y = x[~nas], y[~nas] 11 | corr = pearsonr(x,y)[0] 12 | bias = (np.array(y)-np.array(x)).mean() 13 | #mse = mean_squared_error(x,y) 14 | return corr,bias#,mse 15 | 16 | 17 | def get_corrs(fnames): 18 | corrs=[]; bias=[]; mses=[] 19 | # 20 | for i in range(len(fnames)): 21 | df=pd.read_csv(fnames[i]) 22 | latlon=[str(df.Latitude[i]) + " " + str(df.Longitude[i]) for i in range(len(df))] 23 | df['latlon']=latlon 24 | if df['Units of Measure'].unique()[0]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000 25 | elif df['Units of Measure'].unique()[1]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000 26 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 27 | cor,bia=corr(x,y) 28 | corrs.append(cor); bias.append(bia); #mses.append(mse) 29 | 30 | # 31 | return corrs, bias, mses 32 | 33 | 34 | 35 | fnamesd02=['CO_d02_2018_8_EPA_CMAQ_Combine.csv', 36 | 'CO_d02_2019_1_EPA_CMAQ_Combine.csv', 37 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv', 38 | 'NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 39 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv', 40 | 'O3_d02_2019_1_EPA_CMAQ_Combine.csv', 41 | 'SO2_d02_2018_8_EPA_CMAQ_Combine.csv', 42 | 'SO2_d02_2019_1_EPA_CMAQ_Combine.csv'] 43 | 44 | fnamesd03=['CO_d03_2018_8_EPA_CMAQ_Combine.csv', 45 | 'CO_d03_2019_1_EPA_CMAQ_Combine.csv', 46 | 'NO2_d03_2018_8_EPA_CMAQ_Combine.csv', 47 | 'NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 48 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv', 49 | 'O3_d03_2019_1_EPA_CMAQ_Combine.csv', 50 | 'SO2_d03_2018_8_EPA_CMAQ_Combine.csv', 51 | 'SO2_d03_2019_1_EPA_CMAQ_Combine.csv'] 52 | 53 | 54 | corrd02,biasd02,msed02= get_corrs(fnamesd02) 55 | corrd03,biasd03,msed03= get_corrs(fnamesd03) 56 | 57 | namesd03 =[fnamesd03[i].split('_EPA_CMAQ_Combine.csv')[0] for i in range(len(fnamesd03))] 58 | namesd02=[fnamesd02[i].split('_EPA_CMAQ_Combine.csv')[0] for i in range(len(fnamesd02))] 59 | 60 | chems=['Aug CO','Jan CO','Aug NO2','Jan NO2','Aug O3','Jan O3','Aug SO2','Jan SO2'] 61 | 62 | 
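corr() above drops any pair where either series is NaN and returns only (r, bias); because the mean-squared-error line is commented out, mses stays empty in get_corrs(), and the three-way unpacking cor,bia,mse=corr(x,y) in get_corrs_monthly() further down will fail until that line is restored. A small usage sketch with synthetic data, assuming corr() is defined as above:

import numpy as np

obs = np.array([10.0, np.nan, 30.0, 40.0])
mod = np.array([12.0, 20.0,  np.nan, 38.0])

r, bias = corr(obs, mod)   # only the pairs (10, 12) and (40, 38) survive the NaN filter
print(r, bias)             # bias = mean(mod - obs) = 0.0; with just two surviving pairs r is trivially 1.0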
final=pd.DataFrame([chems,corrd02,biasd02,corrd03,biasd03]).T 63 | final.columns=['chem/date','r2 d02','bias d02','r2 d03','bias d03'] 64 | 65 | pd.options.display.float_format = '{:,.2f}'.format 66 | final 67 | 68 | 69 | 70 | def get_corrs_monthly(fnames): 71 | corrs=[]; bias=[]; mses=[] 72 | # 73 | for i in range(len(fnames)): 74 | df=pd.read_csv(fnames[i]) 75 | #for i in range(1): 76 | if df['Units of Measure'][0]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000 77 | df['date']=pd.to_datetime(df['level_0']) 78 | df = df.set_index('date').sort_index() 79 | df=df.groupby(['Longitude','Latitude']).mean() 80 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 81 | cor,bia,mse=corr(x,y) 82 | corrs.append(cor); bias.append(bia); mses.append(mse) 83 | # 84 | return corrs, bias, mses 85 | 86 | 87 | # 7day weekly means 88 | df=pd.read_csv(fnamesd03[0]); df['date']=pd.to_datetime(df['level_0']); df = df.set_index('date').sort_index(); df=df.groupby(['Longitude','Latitude']).resample('7d').mean() 89 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 90 | cor,bia,mse=corr(x,y) 91 | df.plot.scatter('Sample Measurement','CMAQ',c='Longitude',colormap='viridis') 92 | 93 | #7day rolling means 94 | df=pd.read_csv(fnamesd03[0]); df['date']=pd.to_datetime(df['level_0']); df = df.set_index('date').sort_index(); df=df.groupby(['Longitude','Latitude']).rolling('3h').mean() 95 | x,y = np.array(df['Sample Measurement']),np.array(df['CMAQ']) 96 | cor,bia,mse=corr(x,y) 97 | df.plot.scatter('Sample Measurement','CMAQ',c='Longitude',colormap='viridis') 98 | 99 | 100 | # REDO THIS: Two sentences in the methods 101 | # Subset d03 and d02 --> pull same stations, see how the r2 changes 102 | # make daily and monthly 103 | # here's a more holistic picture 104 | 105 | # >> make january o3 gif 106 | # Bias of 30 -- is there even any measurements for o3 in january 107 | # i think the offset is just stepwise changed 108 | 109 | # making the gifs 110 | # throw area average hours together and rank 111 | #--> almost the worst 95%ile, max 112 | 113 | # d02-> d03 gifs do roads show up can you see individiaul power plants 114 | 115 | 116 | # Only d03 stations with d02 117 | #------------------------------------------------------ 118 | 119 | head=['State Code', 'County Code', 'Site Num', 'Parameter Code', 'POC', 'Latitude', 'Longitude', 'Datum', 'Parameter Name', 'Date Local', 'Time Local', 'Date GMT', 'Time GMT', 'Sample Measurement', 'Units of Measure', 'MDL', 'Uncertainty', 'Qualifier', 'Method Type', 'Method Code', 'Method Name', 'State Name', 'County Name', 'Date of Last Change','date'] 120 | 121 | def only_d03_corr(df,df2): 122 | #for i in range(1): 123 | df['date']=pd.to_datetime(df['level_0']) 124 | df2['date']=pd.to_datetime(df2['level_0']) 125 | latlon=[str(df.Latitude[i]) + " " + str(df.Longitude[i]) for i in range(len(df))] 126 | latlon2=[str(df2.Latitude[i]) + " " + str(df2.Longitude[i]) for i in range(len(df2))] 127 | df['latlon']=latlon; 128 | df2['latlon']=latlon2; 129 | if df['Units of Measure'].unique()[0]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000; df2['Sample Measurement']=df2['Sample Measurement']*1000 130 | elif df['Units of Measure'].unique()[1]=='Parts per million': df['Sample Measurement']=df['Sample Measurement']*1000; df2['Sample Measurement']=df2['Sample Measurement']*1000 131 | m=pd.merge(df2,df,on=['latlon','date'],suffixes=('_d02', '_d03')) 132 | x,y,z = np.array(m['Sample 
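only_d03_corr() above lines the two domains up by joining on station coordinates and timestamp, so only station-hours present in both the d02 and d03 files are compared. A toy sketch of that join:

import pandas as pd

# toy frames standing in for the d03 and d02 station/CMAQ pairings
d03 = pd.DataFrame({'latlon': ['41.8 -87.6'] * 2,
                    'date': pd.to_datetime(['2018-08-01 00:00', '2018-08-01 01:00']),
                    'Sample Measurement': [18.0, 20.0],
                    'CMAQ': [17.0, 21.0]})
d02 = d03.copy()
d02['CMAQ'] = [15.0, 19.0]

# same join as in only_d03_corr(): keep only rows present in both domains,
# with _d02/_d03 suffixes distinguishing the overlapping columns
m = pd.merge(d02, d03, on=['latlon', 'date'], suffixes=('_d02', '_d03'))
print(m[['CMAQ_d02', 'CMAQ_d03', 'Sample Measurement_d02']])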
Measurement_d02']),np.array(m['CMAQ_d02']),np.array(m['CMAQ_d03']) 133 | corrd02=corr(x,y)[:2] 134 | corrd03=corr(x,z)[:2] 135 | nstations=len(m.latlon.unique()) 136 | return corrd02,corrd03,nstations,np.nanmean(x),y.mean(),z.mean() 137 | 138 | 139 | corrd02,corrd03=[],[] 140 | biasd02,biasd03=[],[] 141 | nstation=[] 142 | avgstn=[];avgcmq2=[];avgcmq3=[] 143 | for i in range(len(fnamesd02)): 144 | df,df2=pd.read_csv(fnamesd03[i]),pd.read_csv(fnamesd02[i]) 145 | c2,c3,ns,xm,ym,zm = only_d03_corr(df,df2) 146 | corrd02.append(c2[0]);corrd03.append(c3[0]) 147 | biasd02.append(c2[1]);biasd03.append(c3[1]); 148 | nstation.append(ns) 149 | avgstn.append(xm); avgcmq2.append(ym); avgcmq3.append(zm) 150 | 151 | 152 | 153 | chems=['Aug CO','Jan CO','Aug NO2','Jan NO2','Aug O3','Jan O3','Aug SO2','Jan SO2'] 154 | 155 | final=pd.DataFrame([chems,corrd02,biasd02,corrd03,biasd03,avgstn,avgcmq2,avgcmq3,nstation]).T 156 | 157 | final.columns=['chem/date','r2 d02','bias d02','r2 d03','bias d03','avg stn','avg d02','avg d03','n station'] 158 | 159 | pd.options.display.float_format = '{:,.2f}'.format 160 | final 161 | 162 | -------------------------------------------------------------------------------- /compare_CMAQ_to_EPAstation.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash python3 2 | 3 | #---------------------------------------------------------# 4 | # Stacy Montgomery, Aug 2019 5 | # Purpose: find aqs stations within model domain, 6 | # pull & format aqs data for comparison 7 | # 8 | 9 | # Link to air tech website with year you're interested in -- NOT WORKING 10 | #linktoaqs='http://files.airnowtech.org/?prefix=airnow/2018/' 11 | # USE: 12 | #---------------------------------------------------------# 13 | 14 | # LIBRARIES 15 | #---------------------------------------------------------# 16 | from datetime import timedelta, date, datetime; import pandas as pd 17 | import numpy as np 18 | from netCDF4 import Dataset 19 | from wrf import latlon_coords, getvar 20 | import glob, os 21 | import matplotlib.pyplot as plt 22 | 23 | #import requests 24 | #from bs4 import BeautifulSoup 25 | 26 | 27 | # USER INPUT 28 | #---------------------------------------------------------# 29 | # Find stations within bounding box 30 | #llon,llat,ulon,ulat=-98.854465,39.517152,-74.289036,49.678626 #use bounds from griddesc 31 | 32 | # Date range to pull from AQS --- if commented out, defined by cmaq files avail 33 | #start_dt = date(2018, 8, 1); end_dt = date(2018, 9, 1) 34 | 35 | 36 | # Directories for cmaq + EPA 37 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 38 | dir_epa='/home/asm0384/CMAQcheck/' 39 | 40 | # to get grid, pull WRF coords 41 | runname='wrf_pure_PXLSM' 42 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' # to get grid 43 | 44 | grid='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 45 | 46 | # CMAQ RUN things 47 | domain='d03' 48 | time='hourly' 49 | year='2018' 50 | month='8' 51 | epa_code=['42401','42602','44201']; var=['SO2','NO2','O3'] #numerical identifiers and corresponding vars 52 | epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 53 | 54 | 55 | 56 | # USER DEF FUNC 57 | #---------------------------------------------------------# 58 | 59 | #------ DATERANGE 60 | # 61 | # 62 | # * dates must be in yyyymmdd format 63 | def daterange(date1, date2): 64 | for n in range(int ((date2 - date1).days)+1): 65 | 
yield date1 + timedelta(n) 66 | 67 | #------ VARfromIND 68 | # 69 | # 70 | def getVARfromIND(ncfile,indxy, filenames,varname): 71 | t2d01=[ncfile[z][varname][i] for z in range(len(filenames)) for i in range(24)] 72 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(filenames_d01))] for l in range(len(indxy))] 73 | return t2d01_xx 74 | 75 | #------ FIND INDEX 76 | # 77 | # 78 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 79 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 80 | # stn -- points 81 | # wrf -- list 82 | #for iz in range(1): 83 | xx=[];yy=[] 84 | for i in range(len(stn_lat)): 85 | #for i in range(1): 86 | abslat = np.abs(wrf_lat-stn_lat[i]) 87 | abslon= np.abs(wrf_lon-stn_lon[i]) 88 | c = np.maximum(abslon,abslat) 89 | latlon_idx = np.argmin(c) 90 | x, y = np.where(c == np.min(c)) 91 | #add indices of nearest wrf point station 92 | xx.append(x) 93 | yy.append(y) 94 | # 95 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 96 | #return indices list 97 | return xx, yy 98 | 99 | #------ PULL CMAQ 100 | # 101 | # 102 | def filter_EPA(file, start_dt, end_dt,llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,VAR): 103 | #read in file 104 | f=pd.read_csv(file) 105 | # Crop given bounding box 106 | df=f[(f['Latitude'] >= llat) & (f['Latitude'] <= ulat)] 107 | df=df[(df['Longitude'] >= llon) & (df['Longitude'] <= ulon)] 108 | df['Datetime GMT']=pd.to_datetime(df['Date GMT']+ ' ' + df['Time GMT']) 109 | df= df[(df['Datetime GMT'] >= pd.to_datetime(start_dt) ) & (df['Datetime GMT'] <= pd.to_datetime(end_dt))] 110 | lon,lat=df['Longitude'].unique(),df['Latitude'].unique() 111 | df.reset_index(inplace=True) 112 | return lon,lat,df 113 | #somehow make the 0s match up 114 | 115 | 116 | #------ RESAMPLE DF 117 | # Take in real data, fill in missing values with missing values but keep that date open 118 | # 119 | def resample_df(df,lat,lon,start_dt,end_dt): 120 | dff=pd.DataFrame() 121 | # get list of target dates 122 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 123 | #set index as dates 124 | df.set_index('Datetime GMT',inplace=True) 125 | # go through each locations and fill in missing dates 126 | for i in range(len(lat)): 127 | check=df[(df['Latitude']==lat[i]) & (df['Longitude']==lon[i])] 128 | #aka: if there are multiple sensors of same thing, just average 129 | if len(check['POC'].unique())>1 or len(check) > len(t_index): 130 | sample = check.resample('H').mean().reindex(t_index).fillna(float('nan'))['Sample Measurement'] 131 | df2=check[check['POC']==1].resample('H').asfreq().reindex(t_index).fillna(float('nan')) 132 | df2['Sample Measurement']=sample 133 | #print('%s in %s,%s is irregular'%(check['Site Num'][0] ,check['County Name'][0],check['State Name'][0],)) 134 | else: #just fill out values 135 | df2 = check.resample('H').asfreq().reindex(t_index).fillna(float('nan')) 136 | #averaged or not, add to final df 137 | dff=dff.append(df2) 138 | #return index with index rather than dates 139 | dff.reset_index(inplace=True) 140 | return dff 141 | 142 | 143 | # MAIN 144 | #---------------------------------------------------------# 145 | 146 | # $1 Get CMAQ file names 147 | cmaq_files=[] 148 | os.chdir(dir_cmaq) 149 | for file in glob.glob("COMBINE_ACONC_*"): 150 | cmaq_files.append(file) 151 | 152 | cmaq_files.sort() 153 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 154 | 
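find_index() above picks the grid cell whose max(|Δlon|, |Δlat|) is smallest, a Chebyshev-style criterion. A purely illustrative alternative (not the repo's function) using squared lat/lon distance, which usually lands on the same cell but is the more conventional nearest-neighbour choice:

import numpy as np

def find_index_euclid(stn_lon, stn_lat, grid_lon, grid_lat):
    """Nearest grid cell by squared lat/lon distance."""
    xx, yy = [], []
    for slon, slat in zip(stn_lon, stn_lat):
        d2 = (grid_lat - slat)**2 + (grid_lon - slon)**2
        i, j = np.unravel_index(np.argmin(d2), d2.shape)
        xx.append(i); yy.append(j)
    return xx, yy

# toy 3x3 grid check
glon, glat = np.meshgrid(np.array([-88.0, -87.5, -87.0]), np.array([41.0, 41.5, 42.0]))
print(find_index_euclid([-87.6], [41.9], glon, glat))   # -> ([2], [1])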
start_dt=datetime(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 155 | end_dt=datetime(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8]),23) 156 | 157 | 158 | # Get cmaq grid 159 | #cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 160 | cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)['lat']),np.asarray(Dataset(grid)['lon']) 161 | llat,ulat,llon,ulon=cmaq_lat.min(), cmaq_lat.max(), cmaq_lon.min(), cmaq_lon.max() 162 | 163 | # cmas output 164 | # fname='COMBINE_ACONC_20180810.nc' 165 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 166 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 167 | 168 | # drop last day to make loop better? 169 | 170 | # Loop through each variable and check 171 | for loop in range(len(epa_files)): 172 | lon,lat,df= filter_EPA(epa_files[loop], start_dt, end_dt, llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,var[loop]) 173 | xx,yy= find_index(lon, lat, cmaq_lon, cmaq_lat) 174 | dff= resample_df(df,lat,lon,start_dt,end_dt) 175 | dff['CMAQ']=float('nan') 176 | for numday in range(len(cmaq)): 177 | s=pd.DataFrame([[cmaq[numday][var[loop]][time][0][xx[idx]][yy[idx]] for time in range(24)] for idx in range(len(xx))]).T 178 | # 179 | for station in range(len(xx)): 180 | dff['CMAQ'][24*numday+ station*len(t_index):(24*numday+ station*len(t_index)+24)]=s[station] 181 | #dff['level_0'][(24*numday+ station*len(t_index)):(24*numday+ station*len(t_index)+24)] # check eq 182 | # 183 | dff.to_csv(dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month)); 184 | # 185 | print('Done with %s'%(var[loop])); 186 | 187 | 188 | # plot cmaq comparison 189 | epa_condense=[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month) for loop in range(len(epa_code))] 190 | so2_epa,no2_epa,o3_epa,co_epa = [pd.read_csv(epa_condense[i]) for i in range(len(epa_condense))] 191 | 192 | 193 | #1 to 1 plots 194 | o3df=o3.groupby('Site Num') 195 | o3df.plot.scatter('Sample Measurement','CMAQ',title='Site Num') 196 | -------------------------------------------------------------------------------- /validation/MODEL_EVALUATION_CMAQCheck_part1.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash python3 2 | 3 | #---------------------------------------------------------# 4 | # Stacy Montgomery, Aug 2019 5 | # Purpose: find aqs stations within model domain, 6 | # pull & format aqs data for comparison 7 | # 8 | 9 | # Link to air tech website with year you're interested in -- NOT WORKING 10 | #linktoaqs='http://files.airnowtech.org/?prefix=airnow/2018/' 11 | # USE: 12 | #---------------------------------------------------------# 13 | 14 | # LIBRARIES 15 | #---------------------------------------------------------# 16 | from datetime import timedelta, date, datetime; import pandas as pd 17 | import numpy as np 18 | from netCDF4 import Dataset 19 | from wrf import latlon_coords, getvar 20 | import glob, os 21 | import matplotlib.pyplot as plt 22 | 23 | #import requests 24 | #from bs4 import BeautifulSoup 25 | 26 | 27 | # USER INPUT 28 | #---------------------------------------------------------# 29 | # Find stations within bounding box 30 | #llon,llat,ulon,ulat=-98.854465,39.517152,-74.289036,49.678626 #use bounds from griddesc 31 | 32 | # Date range to pull from AQS --- if commented out, defined by cmaq files avail 33 | #start_dt = date(2018, 8, 1); end_dt = date(2018, 9, 1) 34 | 35 | 36 | # Directories for cmaq + EPA 37 | 
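The slice arithmetic in the loop above (24*numday + station*len(t_index)) relies on dff stacking each station's full hourly record one after another, so a given day of a given station occupies a fixed 24-row block. A toy sketch of that layout:

import numpy as np

n_days = 3                       # number of daily CMAQ files
n_hours = 24 * n_days            # len(t_index): full hourly record per station
n_stations = 2

# station s occupies rows [s*n_hours, (s+1)*n_hours); day d of that station starts 24*d rows in
cmaq_col = np.full(n_stations * n_hours, np.nan)
for d in range(n_days):
    for s in range(n_stations):
        start = 24*d + s*n_hours
        cmaq_col[start:start+24] = d + 0.1*s      # stand-in for the daily CMAQ values

print(cmaq_col[:72:24])                  # first station, one value per day -> [0. 1. 2.]
print(cmaq_col[n_hours:n_hours+72:24])   # second station                   -> [0.1 1.1 2.1]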
dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 38 | dir_epa='/home/asm0384/CMAQcheck/' 39 | dir_ncdc='/' 40 | 41 | # to get grid, pull WRF coords 42 | runname='wrf_pure_PXLSM' 43 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' # to get grid 44 | 45 | grid='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 46 | 47 | # CMAQ RUN things 48 | domain='d02' 49 | time='hourly' 50 | year='2018' 51 | epa_code=['42401','42602','44201']; var=['SO2','NO2','O3'] #numerical identifiers and corresponding vars 52 | epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 53 | 54 | 55 | 56 | # USER DEF FUNC 57 | #---------------------------------------------------------# 58 | 59 | #------ DATERANGE 60 | # 61 | # 62 | # * dates must be in yyyymmdd format 63 | def daterange(date1, date2): 64 | for n in range(int ((date2 - date1).days)+1): 65 | yield date1 + timedelta(n) 66 | 67 | #------ VARfromIND 68 | # 69 | # 70 | def getVARfromIND(ncfile,indxy, filenames,varname): 71 | t2d01=[ncfile[z][varname][i] for z in range(len(filenames)) for i in range(24)] 72 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(filenames_d01))] for l in range(len(indxy))] 73 | return t2d01_xx 74 | 75 | #------ FIND INDEX 76 | # 77 | # 78 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 79 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 80 | # stn -- points 81 | # wrf -- list 82 | #for iz in range(1): 83 | xx=[];yy=[] 84 | for i in range(len(stn_lat)): 85 | #for i in range(1): 86 | abslat = np.abs(wrf_lat-stn_lat[i]) 87 | abslon= np.abs(wrf_lon-stn_lon[i]) 88 | c = np.maximum(abslon,abslat) 89 | latlon_idx = np.argmin(c) 90 | x, y = np.where(c == np.min(c)) 91 | #add indices of nearest wrf point station 92 | xx.append(x) 93 | yy.append(y) 94 | # 95 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 96 | #return indices list 97 | return xx, yy 98 | 99 | #------ PULL CMAQ 100 | # 101 | # 102 | def filter_EPA(file, start_dt, end_dt,llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,VAR): 103 | #read in file 104 | f=pd.read_csv(file) 105 | # Crop given bounding box 106 | df=f[(f['Latitude'] >= llat) & (f['Latitude'] <= ulat)] 107 | df=df[(df['Longitude'] >= llon) & (df['Longitude'] <= ulon)] 108 | df['Datetime GMT']=pd.to_datetime(df['Date GMT']+ ' ' + df['Time GMT']) 109 | df= df[(df['Datetime GMT'] >= pd.to_datetime(start_dt) ) & (df['Datetime GMT'] <= pd.to_datetime(end_dt))] 110 | lon,lat=df['Longitude'].unique(),df['Latitude'].unique() 111 | df.reset_index(inplace=True) 112 | return lon,lat,df 113 | #somehow make the 0s match up 114 | 115 | 116 | latd02min, latd02max, lond02min, lond02max = wrf_latd02.min().min(), wrf_latd02.max().max(),wrf_lond02.min().min(),wrf_lond02.max().max() 117 | 118 | 119 | latd03min, latd03max, lond03min, lond03max = wrf_latd03.min().min(), wrf_latd03.max().max(),wrf_lond03.min().min(),wrf_lond03.max().max() 120 | 121 | #------ RESAMPLE DF 122 | # Take in real data, fill in missing values with missing values but keep that date open 123 | # 124 | def resample_df(df,lat,lon,start_dt,end_dt): 125 | dff=pd.DataFrame() 126 | # get list of target dates 127 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 128 | #set index as dates 129 | df.set_index('Datetime GMT',inplace=True) 130 | # go through each locations and fill in missing dates 131 | for i in range(len(lat)): 132 | 
check=df[(df['Latitude']==lat[i]) & (df['Longitude']==lon[i])] 133 | #aka: if there are multiple sensors of same thing, just average 134 | if len(check['POC'].unique())>1 or len(check) > len(t_index): 135 | sample = check.resample('H').mean().reindex(t_index).fillna(float('nan'))['Sample Measurement'] 136 | df2=check[check['POC']==1].resample('H').asfreq().reindex(t_index).fillna(float('nan')) 137 | df2['Sample Measurement']=sample 138 | #print('%s in %s,%s is irregular'%(check['Site Num'][0] ,check['County Name'][0],check['State Name'][0],)) 139 | else: #just fill out values 140 | df2 = check.resample('H').asfreq().reindex(t_index).fillna(float('nan')) 141 | #averaged or not, add to final df 142 | dff=dff.append(df2) 143 | #return index with index rather than dates 144 | dff.reset_index(inplace=True) 145 | return dff 146 | 147 | 148 | # MAIN 149 | #---------------------------------------------------------# 150 | 151 | # $1 Get CMAQ file names 152 | cmaq_files=[] 153 | os.chdir(dir_cmaq) 154 | for file in glob.glob("COMBINE_ACONC_*"): 155 | cmaq_files.append(file) 156 | 157 | cmaq_files.sort() 158 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 159 | start_dt=datetime(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 160 | end_dt=datetime(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8]),23) 161 | 162 | 163 | # Get cmaq grid 164 | #cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 165 | cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)['lat']),np.asarray(Dataset(grid)['lon']) 166 | llat,ulat,llon,ulon=cmaq_lat.min(), cmaq_lat.max(), cmaq_lon.min(), cmaq_lon.max() 167 | 168 | # cmas output 169 | # fname='COMBINE_ACONC_20180810.nc' 170 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 171 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 172 | 173 | # drop last day to make loop better? 174 | 175 | # Loop through each variable and check 176 | for loop in range(len(epa_files)): 177 | lon,lat,df= filter_EPA(epa_files[loop], start_dt, end_dt, llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,var[loop]) 178 | xx,yy= find_index(lon, lat, cmaq_lon, cmaq_lat) 179 | dff= resample_df(df,lat,lon,start_dt,end_dt) 180 | dff['CMAQ']=float('nan') 181 | for numday in range(len(cmaq)): 182 | s=pd.DataFrame([[cmaq[numday][var[loop]][time][0][xx[idx]][yy[idx]] for time in range(24)] for idx in range(len(xx))]).T 183 | # 184 | for station in range(len(xx)): 185 | dff['CMAQ'][24*numday+ station*len(t_index):(24*numday+ station*len(t_index)+24)]=s[station] 186 | #dff['level_0'][(24*numday+ station*len(t_index)):(24*numday+ station*len(t_index)+24)] # check eq 187 | # 188 | dff.to_csv(dir_epa+'%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain)); 189 | # 190 | print('Done with %s'%(var[loop])); 191 | 192 | 193 | 194 | 195 | @@@@@##### 196 | 197 | 198 | Here's a vid of me making a laser driver for my LED based gas sensor. The voltage from a charged LED is dropped by impending light. Incoming light is impeded by absorption from gases at this wavelength. The time for the voltage drop corresponds to the absorption of gas between LEDs, thus giving us the concentration of gas. 
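resample_df() above pads each station's record onto a complete hourly index and averages duplicate POC monitors. The pd.DatetimeIndex(start=..., end=..., freq=...) constructor it relies on is no longer accepted by recent pandas; a sketch of the same padding using pd.date_range instead:

import numpy as np
import pandas as pd

# toy station record with two POC monitors at 00:00 and a missing hour at 01:00
obs = pd.DataFrame({
    'Datetime GMT': pd.to_datetime(['2018-08-01 00:00', '2018-08-01 00:00', '2018-08-01 02:00']),
    'POC': [1, 2, 1],
    'Sample Measurement': [10.0, 12.0, 8.0]})

t_index = pd.date_range('2018-08-01 00:00', '2018-08-01 03:00', freq='1h')
hourly = (obs.set_index('Datetime GMT')['Sample Measurement']
             .resample('1h').mean()      # averages the duplicate POCs at 00:00 -> 11.0
             .reindex(t_index))          # hours with no data stay NaN
print(hourly.values)                     # [11.  nan  8.  nan]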
199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /DataPreprocessing/cmaq_to_stations.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash python3 2 | 3 | #---------------------------------------------------------# 4 | # Stacy Montgomery, Aug 2019 5 | # Purpose: find aqs stations within model domain, 6 | # pull & format aqs data for comparison 7 | # 8 | 9 | # Link to air tech website with year you're interested in -- NOT WORKING 10 | #linktoaqs='http://files.airnowtech.org/?prefix=airnow/2018/' 11 | # USE: 12 | #---------------------------------------------------------# 13 | 14 | # LIBRARIES 15 | #---------------------------------------------------------# 16 | from datetime import timedelta, date,datetime; import pandas as pd 17 | import numpy as np 18 | from netCDF4 import Dataset 19 | from wrf import latlon_coords, getvar 20 | import glob, os 21 | import matplotlib.pyplot as plt 22 | 23 | #import requests 24 | #from bs4 import BeautifulSoup 25 | 26 | 27 | # USER INPUT 28 | #---------------------------------------------------------# 29 | # Find stations within bounding box 30 | #llon,llat,ulon,ulat=-98.854465,39.517152,-74.289036,49.678626 #use bounds from griddesc 31 | 32 | # Date range to pull from AQS --- if commented out, defined by cmaq files avail 33 | #start_dt = date(2018, 8, 1); end_dt = date(2018, 9, 1) 34 | 35 | 36 | # Directories for cmaq + EPA 37 | dir_epa='/home/asm0384/CMAQcheck/' 38 | grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/lat_lon_chicago_d02.nc' 39 | #grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d03/latlon_ChicagoLADCO_d03.nc' 40 | 41 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/postprocess/' 42 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/postprocess/' 43 | 44 | # to get grid, pull WRF coords 45 | #runname='wrf_pure_PXLSM' 46 | #dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' # to get grid 47 | 48 | # CMAQ RUN things 49 | domain='d02' 50 | time='hourly' 51 | year='2019' 52 | month='1' 53 | #epa_code=['42401','42602','44201','42101']; var=['SO2','NO2','O3','CO'] #numerical identifiers and corresponding vars 54 | #epa_code=['44201'];var=['O3'] 55 | epa_code=['88101'] 56 | var = ['PM25_TOT'] 57 | epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 58 | 59 | #names of lat lons in the cmaq grid 60 | la,lo='lat','lon' # for 1.3km 61 | la,lo='LAT','LON' # for 4km 62 | 63 | # USER DEF FUNC 64 | #---------------------------------------------------------# 65 | 66 | 67 | #------ DATERANGE 68 | # 69 | # 70 | # * dates must be in yyyymmdd format 71 | def daterange(date1, date2): 72 | for n in range(int ((date2 - date1).days)+1): 73 | yield date1 + timedelta(n) 74 | 75 | #------ VARfromIND 76 | # 77 | # 78 | def getVARfromIND(ncfile,indxy, filenames,varname): 79 | t2d01=[ncfile[z][varname][i] for z in range(len(filenames)) for i in range(24)] 80 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(filenames_d01))] for l in range(len(indxy))] 81 | return t2d01_xx 82 | 83 | #------ FIND INDEX 84 | # 85 | # 86 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 87 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 88 | # stn -- points 89 | # wrf -- list 90 | #for iz in range(1): 91 | 
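The la/lo switch near the top of cmaq_to_stations.py reflects two different grid files: the 1.33 km file stores 2-D 'lat'/'lon', while the 4 km (d02) file stores 'LAT'/'LON' with extra leading dimensions, which is why the script later squeezes the arrays when their shape is 4-D. A small helper sketch covering both cases (illustrative only, not part of the repo):

import numpy as np
from netCDF4 import Dataset

def read_grid(path, la, lo):
    """Return 2-D lat/lon arrays whether the file stores them as 2-D or 4-D."""
    g = Dataset(path, 'r')
    lat, lon = np.asarray(g[la]), np.asarray(g[lo])
    if lat.ndim == 4:                 # e.g. (TSTEP, LAY, ROW, COL) -> (ROW, COL)
        lat, lon = lat[0][0], lon[0][0]
    return lat, lon

# e.g. for the 4 km d02 grid referenced above:
# lat, lon = read_grid('/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/lat_lon_chicago_d02.nc', 'LAT', 'LON')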
xx=[];yy=[] 92 | for i in range(len(stn_lat)): 93 | #for i in range(1): 94 | abslat = np.abs(wrf_lat-stn_lat[i]) 95 | abslon= np.abs(wrf_lon-stn_lon[i]) 96 | c = np.maximum(abslon,abslat) 97 | latlon_idx = np.argmin(c) 98 | x, y = np.where(c == np.min(c)) 99 | #add indices of nearest wrf point station 100 | xx.append(x) 101 | yy.append(y) 102 | # 103 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 104 | #return indices list 105 | return xx, yy 106 | 107 | #------ PULL CMAQ 108 | # 109 | 110 | def pull_cmaq(file, start_dt, end_dt,llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,VAR): 111 | #read in file 112 | f=pd.read_csv(file) 113 | # Crop given bounding box 114 | df=f[(f['Latitude'] >= llat) & (f['Latitude'] <= ulat)] 115 | df=df[(df['Longitude'] >= llon) & (df['Longitude'] <= ulon)] 116 | df['Datetime GMT']=pd.to_datetime(df['Date GMT']+ ' ' + df['Time GMT']) 117 | df= df[(df['Datetime GMT'] >= pd.to_datetime(start_dt) ) & (df['Datetime GMT'] <= pd.to_datetime(end_dt))] 118 | lon,lat=df['Longitude'].unique(),df['Latitude'].unique() 119 | df.reset_index(inplace=True) 120 | return lon,lat,df 121 | #somehow make the 0s match up 122 | 123 | 124 | #------ RESAMPLE DF 125 | # Take in real data, fill in missing values with missing values but keep that date open 126 | # 127 | def resample_df(df,lat,lon,start_dt,end_dt): 128 | dff=pd.DataFrame() 129 | # get list of target dates 130 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 131 | #set index as dates 132 | df.set_index('Datetime GMT',inplace=True) 133 | # go through each locations and fill in missing dates 134 | for i in range(len(lat)): 135 | check=df[(df['Latitude']==lat[i]) & (df['Longitude']==lon[i])] 136 | #aka: if there are multiple sensors of same thing, just average 137 | if len(check['POC'].unique())>1 or len(check) > len(t_index): 138 | sample = check.resample('H').mean().reindex(t_index).fillna(float('nan'))['Sample Measurement'] 139 | df2=check[check['POC']==1].resample('H').asfreq().reindex(t_index).fillna(float('nan')) 140 | df2['Sample Measurement']=sample 141 | #print('%s in %s,%s is irregular'%(check['Site Num'][0] ,check['County Name'][0],check['State Name'][0],)) 142 | else: #just fill out values 143 | df2 = check.resample('H').asfreq().reindex(t_index).fillna(float('nan')) 144 | #averaged or not, add to final df 145 | dff=dff.append(df2) 146 | #return index with index rather than dates 147 | dff.reset_index(inplace=True) 148 | return dff 149 | 150 | --------------------------------------------------# 151 | 152 | # $1 Get CMAQ file names 153 | cmaq_files=[] 154 | os.chdir(dir_cmaq) 155 | for file in glob.glob("COMBINE_ACONC_*"): 156 | cmaq_files.append(file) 157 | 158 | cmaq_files.sort() 159 | cmaq_files.remove(cmaq_files[-1]) 160 | 161 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 162 | start_dt=date(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 163 | end_dt=datetime(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8]),23,0) 164 | 165 | # Get first date range, pull monitoring station range ... 
unecessary 166 | #dtrange=[] 167 | #for dt in daterange(start_dt, end_dt): 168 | # dtrange.append(dt.strftime("%Y%m%d")) 169 | 170 | # Get cmaq grid 171 | #grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/GRIDCRO2D_Chicago_LADCO_2018-08-20.nc' 172 | #cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 173 | 174 | cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)[la]),np.asarray(Dataset(grid)[lo]) 175 | 176 | if len(cmaq_lat.shape) == 4: 177 | cmaq_lat,cmaq_lon = cmaq_lat[0][0],cmaq_lon[0][0] 178 | 179 | print('CMAQLATSHAPE') 180 | print(cmaq_lat.shape) 181 | #cmaq_lat,cmaq_lon = np.asarray(Dataset(grid)['LAT'])[0][0],np.asarray(Dataset(grid)['LON'])[0][0] 182 | llat,ulat,llon,ulon=cmaq_lat.min(), cmaq_lat.max(), cmaq_lon.min(), cmaq_lon.max() 183 | 184 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 185 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 186 | 187 | # Loop through each variable and check 188 | for loop in range(len(epa_files)): 189 | lon,lat,df=pull_cmaq(epa_files[loop], start_dt, end_dt, llat,ulat,llon,ulon,cmaq_lon,cmaq_lat,cmaq,var[loop]) 190 | xx,yy= find_index(lon, lat, cmaq_lon, cmaq_lat) 191 | dff= resample_df(df,lat,lon,start_dt,end_dt) 192 | dff['CMAQ']=float('nan') 193 | for numday in range(len(cmaq)): 194 | s=pd.DataFrame([[cmaq[numday][var[loop]][time][0][xx[idx]][yy[idx]] for time in range(24)] for idx in range(len(xx))]).T 195 | # 196 | for station in range(len(xx)): 197 | dff['CMAQ'][(24*(numday)+station*len(t_index)):(24*(numday)+ station*len(t_index)+24)]=s[station] 198 | # Output the var 199 | dff.to_csv(dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month)); 200 | print('Done with %s'%(var[loop])); 201 | 202 | #end 203 | 204 | 205 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /model_column_comparison.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | #model to column comparison 4 | #--------------------------------------- 5 | 6 | 7 | #------------------------------------------ 8 | # Libraries 9 | #-------------- 10 | from matplotlib import pyplot as plt ; from matplotlib import colors 11 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 12 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 13 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 14 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 15 | from netCDF4 import Dataset 16 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 17 | import matplotlib.path as mpath; import seaborn as sns 18 | #------------------------------------------ 19 | 20 | column_dir = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/column/' 21 | onlyfiles = next(os.walk(column_dir))[2] 22 | onlyfiles.sort() 23 | 24 | fig_dir = '~/figs_for_dan/' 25 | 26 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 27 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 28 | 29 | #get lat lon from grid file 30 | dir='/projects/b1045/jschnell/ForStacy/' 31 | ll='latlon_ChicagoLADCO_d03.nc' 32 | llx=Dataset(dir+ll,'r') 33 | lat,lon=llx['lat'][:],llx['lon'][:] 34 | 35 | model_columns = np.asarray([Dataset(column_dir + 
onlyfiles[i],'r')['NO2'] for i in range(len(onlyfiles))]) 36 | 37 | model_columns_month = np.array([model_columns[i].mean(axis=0) for i in range(len(model_columns))]).mean(axis=0) 38 | #model_columns_no2 = model_columns[i]['NO2'] 39 | 40 | model_columns_month_ish= model_columns_month*10**-20 41 | 42 | sat_columns = pd.read_csv('/projects/b1045/NO2/l3/nitrogendioxide_tropospheric_column.csv',index_col = 0) 43 | 44 | sat_columns = sat_columns*10**5 45 | model_columns_month_ish = model_columns_month_ish*10**5 46 | 47 | # Make scatter 48 | #-------------------------------- 49 | 50 | plt.scatter(sat_columns, model_columns_month_ish, alpha = 0.84, color = 'purple') 51 | plt.scatter(sat_columns, model_columns_month_ish, alpha = 0.84, color = 'purple') 52 | plt.xlim(0,0.00021*10**5) 53 | plt.ylim(0,0.00021*10**5) 54 | plt.xlabel('TropOMI Column ('+ Dataset(column_dir + onlyfiles[i],'r')['NO2'].units + '*10^15)') 55 | plt.ylabel('CMAQ Column') 56 | plt.plot([-100,100],[-100,100],c='black',alpha = 0.5) 57 | 58 | from scipy.stats import pearsonr 59 | sat_columns = np.array(sat_columns) 60 | scr, mcr = sat_columns.ravel(), model_columns_month_ish.ravel() 61 | bad = np.isnan(scr) 62 | 63 | r = round(pearsonr(mcr[~bad],scr[~bad])[0],2) 64 | 65 | plt.title('R = '+ str(r)) 66 | 67 | plt.savefig(fig_dir+'sat_to_model.png') 68 | 69 | fig,ax = plt.subplots(figsize = (6,6)) 70 | from palettable.colorbrewer.sequential import OrRd_4 71 | 72 | [plt.scatter(stn_epa_mix[i]['Sample Measurement'], pd.DataFrame(stnpixel_from_cmaq)[i], alpha = 0.99, color = OrRd_4.mpl_colors[i+1]) for i in range(len(epa_lat))] 73 | plt.xlim(0,27) 74 | plt.ylim(0,27) 75 | plt.xlabel('EPA Station (ppb)') 76 | plt.ylabel('CMAQ (ppb)') 77 | plt.plot([-100,100],[-100,100],c='black',alpha = 0.5) 78 | 79 | ab = np.array([np.array(stn_epa_mix[i]['Sample Measurement']) for i in range(len(stn_epa_mix))]).ravel() 80 | ba = np.array([np.array(pd.DataFrame(stnpixel_from_cmaq)[i]) for i in range(len(stn_epa_mix))]).ravel() 81 | 82 | bad = np.isnan(ab) 83 | 84 | r = round(pearsonr(ab[~bad],ba[~bad])[0],2) 85 | 86 | plt.title('Daily Average R = '+ str(r)) 87 | 88 | plt.savefig(fig_dir+'stn_to_model.png') 89 | 90 | # make MAPS 91 | #-------------------------------- 92 | 93 | #options 94 | crs_new = ccrs.PlateCarree() 95 | import cartopy.feature as cfeature 96 | from cartopy.feature import NaturalEarthFeature, LAND, COASTLINE 97 | 98 | vmin,vmax = 2, 20 99 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 100 | cmap = 'magma_r' 101 | xl,xu,yl,yu = lon.min()+1,lon.max()-1,lat.min()+1,lat.max()-1 102 | xl,xu,yl,yu = lon.ravel()[~bad].min(),lon.ravel()[~bad].max(),lat.ravel()[~bad].min(),lat.ravel()[~bad].max() 103 | 104 | # sat column 105 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 8)) 106 | cs = ax.pcolormesh(lon,lat, sat_columns,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 107 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 108 | cbar.set_ticks(levels) 109 | cbar.set_ticks(levels) 110 | ax.set_extent([xl,xu,yl,yu]) 111 | ax.set_title('Regridded TropOMI NO2') 112 | states = cfeature.STATES.with_scale('10m') 113 | ax.add_feature(states) 114 | 115 | plt.savefig(fig_dir+'sat_no2.png') 116 | 117 | # model column 118 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 8)) 119 | cs = ax.pcolormesh(lon,lat, model_columns_month_ish[0],transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 120 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 121 | cbar.set_ticks(levels) 122 | 
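The correlation between the satellite and model fields above is computed by flattening both grids and masking cells where the regridded TropOMI average is NaN. A compact sketch of that pattern; if the model field can also contain NaNs, the mask should be the union of both:

import numpy as np
from scipy.stats import pearsonr

# toy 2-D fields standing in for the regridded satellite and CMAQ columns
sat = np.array([[1.0, 2.0], [np.nan, 4.0]])
model = np.array([[1.2, 1.8], [3.0, 4.5]])

scr, mcr = sat.ravel(), model.ravel()
bad = np.isnan(scr)                       # cloud/QA gaps in the satellite average
r, p = pearsonr(mcr[~bad], scr[~bad])
print(round(r, 3))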
ax.set_extent([xl,xu,yl,yu], crs= crs_new) 123 | ax.set_title('CMAQ Column NO2') 124 | states = cfeature.STATES.with_scale('10m') 125 | ax.add_feature(states) 126 | plt.savefig(fig_dir+'model_no2.png') 127 | 128 | 129 | #difference bw model and satellite 130 | from palettable.colorbrewer.diverging import RdGy_10 131 | 132 | difference = model_columns_month_ish[0] - sat_columns 133 | vmin,vmax = difference[~np.isnan(difference)].min(),difference[~np.isnan(difference)].max() 134 | vmin,vmax = -8,8 135 | cmap = RdGy_10.mpl_colormap 136 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 137 | 138 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(6, 8)) 139 | cs = ax.pcolormesh(lon,lat, difference,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 140 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 141 | cbar.set_ticks(levels) 142 | ax.set_extent([xl,xu,yl,yu]) 143 | states = cfeature.STATES.with_scale('10m') 144 | ax.add_feature(states) 145 | ax.set_title('Delta Column NO2') 146 | plt.savefig(fig_dir+'difference_no2.png') 147 | 148 | 149 | # timeseries_model 150 | 151 | model_columns = np.asarray([Dataset(column_dir + onlyfiles[i],'r')['NO2'] for i in range(len(onlyfiles))]) 152 | model_columns_month = np.array([model_columns[i].mean(axis=0) for i in range(len(model_columns))]) 153 | model_columns_month = model_columns_month*10**-15 154 | cmap = 'magma_r' 155 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 156 | xl,xu,yl,yu = lon.min()+1,lon.max()-1,lat.min()+1,lat.max()-1 157 | 158 | vmin,vmax = 2, 20 159 | 160 | for i in range(len(model_columns_month)): 161 | # model column 162 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(10, 6)) 163 | cs = ax.pcolormesh(lon,lat, model_columns_month[i][0],transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 164 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 165 | cbar.set_ticks(levels) 166 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 167 | ax.set_title('CMAQ Column NO2') 168 | states = cfeature.STATES.with_scale('10m') 169 | ax.add_feature(states) 170 | plt.savefig(fig_dir+'timeseries_model_column'+str(i)+'.png') 171 | plt.close() 172 | 173 | vmin,vmax = 2, 20 174 | 175 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 176 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 177 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)) 178 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 179 | 180 | for i in range(5): 181 | for j in range(len(model_columns[0])): 182 | #for i in range(1): 183 | # for j in range(1): 184 | data = model_columns[i][j][0]*10**-15 185 | fig, ax = plt.subplots(subplot_kw={'projection': crs_new},figsize=(10, 6)) 186 | #ax.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 187 | cs = ax.pcolormesh(lon,lat, data,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 188 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.5) 189 | cbar.set_ticks(levels) 190 | ax.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 191 | x=[chi_shapefile.bounds.minx.min(), chi_shapefile.bounds.maxx.max()] 192 | y=[chi_shapefile.bounds.miny.min(), chi_shapefile.bounds.maxy.max()] 193 | #ax.set_extent([x[0]-.03,x[1]+.03,y[0]-.03,y[1]+.03],crs= crs_new) 194 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 195 | ax.set_title('CMAQ Column') 196 | states = cfeature.STATES.with_scale('10m') 197 | ax.add_feature(states) 198 | 
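# one PNG frame per (day i, hour j) of the model column; frames like these can be
# stitched into an animation afterwards, e.g. with moviepy's ImageSequenceClip as done
# at the end of /PostProcessing/o3_profile.py.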
plt.savefig(fig_dir+'timeseries_model_column_d'+str(i)+'_h'+str(j)+'.png') 199 | plt.close() 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /PostProcessing/plot_cmaq_may2021.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # plot_cmaq_may2021.py 4 | 5 | # --------------------------------------------------------------------- 6 | # Stacy Montgomery, NOV 2018 - DEC 2018 7 | # This program takes the cropped l2 files and regrids the data to new domain. 8 | # --------------------------------------------------------------------- 9 | # USER INPUT 10 | # --------------------------------------------------------------------- 11 | from netCDF4 import Dataset 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import pandas as pd 15 | import os 16 | import netCDF4 17 | import math 18 | from scipy.interpolate import griddata 19 | import scipy.stats as st 20 | import cartopy.feature as cfeature 21 | from cartopy import crs as ccrs; 22 | from shapely.ops import unary_union, cascaded_union 23 | from geopandas.tools import sjoin 24 | from shapely.geometry import Point, shape 25 | from cartopy import crs as ccrs; 26 | # --------------------------------------------------------------------- 27 | 28 | # dir to grid file 29 | dir='/projects/b1045/jschnell/ForStacy/' 30 | ll='latlon_ChicagoLADCO_d03.nc' 31 | 32 | dir_epa='/projects/b1045/montgomery/CMAQcheck/' 33 | 34 | dir_cmaq_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 35 | dir_cmaq_d03_wint='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 36 | 37 | #names of lat lons in the cmaq grid 38 | la,lo='lat','lon' # for 1.3km 39 | la,lo='LAT','LON' # for 4km 40 | 41 | # CMAQ RUN things 42 | domain=['d03']*3 43 | time='hourly' 44 | year='2018' 45 | month='8' 46 | ssn = 'Summer' 47 | 48 | var = ['NO2','O3','PM25_TOT']*2 49 | var_tit=[r'NO$_2$',r'O$_3$',r'PM$_{2.5,TOT}$'] 50 | #epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 51 | epa_files =[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[i],domain[i],year,month,) for i in range(len(domain))] 52 | 53 | year='2019' 54 | month='1' 55 | ssn = 'Winter' 56 | 57 | ep2 = [dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[i],domain[i],year,month,) for i in range(len(domain))] 58 | 59 | epa_files = epa_files +ep2 60 | 61 | startswith = 'COMBINE_ACONC' 62 | 63 | # --------------------------------------------------------------------- 64 | 65 | def pull_cmaq(dir_CMAQ,startswith,cmaq_var): 66 | #pull model files from given directoy 67 | onlyfiles = next(os.walk(dir_CMAQ))[2] 68 | onlyfiles.sort() # so that searching for dates are easier 69 | # pull only CONC files 70 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 71 | # get data files 72 | ncfile_CMAQ_base = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 73 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 74 | return ncfile_CMAQ_base, units_cmaq 75 | 76 | 77 | 78 | def get_min_max_cmaq(base,var,hrs, mask=False, ma = np.zeros(3)): 79 | basel=[base[i][var][hr] for i in range(len(base)) for hr in hrs] 80 | basel=np.array(basel) 81 | if mask==True: 82 | base_max = np.array([basel[i][0][~mask].max() for i in range(len(basel))]) 83 | base_min = np.array([basel[i][0][~mask].min() for i in 
range(len(basel))]) 84 | base_mean = np.array([basel[i][0][~mask].mean() for i in range(len(basel))]) 85 | else: 86 | base_max = np.array([basel[i][0].max() for i in range(len(basel))]) 87 | base_min = np.array([basel[i][0].min() for i in range(len(basel))]) 88 | base_mean = np.array([basel[i][0].mean() for i in range(len(basel))]) 89 | return base_max,base_min,base_mean 90 | 91 | 92 | def get_avg_cmaq(base,var,hrs, mask=False, ma = np.zeros(3)): 93 | basel=[base[i][var][hr] for i in range(len(base)) for hr in hrs] 94 | basel=np.array(basel) 95 | return np.mean(basel,axis=0)[0] 96 | 97 | def get_avg_epa(epa_file): 98 | #for t in range(1): 99 | ef = epa_file 100 | epa = pd.read_csv(ef) 101 | epa_drop = pd.DataFrame([epa.level_0.tolist(),epa['Sample Measurement'].tolist(),epa['CMAQ'].tolist(),epa['Latitude'].tolist(),epa['Longitude'].tolist()]).T 102 | epa_drop.columns = ['Datetime','Sample Measurement','CMAQ','Lat','Lon'] 103 | epa_drop['Sample Measurement'] = epa_drop['Sample Measurement'].astype(float) 104 | return epa_drop.groupby(['Lat','Lon']).mean().reset_index() 105 | 106 | 107 | def get_min_max_epa(epa_file): 108 | #for t in range(1): 109 | ef = epa_file 110 | epa = pd.read_csv(ef) 111 | epa_drop = pd.DataFrame([epa.level_0.tolist(),epa['Sample Measurement'].tolist(),epa['CMAQ'].tolist()]).T 112 | epa_drop.columns = ['Datetime','Sample Measurement','CMAQ'] 113 | epa_drop.Datetime = pd.to_datetime(epa_drop.Datetime) 114 | epa_drop = epa_drop.set_index('Datetime') 115 | # 116 | fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = [],[],[],[],[],[] 117 | for i in range(744): 118 | f=epa_drop.loc[epa_drop.index[i]] 119 | fmax_epa.append(f.max().tolist()[0]) 120 | fmin_epa.append(f.min().tolist()[0]) 121 | fmean_epa.append(f.mean().tolist()[0]) 122 | fmax_cmaq.append(f.max().tolist()[1]) 123 | fmin_cmaq.append(f.min().tolist()[1]) 124 | fmean_cmaq.append(f.mean().tolist()[1]) 125 | # Plot by max/min/avg 126 | return epa_drop.index[0:744],fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq 127 | 128 | 129 | # --------------------------------------------------------------------- 130 | #START CODE 131 | # ################### ################### ################### ################## 132 | 133 | 134 | base,t_u = pull_cmaq(dir_cmaq_d03,startswith,var[0:3]) 135 | base_wint,t_u = pull_cmaq(dir_cmaq_d03_wint,startswith,var[0:3]) 136 | 137 | datas = [get_avg_cmaq(base,var[i],hrs) for i in range(len(var[0:3]))] + [get_avg_cmaq(base_wint,var[i],hrs) for i in range(len(var[0:3]))] 138 | 139 | 140 | # adjustable plotting parts 141 | llx=Dataset(dir+ll,'r') 142 | lat,lon=llx['lat'][:],llx['lon'][:] 143 | 144 | 145 | #START PLOT 146 | # ################### ################### ################### ################## 147 | 148 | c = ['Orchid','Blue','limegreen'] 149 | c2 = ['Purple','Navy','darkgreen'] 150 | 151 | hrs = np.arange(0,24) 152 | 153 | 154 | units = [r'ppb',r'ppb',r'ug/m$^3$']*3 155 | vmins,vmaxs = [0,25,5,0,25,5],[20,45,13,20,45,13] 156 | 157 | titles = [r'Summer NO$_2$ ',r'Winter NO$_2$', 158 | r'Summer O$_3$ ',r'Winter O$_3$', 159 | r'Summer PM$_2.5$ ',r'Winter PM$_2.5$'] 160 | 161 | 162 | cmaps = ['Purples','Blues','Greens']*2 163 | 164 | 165 | figtit = 'monthly_average_with_overlay.png' 166 | #--- fig 167 | 168 | def create_fig(lon,lat,base,datas,varS,vmins,vmaxs,cmaps,units, titles,figtit,show=False,save=False): 169 | # 170 | crs_new = ccrs.PlateCarree() 171 | fig, axs = plt.subplots(nrows=3,ncols=2,subplot_kw={'projection': crs_new},figsize=(8,7)) 172 | axs = 
axs.T.ravel() 173 | axs[0].set_ylabel(r'NO$_2$') 174 | axs[1].set_ylabel(r'O$_3$') 175 | axs[3].set_ylabel(r'PM$_2.5$') 176 | axs[0].set_title('Summer') 177 | axs[3].set_title('Winter') 178 | # 179 | for i in range(len(axs)): 180 | print(varS[i]) 181 | epa = get_avg_epa(epa_files[i]) 182 | #if i < 3: data = get_avg_cmaq(base,varS[i],hrs) 183 | #else: data = get_avg_cmaq(base_wint,varS[i],hrs) 184 | data = datas[i] 185 | vmin = vmins[i] 186 | vmax = vmaxs[i] 187 | title = titles[i] 188 | cmap = cmaps[i] 189 | if varS[i] == 'O3': epa['Sample Measurement'] = epa['Sample Measurement']*1000+10 190 | levels = list(np.arange(vmin,vmax,(vmax-vmin)/10))+[vmax] 191 | #plot 192 | cs=axs[i].pcolormesh(lon,lat, data,transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax)# 193 | cs2 = axs[i].scatter(epa.Lon,epa.Lat,c=epa['Sample Measurement'],cmap = cmap, vmin = vmin, vmax = vmax,s=40,edgecolors = 'black') 194 | # add limits 195 | # 196 | x=[lon.min(),lon.max()] 197 | y=[lat.min(),lat.max()] 198 | axs[i].set_extent([x[0]+.5,x[1]-.5,y[0]+.5,y[1]-.5],crs= crs_new) 199 | # 200 | #if i ==3 or i ==4 or i == 5: 201 | if i < 100: 202 | cbar=plt.colorbar(cs,boundaries= levels,fraction=0.028, pad=0.02,ax=axs[i]) 203 | # 204 | cbar.set_ticks(levels) 205 | cbar.set_label(units[i]) 206 | # add features 207 | states_provinces = cfeature.NaturalEarthFeature(category='cultural',name='admin_1_states_provinces_lines',edgecolor='black',facecolor='none',scale='10m',alpha = 0.3) 208 | borders = cfeature.NaturalEarthFeature(scale='50m',category='cultural',name='admin_0_countries',edgecolor='black',facecolor='none',alpha=0.6) 209 | land = cfeature.NaturalEarthFeature('physical', 'lakes', '10m', edgecolor='black', facecolor='none') 210 | axs[i].add_feature(land, edgecolor='black') 211 | axs[i].add_feature(borders, edgecolor='black') 212 | axs[i].add_feature(states_provinces, edgecolor='black') 213 | #axs[i].set_title(i) 214 | # add title 215 | #axs[i].set_title(title) 216 | plt.tight_layout() 217 | # 218 | # 219 | if save == True: plt.savefig(figtit) 220 | # 221 | if show==True: plt.show() 222 | 223 | 224 | create_fig(lon,lat,base,datas,var,vmins,vmaxs,cmaps,units, titles,figtit,show=True,save=False) 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /correlation_wrf_cmaq_smoke.py: -------------------------------------------------------------------------------- 1 | 2 | #------------------------------------------ 3 | # Libraries 4 | #-------------- 5 | from matplotlib import pyplot as plt ; from matplotlib import colors 6 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 7 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 8 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 9 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 10 | from netCDF4 import Dataset 11 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 12 | import matplotlib.path as mpath; import seaborn as sns 13 | import timeit 14 | from cartopy import crs as ccrs 15 | import cartopy 16 | 17 | #------------------------------------------ 18 | 19 | 20 | 21 | dir_WRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 22 | dir_CMAQ = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 23 | 
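# dir_WRF holds the raw wrfout_d01_* files and dir_CMAQ the post-processed daily
# COMBINE_ACONC_* files from the same 1.33 km run (see the filename filters below).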
dir_GRID='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 24 | dir_EMIS = '/projects/b1045/wrf-cmaq/input/emis/Chicago_LADCO/ChicagoLADCO_d03/' 25 | emis_dir = '/projects/b1045/wrf-cmaq/input/emis/Chicago_LADCO/ChicagoLADCO_d03/' 26 | 27 | #variables of interest 28 | var=['O3','NO2','NO','CO','ISOP','SO2','FORM','PM25_TOT'] 29 | wrf_var=['T2','PSFC','RAINC','RAINNC','Q2','V10','U10'] 30 | smoke_var = ['NO2','NO','CO','ISOP','SO2', 'FORM'] 31 | 32 | # User defined functions 33 | #------------------------------------------ 34 | def common_data(list1, list2): 35 | result = False 36 | # traverse in the 1st list 37 | for x in list1: 38 | # traverse in the 2nd list 39 | for y in list2: 40 | # if one common 41 | if x == y: 42 | result = True 43 | return result 44 | return result 45 | 46 | #------------------------------------------ 47 | 48 | #load chicago shapefile 49 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 50 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 51 | 52 | #get names of files given directoy 53 | onlyfiles = next(os.walk(dir_CMAQ))[2] 54 | onlyfiles=sorted(onlyfiles) 55 | fnames_cmaq = [x for x in onlyfiles if x.startswith("COMBINE_ACONC")] 56 | fnames_wrf= ['wrfout_d01_'+str(fnames_cmaq[i]).split('_')[-1].split('.nc')[0][0:4]+'-'+str(fnames_cmaq[i]).split('_')[-1].split('.nc')[0][4:6]+'-'+str(fnames_cmaq[i]).split('_')[-1].split('.nc')[0][6:]+'_00:00:00' for i in range(len(fnames_cmaq))] 57 | 58 | fnames_cmaq = fnames_cmaq[:-1] 59 | fnames_wrf = fnames_wrf[:-1] 60 | 61 | #dates 62 | dates=[fnames_wrf[i].split('wrfout_d01_')[1].split('_')[0] for i in range(len(fnames_wrf))] 63 | dates = dates[:-1] 64 | dates2 = ['2018'+'08'+("{:02d}".format(i)) for i in range(1,32)] 65 | 66 | version = 'emissions_v0' 67 | # emissions dir 68 | #Get number of files in directory with L2 domain CSV files 69 | emis_files = next(os.walk(emis_dir))[2] 70 | emis_files = [x for x in emis_files if x.startswith("emis_mole_all")] 71 | emis_files =sorted(emis_files) # so that searching for dates are easier 72 | maskfiles = [common_data(emis_files[i].split('_'), dates2) for i in range(len(emis_files))] 73 | emis_files = np.array(emis_files)[maskfiles] 74 | 75 | 76 | #pull in model files and variables 77 | # for example: finding the difference between the 11th day and the 0th day of NO2: 78 | # cmaq_ncfile[10]['NO2'][0]-cmaq_ncfile[0]['NO2'][0] 79 | cmaq_ncfile= [Dataset(dir_CMAQ+ fnames_cmaq[i],'r') for i in range(len(fnames_cmaq))] 80 | wrf_ncfile=[Dataset(dir_WRF + fnames_wrf[i],'r') for i in range(len(fnames_wrf))] 81 | emis_ncfile=[Dataset(dir_EMIS + emis_files[i],'r') for i in range(len(emis_files))] 82 | 83 | units_cmaq = [cmaq_ncfile[0][var[i]].units for i in range(len(var))] 84 | units_wrf = [wrf_ncfile[0][wrf_var[i]].units for i in range(len(wrf_var))] 85 | units_smoke = [emis_ncfile[0][smoke_var[i]].units for i in range(len(smoke_var))] 86 | 87 | #get lat lon from grid file 88 | ll=Dataset(dir_GRID,'r') 89 | lat,lon=ll['lat'][:],ll['lon'][:] 90 | 91 | #wrflatlon 92 | wrflon, wrflat = wrf_ncfile[0]['XLONG'][0],wrf_ncfile[0]['XLAT'][0] 93 | 94 | 95 | # pull out variables 96 | #------------------------------------------ 97 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)[2]) 98 | 99 | # routine to mask mask over chicago shapefile 100 | mask=np.ones(lon.shape,dtype=bool) 101 | mask[:] = False 102 | 103 | for i in range(len(lon)): 104 | for j in range(len(lon[0])): 105 | pt = Point(lon[i][j],lat[i][j]) 106 | mask[i][j] = 
pt.within(union[0]) 107 | 108 | # routine to mask mask over chicago shapefile 109 | mask_wrf=np.ones(wrflon.shape,dtype=bool) 110 | mask_wrf[:] = False 111 | 112 | for i in range(len(wrflon)): 113 | for j in range(len(wrflon[0])): 114 | pt = Point(wrflon[i][j], wrflat[i][j]) 115 | mask_wrf[i][j] = pt.within(union[0]) 116 | 117 | 118 | 119 | hours = pd.date_range(dates[0]+" 00:00", dates[-2]+" 23:00",freq="60min") 120 | 121 | 122 | # pull out variables 123 | #------------------------------------------ 124 | 125 | def pull_vars(ncfile,var,mask): 126 | var_crop = [] 127 | for i in range(len(var)): 128 | if ncfile == cmaq_ncfile: crop = [ncfile[j][var[i]][h][0][mask] for h in range(24) for j in range(len(ncfile))] 129 | elif ncfile == wrf_ncfile: crop = [ncfile[j][var[i]][h][mask_wrf] for h in range(24) for j in range(len(ncfile))] 130 | elif ncfile == emis_ncfile: crop = [ncfile[j][var[i]][h][0][mask] for h in range(24) for j in range(len(ncfile))] 131 | else: print('ERROR') 132 | # 133 | var_crop.append(crop) 134 | return var_crop 135 | 136 | mask_ravel = np.array(mask).ravel() 137 | lon_ravel = np.array(lon).ravel()[np.array(mask).ravel()] 138 | lat_ravel = np.array(lat).ravel()[np.array(mask).ravel()] 139 | 140 | var_crop=pull_vars(cmaq_ncfile,var,mask) 141 | var_crop_emis=pull_vars(emis_ncfile,smoke_var,mask) 142 | var_crop_wrf=pull_vars(wrf_ncfile,wrf_var,mask_wrf) 143 | 144 | #rainc,rainnc = np.asarray(var_crop_wrf[3]), np.asarray(var_crop_wrf[2]) 145 | rain_cumulative = np.asarray(var_crop_wrf[3]) + np.asarray(var_crop_wrf[2]) 146 | 147 | rain = [[] for i in range(len(rain_cumulative))] 148 | 149 | # remove the cumulative nature of rain variables 150 | for i in range(len(rain_cumulative)): 151 | if i == 0: rain[0] = np.zeros(rain_cumulative[0].shape).tolist() 152 | else: rain[i] = (rain_cumulative[i]-rain_cumulative[i-1]).tolist() 153 | 154 | #wrf_var=['T2','PSFC','RAINC','RAINNC','Q2','V10','U10'] 155 | var_crop_wrf = np.array([var_crop_wrf[0]]+ [var_crop_wrf[1]]+ [var_crop_wrf[4]]+ [var_crop_wrf[5]]+[var_crop_wrf[6]]+[rain]) 156 | wrf_var = ['T2','PSFC','Q2','V10','U10','RAIN'] 157 | 158 | var_crop_emis_tot = [np.array(var_crop_emis[i]).ravel() for i in range(len(var_crop_emis))] 159 | var_crop_tot = [np.array(var_crop[i]).ravel() for i in range(len(var_crop))] 160 | var_crop_wrf_tot = [var_crop_wrf[i].ravel() for i in range(len(var_crop_wrf))] 161 | 162 | var_crop_wrf_tot = var_crop_wrf_tot+ np.array([(np.array(var_crop_wrf_tot[-1])**2+np.array(var_crop_wrf_tot[-2])**2)**.5]).tolist() 163 | wrf_var = wrf_var + ['Wind_TOT'] 164 | 165 | var_to_wrf = var_crop_tot + var_crop_wrf_tot 166 | var_to_emis = var_crop_tot + var_crop_emis_tot 167 | 168 | #make corr matric 169 | corr_matrix_vw = np.zeros([len(var_to_wrf), len(var_to_wrf)]); corr_matrix_ve = np.zeros([len(var_to_emis), len(var_to_emis)]) 170 | 171 | from scipy.stats import pearsonr 172 | 173 | for i in range(len(var_to_wrf)): 174 | for j in range(len(var_to_wrf)): 175 | corr_matrix_vw[i][j]= pearsonr(var_to_wrf[i], var_to_wrf[j])[0] 176 | 177 | for i in range(len(var_to_emis)): 178 | for j in range(len(var_to_emis)): 179 | corr_matrix_ve[i][j]= pearsonr(var_to_emis[i], var_to_emis[j])[0] 180 | 181 | maskvw = np.zeros_like(corr_matrix_vw_df) 182 | maskvw[np.triu_indices_from(maskvw)] = True 183 | 184 | maskve = np.zeros_like(corr_matrix_ve_df) 185 | maskve[np.triu_indices_from(maskve)] = True 186 | 187 | # Start plotting cmaq v wrf 188 | titles_vw = [var[i]+'_CMAQ' for i in range(len(var))] + [wrf_var[i]+'_WRF' for i in 
range(len(wrf_var))] 189 | 190 | corr_matrix_vw_df = pd.DataFrame(corr_matrix_vw) 191 | corr_matrix_ve_df = pd.DataFrame(corr_matrix_ve) 192 | corr_matrix_vw_df.columns = titles_vw 193 | corr_matrix_vw_df.index = titles_vw 194 | corr_matrix_ve_df.columns = titles_ve 195 | corr_matrix_ve_df.index = titles_ve 196 | 197 | # Make heat maps of variables 198 | fig = plt.subplots(figsize = (8,7)) 199 | sns.heatmap(corr_matrix_vw_df,center = 0,annot = True,mask = maskvw, fmt='.2f') 200 | plt.tight_layout() 201 | plt.savefig('correlation_matrix_vw.svg') 202 | plt.show() 203 | 204 | # Start plotting cmaq v wrf 205 | titles_ve = [var[i]+'_CMAQ' for i in range(len(var))] + [smoke_var[i]+'_SMK' for i in range(len(smoke_var))] 206 | corr_matrix_ve_df = pd.DataFrame(corr_matrix_ve) 207 | corr_matrix_ve_df.columns = titles_ve 208 | corr_matrix_ve_df.index = titles_ve 209 | 210 | # Make heat maps of variables 211 | fig = plt.subplots(figsize = (8,7)) 212 | sns.heatmap(corr_matrix_ve_df,center = 0,annot = True,mask = maskve, fmt='.2f') 213 | plt.tight_layout() 214 | plt.savefig('correlation_matrix_ve.svg') 215 | plt.show() 216 | -------------------------------------------------------------------------------- /PostProcessing/timeseries_epa_stn_cmaq_may2021.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Time series 4 | # Stacy Montgomery 5 | # May 2021 6 | 7 | #---------------------------------------------------------# 8 | from datetime import timedelta, date,datetime; 9 | import pandas as pd 10 | import numpy as np 11 | from netCDF4 import Dataset 12 | from wrf import latlon_coords, getvar 13 | import glob, os 14 | import matplotlib.pyplot as plt 15 | import matplotlib.dates as mdates 16 | from shapely.geometry import Point, shape, Polygon 17 | import fiona 18 | from shapely.ops import unary_union, cascaded_union 19 | from geopandas.tools import sjoin 20 | import geopandas as gpd; import geoplot; 21 | import glob; 22 | import os; 23 | #---------------------------------------------------------# 24 | 25 | dir_epa='/projects/b1045/montgomery/CMAQcheck/' 26 | 27 | dir_cmaq_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 28 | dir_cmaq_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/postprocess/' 29 | 30 | #names of lat lons in the cmaq grid 31 | la,lo='lat','lon' # for 1.3km 32 | la,lo='LAT','LON' # for 4km 33 | 34 | # CMAQ RUN things 35 | domain=['d03']*3+['d02']*3 36 | time='hourly' 37 | year='2018' 38 | month='8' 39 | ssn = 'Summer' 40 | 41 | var = ['NO2','O3','PM25_TOT']*2 42 | var_tit=[r'NO$_2$',r'O$_3$',r'PM$_{2.5,TOT}$'] 43 | #epa_files =[dir_epa+'%s_%s_%s.csv'%(time,epa_code[i],year,) for i in range(len(epa_code))] 44 | epa_files =[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[i],domain[i],year,month,) for i in range(len(domain))] 45 | 46 | #------ DATERANGE 47 | 48 | def pull_cmaq(dir_CMAQ,startswith,cmaq_var): 49 | #pull model files from given directoy 50 | onlyfiles = next(os.walk(dir_CMAQ))[2] 51 | onlyfiles.sort() # so that searching for dates are easier 52 | # pull only CONC files 53 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 54 | # get data files 55 | ncfile_CMAQ_base = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 56 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 57 | return ncfile_CMAQ_base, units_cmaq 58 | 59 | 60 | 61 | def 
mask_given_shapefile(lon,lat,shapefile): 62 | ''' 63 | Make a mask given a shapefile 64 | lon - array of grid lons 65 | lat - array of grid lats 66 | shapefile - geopandas geodataframe shapefile 67 | ''' 68 | union=gpd.GeoSeries(unary_union(shapefile.geometry)) 69 | mask=np.ones(lon.shape,dtype=bool) 70 | mask[:] = True 71 | for i in range(len(lon)): 72 | for j in range(len(lon[0])): 73 | pt = Point(lon[i][j],lat[i][j]) 74 | if pt.within(union[0]): 75 | mask[i][j] = False 76 | # 77 | return mask 78 | 79 | 80 | 81 | def get_min_max_cmaq(base,var,hrs, mask=False, ma = np.zeros(3)): 82 | basel=[base[i][var][hr] for i in range(len(base)) for hr in hrs] 83 | basel=np.array(basel) 84 | if mask==True: 85 | base_max = np.array([basel[i][0][~mask].max() for i in range(len(basel))]) 86 | base_min = np.array([basel[i][0][~mask].min() for i in range(len(basel))]) 87 | base_mean = np.array([basel[i][0][~mask].mean() for i in range(len(basel))]) 88 | else: 89 | base_max = np.array([basel[i][0].max() for i in range(len(basel))]) 90 | base_min = np.array([basel[i][0].min() for i in range(len(basel))]) 91 | base_mean = np.array([basel[i][0].mean() for i in range(len(basel))]) 92 | return base_max,base_min,base_mean 93 | 94 | 95 | def get_min_max_epa(epa_file): 96 | #for t in range(1): 97 | ef = epa_files[0] 98 | epa = pd.read_csv(ef) 99 | epa_drop = pd.DataFrame([epa.level_0.tolist(),epa['Sample Measurement'].tolist(),epa['CMAQ'].tolist()]).T 100 | epa_drop.columns = ['Datetime','Sample Measurement','CMAQ'] 101 | epa_drop.Datetime = pd.to_datetime(epa_drop.Datetime) 102 | epa_drop = epa_drop.set_index('Datetime') 103 | # 104 | fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = [],[],[],[],[],[] 105 | for i in range(744): 106 | f=epa_drop.loc[epa_drop.index[i]] 107 | fmax_epa.append(f.max().tolist()[0]) 108 | fmin_epa.append(f.min().tolist()[0]) 109 | fmean_epa.append(f.mean().tolist()[0]) 110 | fmax_cmaq.append(f.max().tolist()[1]) 111 | fmin_cmaq.append(f.min().tolist()[1]) 112 | fmean_cmaq.append(f.mean().tolist()[1]) 113 | # Plot by max/min/avg 114 | return epa_drop.index[0:744],fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq 115 | 116 | 117 | 118 | #epa_drop.groupby('Datetime').mean() 119 | 120 | 121 | 122 | 123 | #START CODE 124 | # ################### ################### ################### ################## 125 | 126 | c = ['Orchid','Blue','limegreen'] 127 | c2 = ['Purple','Navy','darkgreen'] 128 | 129 | fig, axs = plt.subplots(nrows=3,ncols=1,figsize=(7.5,9)) 130 | axs=axs.ravel() 131 | 132 | for i in range(3): 133 | ax = axs[i] 134 | dt,fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = get_min_max_epa(epa_files[i]) 135 | #station 136 | #if var[i]=='O3': fmax_epa,fmin_epa,fmean_epa = np.array(fmax_epa)*1000,np.array(fmin_epa)*1000,np.array(fmean_epa)*1000 137 | ax.plot(dt, fmean_epa, '--',color=c[i],label='Station Mean') 138 | ax.fill_between(dt,fmin_epa, fmax_epa,facecolor=c[i],alpha=0.1) 139 | #cmaq 140 | ax.plot(dt[0:744], fmean_cmaq[0:744],'--',color=c2[i],label='CMAQ Mean') 141 | ax.fill_between(dt[0:744],fmin_cmaq[0:744], fmax_cmaq[0:744],facecolor=c2[i],alpha=0.1) 142 | #extra info 143 | if var[i]== 'O3': ax.set_ylim([0,100]) 144 | else: ax.set_ylim([0,50]) 145 | ax.set_xlim(dt[0],dt[-1]) 146 | # set week major ticks 147 | fmt_wk = mdates.DayLocator(interval=7) 148 | ax.xaxis.set_major_locator(fmt_wk) 149 | # set dayminor ticks 150 | fmt_day = mdates.DayLocator() 151 | ax.xaxis.set_minor_locator(fmt_day) 152 | # format title 153 | 
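# i.e. label the weekly major ticks on the x axis as YYYY-MM-DD; the daily minor
# ticks set above are left unlabelled.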
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) 154 | if var[i] == 'PM25_TOT': ax.set_ylabel(var[i]+r' (ug/m$^3$)') 155 | else: ax.set_ylabel(var[i]+' (ppb)') 156 | ax.legend() 157 | #ax.set_title(var_tit[i]) 158 | if i ==0: ax.set_title(ssn) 159 | 160 | plt.tight_layout() 161 | 162 | plt.savefig('timseries_epa_cmaq_%s-%s.png'%(year,month)) 163 | 164 | plt.show() 165 | 166 | 167 | # CMAQ 168 | # ################### ################### ################## 169 | 170 | 171 | #pull model files from given directoy 172 | onlyfiles = next(os.walk(dir_cmaq_d03))[2] 173 | onlyfiles.sort() # so that searching for dates are easier 174 | startswith = 'COMBINE_ACONC' 175 | 176 | # pull only CONC files 177 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 178 | fnames_CMAQ = fnames_CMAQ[:-1] 179 | 180 | #get lat lon from grid file 181 | dir='/projects/b1045/jschnell/ForStacy/' 182 | ll='latlon_ChicagoLADCO_d03.nc' 183 | llx=Dataset(dir+ll,'r') 184 | lat,lon=llx['lat'][:],llx['lon'][:] 185 | 186 | # shapes and directories == https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=State+Legislative+Districts 187 | path='/projects/b1045/montgomery/shapefiles/Chicago/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 188 | path2 ='/projects/b1045/montgomery/shapefiles/Chicago/cook/Cook_County_Border.shp' 189 | chi_shapefile = gpd.GeoDataFrame.from_file(path2) 190 | mask = mask_given_shapefile(lon,lat,chi_shapefile) 191 | 192 | #pull temp 193 | #base,t_u = pull_cmaq(dir_cmaq_d03,startswith,var[0:3]) 194 | #base_max,base_min,base_mean = get_min_max_cmaq(base,var[0],hrs) 195 | #base_max_chi,base_min_chi,base_mean_chi = get_min_max_cmaq(base,var[0],hrs,mask=True,ma = mask) 196 | 197 | 198 | base,t_u = pull_cmaq(dir_cmaq_d03,startswith,var[0:3]) 199 | hrs = np.arange(0,24) 200 | dt,fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = get_min_max_epa(epa_files[0]) 201 | 202 | c = ['Orchid','Blue','limegreen'] 203 | c2 = ['Purple','Navy','darkgreen'] 204 | 205 | fig, axs = plt.subplots(nrows=3,ncols=1,figsize=(7.5,9)) 206 | axs=axs.ravel() 207 | 208 | for i in range(3): 209 | ax = axs[i] 210 | base_max,base_min,base_mean = get_min_max_cmaq(base,var[i],hrs) 211 | base_max_chi,base_min_chi,base_mean_chi = get_min_max_cmaq(base,var[i],hrs,mask=True,ma = mask) 212 | #dt,fmax_epa,fmin_epa,fmean_epa,fmax_cmaq,fmin_cmaq,fmean_cmaq = get_min_max_epa(epa_files[i]) 213 | # 214 | ax.plot(dt, base_mean[0:744], '--',color=c[i],label='Domain Mean') 215 | ax.fill_between(dt,base_min[0:744], base_max[0:744],facecolor=c[i],alpha=0.1) 216 | #cmaq 217 | ax.plot(dt[0:744], base_mean_chi[0:744],'--',color=c2[i],label='Chicago Mean') 218 | ax.fill_between(dt[0:744],base_min_chi[0:744], base_max_chi[0:744],facecolor=c2[i],alpha=0.1) 219 | #extra info 220 | if var[i]== 'O3': ax.set_ylim([0,100]) 221 | else: ax.set_ylim([0,50]) 222 | ax.set_xlim(dt[0],dt[-1]) 223 | # set week major ticks 224 | fmt_wk = mdates.DayLocator(interval=7) 225 | ax.xaxis.set_major_locator(fmt_wk) 226 | # set dayminor ticks 227 | fmt_day = mdates.DayLocator() 228 | ax.xaxis.set_minor_locator(fmt_day) 229 | # format title 230 | ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) 231 | if var[i] == 'PM25_TOT': ax.set_ylabel(var[i]+r' (ug/m$^3$)') 232 | else: ax.set_ylabel(var[i]+' (ppb)') 233 | ax.legend() 234 | #ax.set_title(var_tit[i]) 235 | if i ==0: ax.set_title(ssn) 236 | 237 | plt.tight_layout() 238 | 239 | plt.savefig('timseries_ONLY_cmaq_%s-%s.png'%(year,month)) 240 | 241 | 
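# --- optional sanity check (an assumption, not part of the original script) ----------
# get_min_max_cmaq() above receives the boolean Chicago array as `ma` but indexes the
# fields with `~mask`, where `mask` is the True/False keyword flag, so it is worth
# confirming that the "Chicago Mean" curves really differ from the domain-wide ones.
# A minimal, hypothetical helper that applies the array from mask_given_shapefile()
# directly (in this script, cells inside Chicago are where ~mask is True):
#
# def masked_mean_series(base, varname, hrs, cells):
#     # hourly mean over the grid cells where `cells` is True
#     return np.array([base[d][varname][h][0][cells].mean()
#                      for d in range(len(base)) for h in hrs])
#
# chi_mean = masked_mean_series(base, var[0], hrs, ~mask)
# --------------------------------------------------------------------------------------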
plt.show() 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | #locator = mdates.AutoDateLocator(minticks=3, maxticks=7) 257 | #formatter = mdates.ConciseDateFormatter(locator) 258 | #ax.xaxis.set_major_locator(locator) 259 | #ax.xaxis.set_major_formatter(formatter) 260 | 261 | 262 | 263 | 264 | #PLOT BY STATIONS 265 | #epa['latlon'] = [(epa.Longitude.tolist()[i],epa.Latitude.tolist()[i]) for i in range(len(epa.Latitude))] 266 | #lalo = epa.Latitude.unique().tolist(),epa.Longitude.unique().tolist() 267 | #epa_drop = epa.dropna(axis=0,subset=['Latitude']) 268 | #epa_drop_lalo = epa_drop.Latitude.unique() 269 | #fig,ax = plt.subplots() 270 | # 271 | #for i in epa_drop.Latitude.unique(): 272 | # tmp = epa_drop[epa_drop['Latitude']==i] 273 | # ax.scatter(tmp['level_0'],tmp['Sample Measurement'],label = i) 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | -------------------------------------------------------------------------------- /DataPreprocessing/wrf_to_stations_step2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ----------------- 4 | # Step 2 5 | # ----------------- 6 | # from step 1, get indices of the LCD latlon, which is used as an input to this code. 7 | # This code pulls out WRF data into csv files in the order of the LCD station data, which is then used as input to code 3. 8 | 9 | #ERROR-- rain seems to be weird. check write out. plot rain variables 10 | 11 | # --------------------------------------------------------------------------------------------------------- 12 | # ~~~~~~ START USER INPUT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | # variables of interest 15 | minTemp = 242; maxTemp = 294; 16 | month='08' 17 | year='2018' 18 | # Location of WRF output 19 | runname='output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852' 20 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 21 | 22 | # Processed US data, from previous file 23 | #File out names 24 | comp_dataset_name = dirout+'wrfcheck_withstations_'+runname+'_'+month+year+'.csv' # name and directory to write out to 25 | comp_dataset_extra = dirout+'completeddata_mini_extras2.csv' 26 | station_out_name = dirout+'station_out_removedmissing.csv' #name of intermediate file 27 | comp_dataset_name2= dirout+'wrfcheck_withstations_complete_rain.csv' 28 | 29 | 30 | 31 | #location of wrf and filenames 32 | #dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/'+runname+'/' 33 | # Name of run 34 | runname='output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852' 35 | #BASE_PXLSM_v0 36 | # Location of WRF output 37 | 38 | dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 39 | dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 40 | listOfStationsFile = "~/lcd-stations.csv" 41 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 42 | grid='latlon.nc' 43 | 44 | 45 | #dirout='/home/asm0384/WRFcheck/'+runname+'/' 46 | Chatty= True # false if you want to remove print statements 47 | slpon= True #True #need to configure to make SLP 48 | 49 | #start the code 50 | if Chatty: print('Starting ....') 51 | 52 | # -------------------------------------------------------------------------------------------------------- 53 | def getWRFfromIND(ncfile,indxy, filenames,varname): 54 | t2d01=[ncfile[z][varname][i] for z in range(len(ncfile)) for i in range(24)] 55 | t2d01_xx= [[t2d01[t][indxy[l]] for t in 
range(24*len(ncfile))] for l in range(len(indxy))] 56 | return t2d01_xx 57 | 58 | def getslpfromIND(ncfile,indxy, filenames,varname): 59 | t2d01=[ncfile[z][varname][i] for i in range(24) for z in range(len(ncfile))] 60 | t2d01_xx= [[t2d01[t][indxy[l]] for t in range(24*len(ncfile))] for l in range(len(indxy))] 61 | return t2d01_xx 62 | 63 | def getRHfromIND(ncfile,indxy, filenames): 64 | pq0 = 379.90516; a2 = 17.2693882; a3 = 273.16; a4 = 35.86 65 | q2=[ncfile[z]['Q2'][i]/((pq0 / ncfile[z]['PSFC'][i]) **(a2 * (ncfile[z]['T2'][i] - a3) / (ncfile[z]['T2'][i] - a4))) for z in range(len(ncfile)) for i in range(24)] 66 | t2d01_xx= [[q2[t][indxy[l]] for t in range(24*len(ncfile))] for l in range(len(indxy))] 67 | return t2d01_xx 68 | 69 | # remove missing files 70 | def rm_missing(filenames_d01): 71 | testrm=[] 72 | for i in filenames_d01: 73 | try: 74 | test=Dataset(dirToWRF+i) 75 | except FileNotFoundError: 76 | print(i) 77 | testrm.append(i) 78 | # 79 | for i in testrm: 80 | filenames_d01.remove(i) 81 | #return 82 | return filenames_d01 83 | 84 | #t2d01=[getvar(ncfiled01[z],"slp",timeidx=i).data for i in range(24) for z in range(len(filenames_d01))] 85 | 86 | # -------------------------------------------------------------------------------------------------------- 87 | # ~~~~~~ IMPORT PACKAGES ~~~~~~~~~~~~ 88 | #Station 89 | import glob, os 90 | import pandas as pd, numpy as np, matplotlib.pyplot as plt, cartopy.crs as crs, cartopy.feature as cpf 91 | from netCDF4 import Dataset 92 | from matplotlib.cm import get_cmap 93 | from cartopy.feature import NaturalEarthFeature 94 | from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim, cartopy_ylim, latlon_coords) 95 | import time 96 | from timezonefinder import TimezoneFinder 97 | from pytz import timezone 98 | import pytz 99 | from datetime import datetime,date, timedelta 100 | import dateutil.parser as dparser 101 | 102 | tf = TimezoneFinder(in_memory=True) 103 | 104 | 105 | #------------------------------------------------------------------------------ 106 | # ~~~~~~ START MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 107 | #------------------------------ load in wrf file names ------------------------ 108 | # $1 Get WRF file names 109 | #filenames_d01=[] 110 | #os.chdir(dirToWRF) 111 | #for file in glob.glob("wrfout_d01_*"): 112 | # filenames_d01.append(file) 113 | # 114 | #filenames_d01.sort() #files are now sorted by date and time 115 | 116 | # $1 Get WRF file names 117 | filenames_d02=[] 118 | os.chdir(dirToWRF_d02) 119 | for file in glob.glob("wrfout_d01_*"): 120 | filenames_d02.append(file) 121 | 122 | filenames_d02.sort() #files are now sorted by date and time 123 | filenames_d02=filenames_d02[:-1] 124 | 125 | # $1 Get WRF file names 126 | filenames_d03=[] 127 | os.chdir(dirToWRF_d03) 128 | for file in glob.glob("wrfout_d01_*"): 129 | filenames_d03.append(file) 130 | 131 | filenames_d03.sort() #files are now sorted by date and time 132 | filenames_d03=filenames_d03[:-1] 133 | 134 | # remove missing files 135 | #filenames_d01=rm_missing(filenames_d01) 136 | #filenames_d02=rm_missing(filenames_d02) 137 | #filenames_d03=rm_missing(filenames_d03) 138 | 139 | #dates_d01=[filenames_d01[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d01))] 140 | dates_d02=[filenames_d02[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d02))] 141 | dates_d03=[filenames_d03[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d03))] 142 | 143 | #if 
dates_d01== dates_d02 and d 144 | ates=dates_d01 145 | #else: 146 | # print('dates are not consistent between domains! Defaulting to d01 dates, may cause errors!') 147 | dates=dates_d02 148 | 149 | #ncfiled01 = [Dataset(filenames_d01[i]) for i in range(len(filenames_d01))] 150 | ncfiled02 = [Dataset(dirToWRF_d02+filenames_d02[i]) for i in range(len(filenames_d02))] 151 | ncfiled03 = [Dataset(dirToWRF_d03+filenames_d03[i]) for i in range(len(filenames_d03))] 152 | 153 | 154 | #get indices for dataset, compress the indices for each domain 155 | STATION= pd.read_csv(comp_dataset_name) 156 | in_d02= STATION['in_d02'].tolist() 157 | in_d03 = STATION['in_d03'].tolist() 158 | yy_d02=np.compress(in_d02,STATION['yy_d02']).tolist();xx_d02= np.compress(in_d02, STATION['xx_d02']).tolist() 159 | yy_d03= np.compress(in_d03, STATION['yy_d03']).tolist();xx_d03= np.compress(in_d03, STATION['xx_d03']).tolist() 160 | 161 | indxyd02clip =[(xx_d02[t],yy_d02[t]) for t in range(len(yy_d02))] 162 | indxyd03clip =[(xx_d03[t],yy_d03[t]) for t in range(len(yy_d03))] 163 | print(indxyd02clip) 164 | #pull variables 165 | start=time.time() 166 | #t2d01 = getWRFfroimIND(ncfiled01,indxyd01, filenames_d01,'T2') 167 | t2d02 = getWRFfromIND(ncfiled02, indxyd02clip, filenames_d02,'T2') 168 | t2d03 = getWRFfromIND(ncfiled03, indxyd03clip, filenames_d03,'T2') 169 | 170 | #raind01 = getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'RAINC') 171 | raind02 = getWRFfromIND(ncfiled02, indxyd02clip, filenames_d02,'RAINC') 172 | raind03 = getWRFfromIND(ncfiled03, indxyd03clip, filenames_d03,'RAINC') 173 | 174 | #rainncd01 = getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'RAINNC') 175 | rainncd02 = getWRFfromIND(ncfiled02, indxyd02clip, filenames_d02,'RAINNC') 176 | rainncd03 = getWRFfromIND(ncfiled03, indxyd03clip, filenames_d03,'RAINNC') 177 | 178 | #rhd01 = getRHfromIND(ncfiled01,indxyd01, filenames_d01) 179 | rhd02 = getRHfromIND(ncfiled02, indxyd02clip, filenames_d02) 180 | rhd03 = getRHfromIND(ncfiled03, indxyd03clip, filenames_d03) 181 | 182 | # 10 might be wrong 183 | 184 | # 10 might be wrong 185 | #u10d01,v10d01 = getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'U10'),getWRFfromIND(ncfiled01,indxyd01, filenames_d01,'V10') 186 | u10d02,v10d02 =getWRFfromIND(ncfiled02,indxyd02clip,filenames_d02,'U10'),getWRFfromIND(ncfiled02,indxyd02clip,filenames_d02, 'V10') 187 | u10d03,v10d03 = getWRFfromIND(ncfiled03,indxyd03clip, filenames_d03, 'U10'),getWRFfromIND(ncfiled03,indxyd03clip, filenames_d03, 'V10') 188 | 189 | 190 | if slpon==True: 191 | # slpd01 = getslpfromIND(ncfiled01,indxyd01, filenames_d01,'PSFC') 192 | slpd02 = getslpfromIND(ncfiled02, indxyd02clip, filenames_d02,'PSFC') 193 | slpd03 = getslpfromIND(ncfiled03, indxyd03clip, filenames_d03,'PSFC') 194 | 195 | end=str(time.time()-start) 196 | print('Time to pull variables from netCDF files: '+ end + 's') 197 | 198 | 199 | #q=[t2d01, t2d02, t2d03, raind01, raind02, raind03, rainncd01, rainncd02, rainncd03,rhd01,rhd02,rhd03, u10d01,v10d01,u10d02,v10d02,u10d03,v10d03] 200 | #q1=['t2d01', 't2d02', 't2d03', 'raind01', 'raind02', 'raind03', 'rainncd01', 'rainncd02', 'rainncd03'] 201 | #del t2d01, t2d02, t2d03, raind01, raind02, raind03, rainncd01, rainncd02, rainncd03 202 | q=[t2d02, t2d03,raind02, raind03, rainncd02, rainncd03, rhd02,rhd03,u10d02,u10d03,v10d02,v10d02] 203 | #name=['t2d01.csv', 't2d02.csv', 't2d03.csv', 'raind01.csv', 'raind02.csv', 'raind03.csv', 'rainncd01.csv', 'rainncd02.csv', 'rainncd03.csv',] 204 | #name=['t2d01.csv', 't2d02.csv', 't2d03.csv', 
'raind01.csv', 'raind02.csv', 'raind03.csv', 'rainncd01.csv', 'rainncd02.csv', 'rainncd03.csv','rhd01.csv','rhd02.csv','rhd03.csv', 'u10d01.csv','v10d01.csv','u10d02.csv','v10d02.csv','u10d03.csv','v10d03.csv'] 205 | name=['t2d02.csv', 't2d03.csv', 'raind02.csv', 'raind03.csv', 'rainncd02.csv', 'rainncd03.csv','rhd02.csv','rhd03.csv', 'u10d02.csv','u10d03.csv', 'v10d02.csv','v10d03.csv'] 206 | 207 | for i in range(len(q)): 208 | df= pd.DataFrame(q[i]) 209 | df.to_csv(dirout+name[i]) 210 | 211 | 212 | if slpon==True: 213 | #q1=[slpd01, slpd02, slpd03] 214 | #name1=['slpd01.csv', 'slpd02.csv', 'slpd03.csv'] 215 | q1=[slpd02, slpd03] 216 | name1=[ 'slpd02.csv', 'slpd03.csv'] 217 | for i in range(len(q1)): 218 | df= pd.DataFrame(q1[i]) 219 | df.to_csv(dirout+name1[i]) 220 | 221 | 222 | print("Done with step 2") 223 | 224 | 225 | -------------------------------------------------------------------------------- /PostProcessing/o3_profile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # o3_column_june2021.py 4 | 5 | 6 | # --------------------------------------------------------------------- 7 | # Stacy Montgomery, NOV 2018 - DEC 2018 8 | # Plot o3 column over Chicago to watch how it transitions 9 | # --------------------------------------------------------------------- 10 | # USER INPUT 11 | # --------------------------------------------------------------------- 12 | from netCDF4 import Dataset 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | import pandas as pd 16 | import os 17 | import netCDF4 18 | import math 19 | from scipy.interpolate import griddata 20 | import scipy.stats as st 21 | import cartopy.feature as cfeature 22 | from cartopy import crs as ccrs; 23 | from shapely.ops import unary_union, cascaded_union 24 | from geopandas.tools import sjoin 25 | from shapely.geometry import Point, shape 26 | from cartopy import crs as ccrs; 27 | import geopandas as gpd 28 | import moviepy.editor as mpy 29 | import os 30 | import glob 31 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 32 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 33 | import matplotlib.path as mpath; 34 | from cartopy.io.shapereader import Reader 35 | 36 | import matplotlib.colors as colors 37 | 38 | # --------------------------------------------------------------------- 39 | 40 | # dir to grid file 41 | dir='/projects/b1045/jschnell/ForStacy/' 42 | ll='latlon_ChicagoLADCO_d03.nc' 43 | 44 | dir='/home/asm0384/' 45 | ll = 'lat_lon_chicago_d02.nc' 46 | 47 | dir_cmaq_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 48 | dir_cmaq_d03_wint='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 49 | 50 | dir_cmaq_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 51 | dir_cmaq_d02_wint='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/' 52 | 53 | 54 | #names of lat lons in the cmaq grid 55 | la,lo='lat','lon' # for 1.3km 56 | 57 | # 58 | year='2018' 59 | month='8' 60 | ssn = 'Summer' 61 | 62 | startswith = 'CCTM_CONC' 63 | 64 | # shapes and directories == https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=State+Legislative+Districts 65 | path='/projects/b1045/montgomery/shapefiles/Chicago/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 66 | 
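# Chicago community-area boundaries; unary_union below dissolves them into a single
# footprint, and outsideofunion stores the x/y coordinates of one exterior ring of that
# union (available as a clip path via the commented-out set_boundary call further down).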
chi_shapefile = gpd.GeoDataFrame.from_file(path) 67 | crs_new = ccrs.PlateCarree()# get shape outside 68 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)) 69 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 70 | 71 | # --------------------------------------------------------------------- 72 | 73 | def pull_cmaq(dir_CMAQ,startswith,cmaq_var): 74 | #pull model files from given directoy 75 | onlyfiles = next(os.walk(dir_CMAQ))[2] 76 | onlyfiles.sort() # so that searching for dates are easier 77 | # pull only CONC files 78 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 79 | print(fnames_CMAQ) 80 | # get data files 81 | ncfile_CMAQ_base = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 82 | return ncfile_CMAQ_base 83 | 84 | 85 | 86 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 87 | # stn -- list (points) 88 | # wrf -- list (grid) 89 | xx=[];yy=[] 90 | for i in range(len(stn_lat)): 91 | abslat = np.abs(wrf_lat-stn_lat[i]) 92 | abslon= np.abs(wrf_lon-stn_lon[i]) 93 | c = np.maximum(abslon,abslat) 94 | latlon_idx = np.argmin(c) 95 | x, y = np.where(c == np.min(c)) 96 | #add indices of nearest wrf point station 97 | xx.append(x) 98 | yy.append(y) 99 | #return indices list 100 | return xx, yy 101 | 102 | 103 | def mask_given_shapefile(lon,lat,shapefile): 104 | ''' 105 | Make a mask given a shapefile 106 | lon - array of grid lons 107 | lat - array of grid lats 108 | shapefile - geopandas geodataframe shapefile 109 | ''' 110 | union=gpd.GeoSeries(unary_union(shapefile.geometry)) 111 | mask=np.ones(lon.shape,dtype=bool) 112 | mask[:] = False 113 | for i in range(len(lon)): 114 | for j in range(len(lon[0])): 115 | pt = Point(lon[i][j],lat[i][j]) 116 | mask[i][j] = pt.within(union[0]) 117 | # 118 | return mask 119 | 120 | # --------------------------------------------------------------------- 121 | 122 | 123 | 124 | # I think day 10 is best 125 | # what day is best day for pbl fomation 126 | #fig,ax = plt.subplots(10,3,figsize=(11,11)) 127 | #count = 0 128 | 129 | #for day in range(21,30): 130 | # sli= [np.array([np.array(base[day]['O3'][t][l][xl]).T[yl:yu+3] for l in range(35)]) for t in range(24)] 131 | # utc = 6 132 | # sli_morn = np.mean(sli[7+utc:10+utc],axis=0) 133 | # sli_mid = np.mean(sli[11+utc:14+utc],axis=0) 134 | # sli_after = np.mean(sli[15+utc:18+utc],axis=0) 135 | # #fig,ax = plt.subplots(1,3) 136 | # ax[count][0].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,len(sli[0])),sli_morn,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 137 | # ax[count][1].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,len(sli[0])),sli_mid,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 138 | # ax[count][2].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,len(sli[0])),sli_after,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 139 | # # count = count+1 140 | 141 | #plt.tight_layout() 142 | #plt.savefig('pbl_o3_21-30.png') 143 | 144 | # Make average Chicago slice~ 145 | # chicago box: upper lat lon 146 | #lolo = -87.939930; lola= 41.644543 147 | #ulo = -87.524137; ula = 42.023039 148 | #xu,yu = find_index([ulo],[ula],lon,lat) 149 | #xl,yl = find_index([lolo],[lola],lon,lat) 150 | #xu,yu,xl,yl = xu[0]+4,yu[0]+4,xl[0]-4,yl[0]-4 151 | # 152 | # adjustable plotting parts 153 | llx=Dataset(dir+ll,'r') 154 | #lat,lon=llx['lat'][:],llx['lon'][:] 155 | lat,lon=llx['LAT'][0][0],llx['LON'][0][0] 156 | 157 | 158 | #mask = mask_given_shapefile(lon,lat,chi_shapefile) 159 | base = pull_cmaq(dir_cmaq_d02,startswith,"O3") 160 | 161 | # Pull 
single row from data 162 | la = 41.8 163 | lolo = -87.939930; ulo = -87.524137; 164 | xu,yu = find_index([ulo],[la],lon,lat) 165 | xl,yl = find_index([lolo],[la],lon,lat) 166 | 167 | xu,yu,xl,yl = xu[0][0],yu[0][0],xl[0][0]+2,yl[0][0] 168 | 169 | # # check where we're plotting 170 | crs_new = ccrs.PlateCarree() 171 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new},figsize=(8, 6)) 172 | #axs.scatter(lon[xl:xu].T[yl:yu],lat[xl:xu].T[yl:yu]) 173 | #axs.scatter(lon[xl].T[yl:yu+3],lat[xl].T[yl:yu+3]) 174 | #axs.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 175 | chi_shapefile.plot(ax=axs,facecolor="None") 176 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 177 | axs.plot(lon[xl].T[yl:yu+3],lat[xl].T[yl:yu+3]) 178 | plt.show() 179 | 180 | #morning = [7,8,9,10] 181 | #midday = [12,13,14] 182 | #after = [16,17,18] 183 | 184 | 185 | # make slices of 2 rep days 186 | # ozone profiles 187 | utc = 6 188 | day = 9 189 | 190 | 191 | base = pull_cmaq(dir_cmaq_d02,startswith,"O3") 192 | sli= [np.array([np.array(base[day]['NO2'][t][l][xl]).T[yl:yu+3] for l in range(35)]) for t in range(24)] 193 | sli_morn = np.mean(sli[7+utc:10+utc],axis=0)*1000 194 | sli_mid = np.mean(sli[11+utc:14+utc],axis=0)*1000 195 | sli_after = np.mean(sli[16+utc:17+utc],axis=0)*1000 196 | 197 | del base 198 | del sli 199 | 200 | wbase = pull_cmaq(dir_cmaq_d02_wint,startswith,"O3") 201 | wbase = wbase[10:] 202 | wint_sli= [np.array([np.array(wbase[day]["NO2"][t][l][xl]).T[yl:yu+3] for l in range(35)]) for t in range(24)] 203 | wsli_morn = np.mean(wint_sli[7+utc:10+utc],axis=0)*1000 204 | wsli_mid = np.mean(wint_sli[11+utc:14+utc],axis=0)*1000 205 | wsli_after = np.mean(wint_sli[16+utc:17+utc],axis=0)*1000 206 | 207 | del wbase 208 | del wint_sli 209 | 210 | 211 | vmin = 0 212 | vmax = 30 213 | 214 | 215 | cmap = 'Purples' 216 | fig,ax = plt.subplots(2,3,figsize = (10,7)) 217 | ax=ax.ravel() 218 | #ax[0].pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli_morn,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 219 | #ax[1].pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli_mid,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 220 | #cs = ax[2].pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli_after,norm=colors.LogNorm(vmin = 0.01, vmax = 0.1)) 221 | im =ax[0].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),sli_morn,vmin = vmin, vmax = vmax,cmap=cmap) 222 | im =ax[1].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),sli_mid,vmin = vmin, vmax = vmax,cmap=cmap) 223 | im = ax[2].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),sli_after,vmin = vmin, vmax = vmax,cmap=cmap) 224 | #cb = plt.colorbar(cs) 225 | ax[0].set_title('(a) Summer 7 - 10 AM'); ax[1].set_title('(b) Summer 11 AM - 2 PM'); ax[2].set_title('(c) Summer 4 - 7 PM'); 226 | 227 | im = ax[3].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),wsli_morn,vmin = vmin, vmax = vmax,cmap=cmap) 228 | im = ax[4].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),wsli_mid,vmin = vmin, vmax = vmax,cmap=cmap) 229 | im = ax[5].pcolormesh(lon[xl].T[yl:yu+3],np.arange(0,35),wsli_after,vmin = vmin, vmax = vmax,cmap=cmap) 230 | ax[3].set_title('(d) Winter 7 - 10 AM'); ax[4].set_title('(e) Winter 11 AM - 2 PM'); ax[5].set_title('(f) Winter 4 - 7 PM'); 231 | 232 | # make lake line 233 | [ax[i].axvline(x=-87.6,alpha=0.8,c = 'k', linestyle="dotted") for i in range(len(ax))] # line showing lakeshore 234 | [ax[i].set_ylim(0,25) for i in range(len(ax))] # line showing lakeshore 235 | fig.colorbar(im, 
ax=ax.tolist()) 236 | #plt.show() 237 | 238 | plt.savefig('no2_profile_d02.png',transparent=True) 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | # make gif 248 | for day in range(7): 249 | sli= [np.array([np.array(base[day]['O3'][t][l][xl:xu]).T[yl:yu].mean(axis=-1) for l in range(35)]) for t in range(24)] 250 | for t in range(len(sli)): 251 | plt.figure() 252 | plt.pcolormesh(lon[xl].T[yl:yu],np.arange(0,len(sli[0])),sli[t],vmin=0,vmax=.08) 253 | plt.title('Day %i, Hour %i'%(day,t)) 254 | plt.xlabel('Longitude') 255 | plt.ylabel('Layer') 256 | plt.savefig('Chi_o3_day_%i_hour_%i.png'%(day,t)) 257 | plt.close() 258 | 259 | 260 | # 261 | gif_name = 'o3_column' 262 | fps = 6 263 | file_list = ['Chi_o3_day_%i_hour_%i.png'%(day,t) for day in range(7) for t in range(24)] 264 | clip = mpy.ImageSequenceClip(file_list, fps=fps) 265 | clip.write_gif('{}.gif'.format(gif_name), fps=fps) 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /Validation/station_validation.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # model validation table for CHEMICALS 4 | # model validation table for ground-based 5 | # Uses wrf and cmaq output 6 | # Epa data from AQS yearly summaries 7 | # NCDC stations are loaded in 8 | # created normalized and non-normalized testing statistics ... 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import scipy.stats as st 13 | import wrf 14 | from netCDF4 import Dataset 15 | import glob,os 16 | import matplotlib.pyplot as plt 17 | import scipy.stats as st 18 | 19 | #input 20 | #dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/' 21 | #dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 22 | 23 | dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 24 | dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 25 | dir = '/home/asm0384/CMAQcheck/' 26 | 27 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 28 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv','NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 29 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 30 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv','O3_d02_2019_1_EPA_CMAQ_Combine.csv', 31 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv', 32 | 'PM25_TOT_d02_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d02_2019_1_EPA_CMAQ_Combine.csv'] 33 | 34 | # functions 35 | def stats(data,prediction): 36 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 37 | mu_d,mu_p = np.mean(x),np.mean(y) 38 | bias = np.sum(x-y)/len(x) 39 | rmse = np.sqrt(np.mean((y-x)**2)) 40 | r,p = st.pearsonr(x,y) 41 | return mu_d,mu_p,bias,rmse,r,p 42 | 43 | # functions 44 | def stats_normalized(data,prediction): 45 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 46 | mu_d,mu_p = np.mean(x),np.mean(y) 47 | nmb = np.sum(y-x)/np.sum(x)*100 48 | nme = np.sum(np.abs(y-x))/np.sum(x)*100 49 | r,p = st.pearsonr(x,y) 50 | return mu_d,mu_p,nmb,nme,r,p 51 | 52 | def pull_winds(dirwrf,fnames,xx,yy): 53 | fws,fwd = [],[] 54 | for q in range(len(fnames)): 55 | wrfout = wrf.g_uvmet.get_uvmet10_wspd_wdir(Dataset(dirwrf + fnames[q]),wrf.ALL_TIMES) 56 | winds = [[wrfout.data[0][hour][xx[i]][yy[i]] for i in range(len(xx))] for 
hour in range(24)] 57 | winddir = [[wrfout.data[1][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 58 | fws.append(winds) 59 | fwd.append(winddir) 60 | # return 61 | return fws,fwd 62 | 63 | 64 | # start 65 | out = [] 66 | out2 = [] 67 | indnames = ['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 68 | 69 | for i in range(len(fnames)): 70 | f = pd.read_csv(dir+fnames[i]) 71 | if i>3 and i<8: 72 | s = stats(f['Sample Measurement']*1000,f['CMAQ']) 73 | s2 = stats_normalized(f['Sample Measurement']*1000,f['CMAQ']) 74 | else: 75 | s = stats(f['Sample Measurement'],f['CMAQ']) 76 | s2 = stats_normalized(f['Sample Measurement'],f['CMAQ']) 77 | out.append(s) 78 | out2.append(s2) 79 | #if len(f[f['level_0']=='2018-08-01 00:00:00']) >0: print(indnames[i]+'| number of stations = %i'%len(f[f['level_0']=='2018-08-01 00:00:00'])) 80 | #if len(f[f['level_0']=='2019-01-02 00:00:00']) >0: print(indnames[i]+'winter| number of stations = %i'%len(f[f['level_0']=='2019-01-02 00:00:00'])) 81 | print('%s| number of stations = %.1f'%(indnames[i],len(f['Longitude'].unique())-1)) 82 | 83 | out = pd.DataFrame(out) 84 | out.columns=['mu_d','mu_p','bias','rmse','r','p'] 85 | 86 | out.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 87 | 88 | out 89 | 90 | out2 = pd.DataFrame(out2) 91 | out2.columns=['mu_d','mu_p','MB','NME','r','p'] 92 | out2.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 93 | 94 | out2.to_csv('~/chemicals_normalized.csv') 95 | 96 | 97 | 98 | # model validation name for meteorology 99 | #getting wrf windspeed/directions: 100 | # NEED TO DO FOR WINTER 101 | # knots to m/s - knots/1.9438444924406 102 | sim = 'output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852' 103 | 104 | windstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_Wind.csv',index_col=0) 105 | windDirstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_WindDir.csv',index_col=0) 106 | times = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/completeddata_mini_extras2.csv') 107 | #check winter and summer times for station index 108 | 109 | xx_d02,yy_d02 = np.array(windstn['xx_d02']),np.array(windDirstn['yy_d02']) 110 | xx_d03,yy_d03 = np.array(windstn[windstn['in_d03']==True]['xx_d03']),np.array(windstn[windstn['in_d03']==True]['yy_d03']) 111 | 112 | # 10*24+1:-24-9 113 | # :744 114 | 115 | fws_stn_d03 = np.array(windstn[windstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 116 | fwd_stn_d03 = np.array(windDirstn[windDirstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 117 | 118 | #fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[:744],dtype='float32') 119 | #fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[:744],dtype='float32') 120 | fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 121 | fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 122 | 123 | 124 | filenames_d02=[] 125 | os.chdir(dirToWRF_d02) 126 | for file in glob.glob("wrfout_d01_*"): 127 | filenames_d02.append(file) 128 | 
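# sorting puts the wrfout files in date order, since the date is embedded in the file
# name; note that both the 4 km (d02) and 1.33 km (d03) directories are globbed with
# the wrfout_d01_ prefix here and in wrf_to_stations_step2.py, so each nest appears to
# have been run as its own outer domain.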
129 | filenames_d02.sort() 130 | 131 | # $1 Get WRF file names 132 | filenames_d03=[] 133 | os.chdir(dirToWRF_d03) 134 | for file in glob.glob("wrfout_d01_*"): 135 | filenames_d03.append(file) 136 | 137 | filenames_d03.sort() 138 | 139 | # pull wind and dir 140 | fws_d02,fwd_d02 = pull_winds(dirToWRF_d02,filenames_d02[10:-1],xx_d02,yy_d02) 141 | fws_d03,fwd_d03 = pull_winds(dirToWRF_d03,filenames_d03[10:-1],xx_d03,yy_d03) 142 | 143 | 144 | # make array and reshape 145 | fws_d03=np.asarray(fws_d03) 146 | fws_d03 = np.array([fws_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 147 | fwd_d03 = np.array([fwd_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 148 | 149 | fws_d02=np.asarray(fws_d02) 150 | fws_d02 = np.array([fws_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 151 | fwd_d02 = np.array([fwd_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 152 | 153 | #d03 154 | 155 | b=fws_d03.ravel() 156 | a=fws_stn_d03.ravel()/1.9438444924406 157 | stwspd_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 158 | 159 | b=fwd_d03.ravel() 160 | a=fwd_stn_d03.ravel() 161 | stwdir_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 162 | 163 | # d02 164 | 165 | b=fws_d02.ravel() 166 | a=fws_stn_d02.ravel()/1.9438444924406 167 | stwspd_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 168 | 169 | b=fwd_d02.T.ravel() 170 | a=fwd_stn_d02.ravel() 171 | stwdir_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 172 | 173 | dfout=pd.DataFrame([stwspd_d02,stwdir_d02,stwspd_d03,stwdir_d03]) 174 | dfout.index = ['speed_d02','dir_d02','speed_d03','dir_d03'] 175 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 176 | 177 | 178 | dfout.to_csv('~/windmetrics_summer_normalized.csv') 179 | 180 | ##----------- 181 | # get temperature and RH shit from combine aconc 182 | # pull station data again 183 | tmpstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_082018.csv',index_col=0) 184 | rhstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_RH.csv',index_col=0) 185 | 186 | xx_d02,yy_d02 = np.array(tmpstn['xx_d02']),np.array(tmpstn['yy_d02']) 187 | xx_d03,yy_d03 = np.array(tmpstn[tmpstn['in_d03']==True]['xx_d03']),np.array(tmpstn[tmpstn['in_d03']==True]['yy_d03']) 188 | 189 | # check completedatamini for the times associated with the indices 190 | wint_ind = ':744' 191 | sum_ind = '11*24+1:-9' 192 | 193 | temp_stn_d03 = np.array(tmpstn[tmpstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 194 | rh_stn_d03 = np.array(rhstn[rhstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 195 | 196 | temp_stn_d02 = np.array(tmpstn[tmpstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 197 | rh_stn_d02 = np.array(rhstn[rhstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 198 | 199 | # pull aconc files 200 | filenames_d02=[] 201 | os.chdir(dirToWRF_d02+'/postprocess/') 202 | for file in glob.glob("COMBINE_ACONC*"): 203 | filenames_d02.append(file) 204 | 205 | filenames_d02.sort() 206 | 207 | # $1 Get WRF file names 208 | filenames_d03=[] 209 | os.chdir(dirToWRF_d03+'/postprocess/') 210 | for file in glob.glob("COMBINE_ACONC*"): 211 | filenames_d03.append(file) 212 | 213 | filenames_d03.sort() 214 | 215 | 216 | def get_temp_rh(dirToWRF_d02,filenames_d02,var,xx,yy): 217 | d2=[] 218 | for q in range(len(filenames_d02)): 219 | nc = Dataset(dirToWRF_d02 +'/postprocess/'+ filenames_d02[q]) 220 | d=[[nc[var][hour][0][xx[i]][yy[i]] for i in range(len(xx))] for hour in 
range(24)] 221 | d2.append(d) 222 | # 223 | d2=np.asarray(d2) 224 | d2 = np.array([d2[i][x] for i in range(len(filenames_d02)) for x in range(24)]) 225 | # 226 | return d2 227 | 228 | temp_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'SFC_TMP',xx_d02,yy_d02) 229 | temp_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],"SFC_TMP",xx_d03,yy_d03) 230 | rh_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'RH',xx_d02,yy_d02) 231 | rh_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],'RH',xx_d03,yy_d03) 232 | 233 | 234 | b=temp_d02.ravel() 235 | a=(temp_stn_d02.ravel()-32)*5/9 236 | st_temp_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 237 | 238 | b=temp_d03.ravel() 239 | a=(temp_stn_d03.ravel()-32)*5/9 240 | st_temp_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 241 | 242 | 243 | b=rh_d02.ravel() 244 | a=rh_stn_d02.ravel() 245 | st_rh_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 246 | 247 | b=rh_d03.ravel() 248 | a=rh_stn_d03.ravel() 249 | st_rh_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 250 | 251 | 252 | dfout=pd.DataFrame([st_temp_d02,st_rh_d02,st_temp_d03,st_rh_d03]) 253 | dfout.index = ['temp_d02','rh_d02','temp_d03','rh_d03'] 254 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 255 | 256 | 257 | dfout.to_csv('~/temp_rh_summermetrics_normalized.csv') 258 | 259 | 260 | -------------------------------------------------------------------------------- /three_panel_epa_gif.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #libraries 4 | from matplotlib import pyplot as plt ; from matplotlib import colors 5 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 6 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 7 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 8 | import geopandas as gpd; import glob; import os; from datetime import timedelta, date; 9 | from netCDF4 import Dataset; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 10 | import matplotlib.path as mpath; import seaborn as sns; import timeit; from cartopy import crs as ccrs 11 | 12 | import matplotlib.gridspec as gridspec 13 | from scipy.stats import pearsonr 14 | from cartopy.feature import NaturalEarthFeature as cfeature 15 | 16 | 17 | dir_EPA = '/home/asm0384/ChicagoStudy/inputs/EPA_hourly_station_data/' 18 | 19 | 20 | #---------------------------------------------------------------------------------------- 21 | # User input 22 | #---------------------------------------------------------------------------------------- 23 | 24 | gmt_offset = 7 25 | 26 | # directory to model files 27 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 28 | dir_SMOKE='/projects/b1045/jschnell/ForAmy/smoke_out/base/' 29 | dir_WRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 30 | 31 | #directory to grid file 32 | dir_GRID='/projects/b1045/jschnell/ForStacy/latlon_ChicagoLADCO_d03.nc' 33 | 34 | # dir to lat lon 35 | dir='/projects/b1045/jschnell/ForStacy/' 36 | ll='latlon_ChicagoLADCO_d03.nc' 37 | 38 | # CMAQ RUN things 39 | domain='d03' 40 | time='hourly' 41 | year='2018' 42 | month='8' 43 | 44 | #directory to chicago shapefile 45 | dir_shapefile='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 46 | 47 | # this will use just the epa var 48 | 
cmaq_var=['O3','NO2','NO','CO','ISOP','SO2','FORM','PM25_TOT'] 49 | smoke_var=['NO2','NO','CO','ISOP','SO2','FORM'] 50 | epa_code=['42401','42602','44201','42101']; var=['SO2','NO2','O3','CO'] 51 | 52 | 53 | # pull epa 54 | dir_epa='/home/asm0384/CMAQcheck/' 55 | 56 | epa_condense=[dir_epa+'%s_%s_%s_%s_EPA_CMAQ_Combine.csv'%(var[loop],domain,year,month) for loop in range(len(epa_code))] 57 | so2_epa,no2_epa,o3_epa,co_epa = [pd.read_csv(epa_condense[i]) for i in range(len(epa_condense))] 58 | 59 | # set up shape of cmaq indexing 60 | shape = (32,24,1,288,315) 61 | 62 | #------------------------------------------------------------------------------------------- 63 | # User defined functions 64 | #------------------------------------------------------------------------------------------- 65 | 66 | def pull_CMAQ(dir_CMAQ_BASE,startswith,cmaq_var,version): 67 | #pull files from given directoy 68 | #for i in range(1): 69 | onlyfiles = next(os.walk(dir_CMAQ_BASE))[2] 70 | onlyfiles.sort() # so that searching for dates are easier 71 | fnames_CMAQ_BASE = [x for x in onlyfiles if x.startswith(startswith)] 72 | ncfile_CMAQ_base = [Dataset(dir_CMAQ_BASE+ fnames_CMAQ_BASE[i],'r') for i in range(len(fnames_CMAQ_BASE))] 73 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 74 | #full day conc 75 | cmaq_avgs_BASE = []; cmaq_avgs_daily_BASE = []; cmaq_avgs_hourly_BASE = []; all_hours =[] 76 | # make averages for cmaq base 77 | for i in range(len(cmaq_var)): 78 | #for i in range(1): 79 | tmp = np.asarray([ncfile_CMAQ_base[j][cmaq_var[i]] for j in range(len(ncfile_CMAQ_base))]) 80 | hourly = np.average(tmp,axis=0) # hour by hour concs 81 | daily = np.average(tmp,axis=1) # daily average concs 82 | # 83 | monthly = np.average(daily,axis=0) 84 | #if writeoutcsv == True: pd.DataFrame(monthly[0]).to_csv(cmaq_var[i]+'_'+version+'_2018_aug.csv', header=False,index=False) 85 | cmaq_avgs_BASE.append(monthly[0]) 86 | cmaq_avgs_daily_BASE.append(daily) 87 | cmaq_avgs_hourly_BASE.append(hourly) 88 | all_hours.append(tmp) 89 | #return 90 | if Chatty: print('Done with ' +cmaq_var[i]) 91 | #return 92 | return cmaq_avgs_BASE, cmaq_avgs_daily_BASE, cmaq_avgs_hourly_BASE, all_hours, units_cmaq 93 | 94 | 95 | def adjust_spines(ax,spines): 96 | for loc, spine in ax.spines.items(): 97 | if loc in spines: 98 | spine.set_position(('outward', 10)) # outward by 10 points 99 | else: 100 | spine.set_color('none') # don't draw spine 101 | # turn off ticks where there is no spine 102 | if 'left' in spines: 103 | ax.yaxis.set_ticks_position('left') 104 | else: 105 | # no yaxis ticks 106 | ax.yaxis.set_ticks([]) 107 | if 'bottom' in spines: 108 | ax.xaxis.set_ticks_position('bottom') 109 | else: 110 | # no xaxis ticks 111 | ax.xaxis.set_ticks([]) 112 | 113 | 114 | def add_gmt_offset(list_of_hours,gmt_offset): 115 | update_list = [] 116 | for i in range(len(list_of_hours)): 117 | if list_of_hours[i] + gmt_offset > 23: 118 | update_list.append(list_of_hours[i] + gmt_offset - 24) 119 | elif list_of_hours[i] + gmt_offset < 0: 120 | update_list.append(list_of_hours[i] + gmt_offset + 24) 121 | else: update_list.append(list_of_hours[i] + gmt_offset) 122 | # return 123 | return update_list 124 | 125 | 126 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 127 | # stn -- points in a list (list, can be a list of just 1) 128 | # wrf -- gridded wrf lat/lon (np.array) 129 | #for iz in range(1): 130 | xx=[];yy=[] 131 | for i in range(len(stn_lat)): 132 | abslat = np.abs(wrf_lat-stn_lat[i]) 133 | abslon= 
np.abs(wrf_lon-stn_lon[i]) 134 | c = np.maximum(abslon,abslat) 135 | latlon_idx = np.argmin(c) 136 | x, y = np.where(c == np.min(c)) 137 | #add indices of nearest wrf point station 138 | xx.append(x) 139 | yy.append(y) 140 | # 141 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 142 | #return indices list 143 | return xx, yy 144 | 145 | 146 | 147 | #------------------------------------------------------------------------------------------- 148 | # 149 | #------------------------------------------------------------------------------------------- 150 | 151 | 152 | # get dates 153 | startswith = 'COMBINE_ACONC_' 154 | onlyfiles = next(os.walk(dir_CMAQ))[2] 155 | onlyfiles.sort() # so that searching for dates are easier 156 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 157 | dates=[fnames_CMAQ[i].split(startswith)[1].split('.nc')[0] for i in range(len(fnames_CMAQ))] 158 | 159 | # get lat lon 160 | llx=Dataset(dir+ll,'r') 161 | lat,lon=llx['lat'][:],llx['lon'][:] 162 | 163 | 164 | # ============================================ 165 | # make fancy plot to plot full days 166 | # ============================================ 167 | ''' 168 | no2_epa = chemical data over domain with nearest cmaq pixel. dataframe. 169 | 170 | ''' 171 | 172 | def tri_plot(epa, ncfile_CMAQ, var, v, picdir, printout=False): 173 | #data prep 174 | epa['level_0']=pd.to_datetime(epa['level_0']) 175 | epa['month-day'] = pd.to_datetime(epa['level_0']).dt.to_period('D') 176 | #epa=epa.groupby('month-day').mean() 177 | vmin=round(np.percentile(ncfile_CMAQ[0][var[v]][0][0].ravel(),0.01)) 178 | vmax=round(np.percentile(ncfile_CMAQ[0][var[v]][0][0].ravel(),99.99)) 179 | # start plotting 180 | cmap = 'magma_r' 181 | crs_new = ccrs.PlateCarree() 182 | for d in range(shape[0]): 183 | for h in range(shape[1]): 184 | # set up fig 185 | fig = plt.figure(figsize=(10,8)) 186 | #fig.execute_constrained_layout() 187 | widths = [2, 2] 188 | heights = [5, 2] 189 | gs = fig.add_gridspec(ncols=2, nrows=2, width_ratios=widths,height_ratios=heights) 190 | # set up plot 191 | # 192 | # PLOT 1 193 | # make map plot on top 194 | tmp = epa[epa['level_0']==epa['level_0'][h+d*24]] 195 | levels = np.arange(vmin, vmax, (vmax-vmin)/10) 196 | ax = fig.add_subplot(gs[0, :],projection= crs_new) 197 | cs = ax.pcolor(lon,lat, ncfile_CMAQ[d][var[v]][h][0],transform=crs_new,cmap = cmap,vmin=vmin,vmax=vmax) 198 | ax.scatter(tmp['Longitude'],tmp['Latitude'],c= tmp['Sample Measurement'], cmap = cmap, vmin = vmin, vmax = vmax,s=75,edgecolors = 'black') 199 | cbar=plt.colorbar(cs,boundaries=levels,shrink = 0.75,label='ppbV') 200 | cbar.set_ticks(levels) 201 | states_provinces = cfeature(category='cultural',name='admin_1_states_provinces_lines',scale='50m',facecolor='none') 202 | land = cfeature('physical', 'lakes', '10m',edgecolor='black',facecolor='none') 203 | ax.add_feature(land, edgecolor='black') 204 | ax.add_feature(states_provinces, edgecolor='black',alpha = 0.5) 205 | b = .8 206 | xl,xu,yl,yu = lon.min()+b,lon.max()-b,lat.min()+b,lat.max()-b 207 | ax.set_extent([xl,xu,yl,yu], crs= crs_new) 208 | plt.title(var[v]+' on '+str(epa['level_0'][h+d*24])) 209 | # PLOT 2 210 | # make 1:1 plot 211 | ax1 = fig.add_subplot(gs[1, 1]) 212 | tmp = epa[epa['level_0']==epa['level_0'][h+d*24]] 213 | # 214 | for label in range(len(tmp['County Name'])): 215 | l = tmp.index[label] 216 | if tmp['Sample Measurement'][l] == np.nan: print('movin') 217 | else: ax1.scatter(tmp['Sample Measurement'][l],tmp['CMAQ'][l],label= tmp['County 
Name'][l],color = plt.get_cmap('Blues',len(tmp))(label)) 218 | # 219 | plt.xlabel('Sample Measurement (ppbv)'); plt.ylabel('CMAQ (ppbv)') 220 | #plt.legend( loc='upper center', bbox_to_anchor=(.5, 1.5), ncol=4, prop={'size': 6},) 221 | scr, mcr = tmp['Sample Measurement'], tmp['CMAQ'] 222 | bad = np.isnan(scr) 223 | r = round(pearsonr(mcr[~bad],scr[~bad])[0],2) 224 | plt.title(f'Station vs. CMAQ Pixel: r = {r}') 225 | plt.xlim([tmp['CMAQ'].min()*.8,tmp['CMAQ'].max()*1.2]); plt.ylim([tmp['CMAQ'].min()*.8,tmp['CMAQ'].max()*1.2]) 226 | ax1.plot([-1000,1000],[-1000,1000],c='black',alpha = 0.75) 227 | # PLOT 3 228 | ##make diurnal plot 229 | #for i in range(1): 230 | ax2 = fig.add_subplot(gs[1, 0]) 231 | tmp2=epa.groupby('level_0').mean()['Sample Measurement'] 232 | tmp2.plot.line(linestyle='--',color= plt.get_cmap('Blues',8)(1),ax=ax2,label='EPA') 233 | tmp2=epa.groupby('level_0').mean()['CMAQ'] 234 | tmp2.index.name = 'Dates' 235 | tmp2.plot.line(color=plt.get_cmap('Blues',8)(5),ax=ax2) 236 | ax2.scatter(tmp2.index[24*d+h],tmp2[24*d+h], marker='*',color='pink',s=200) 237 | ax2.set_xlim(tmp2.index[h]+timedelta(days=d-1), tmp2.index[h]+timedelta(days=d+1)) 238 | # 239 | plt.legend( loc='upper center', ncol=4, prop={'size': 8},) 240 | # 241 | plt.savefig(picdir+var[v]+'_'+'day'+str(d)+'_hour'+str(h)+'.png', orientation='landscape') 242 | plt.close() 243 | if printout== True: print(f'Done with day {d} hour {h}') 244 | 245 | 246 | startswith = 'COMBINE_ACONC_' 247 | 248 | onlyfiles = next(os.walk(dir_CMAQ))[2] 249 | onlyfiles.sort() # so that searching for dates are easier 250 | fnames_CMAQ = [x for x in onlyfiles if x.startswith(startswith)] 251 | ncfile_CMAQ = [Dataset(dir_CMAQ+ fnames_CMAQ[i],'r') for i in range(len(fnames_CMAQ))] 252 | 253 | 254 | picdir = '/home/asm0384/gifs/' 255 | 256 | tri_plot(so2_epa, ncfile_CMAQ, var, 0, picdir, False) 257 | 258 | 259 | v=1; epa = no2_epa 260 | tri_plot(epa, ncfile_CMAQ, var, v, picdir, False) 261 | 262 | 263 | v=2; epa = o3_epa 264 | tri_plot(epa, ncfile_CMAQ, var, v, picdir, False) 265 | 266 | v=3; epa = co_epa 267 | tri_plot(epa, ncfile_CMAQ, var, v, picdir, False) 268 | -------------------------------------------------------------------------------- /validation/model_validation_statistics.py: -------------------------------------------------------------------------------- 1 | # model validation table for CHEMICALS 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import scipy.stats as st 6 | import wrf 7 | from netCDF4 import Dataset 8 | import glob,os 9 | import matplotlib.pyplot as plt 10 | import scipy.stats as st 11 | 12 | #input 13 | #dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_4km_sf_rrtmg_10_8_1_v3852/' 14 | #dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852/' 15 | 16 | dirToWRF_d02='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_4km_sf_rrtmg_10_8_1_v3852/' 17 | dirToWRF_d03='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 18 | dir = '/projects/b1045/montgomery/' 19 | 20 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 21 | 'NO2_d02_2018_8_EPA_CMAQ_Combine.csv','NO2_d02_2019_1_EPA_CMAQ_Combine.csv', 22 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 23 | 'O3_d02_2018_8_EPA_CMAQ_Combine.csv','O3_d02_2019_1_EPA_CMAQ_Combine.csv', 24 | 
'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv', 25 | 'PM25_TOT_d02_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d02_2019_1_EPA_CMAQ_Combine.csv'] 26 | 27 | # functions 28 | def stats(data,prediction): 29 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 30 | mu_d,mu_p = np.mean(x),np.mean(y) 31 | bias = np.sum(x-y)/len(x) 32 | rmse = np.sqrt(np.mean((y-x)**2)) 33 | r,p = st.pearsonr(x,y) 34 | return mu_d,mu_p,bias,rmse,r,p 35 | 36 | # functions 37 | def stats_normalized(data,prediction): 38 | x,y=data[~np.isnan(data)],prediction[~np.isnan(data)] # get rid of NaNs 39 | mu_d,mu_p = np.mean(x),np.mean(y) 40 | nmb = np.sum(y-x)/np.sum(x)*100 41 | nme = np.sum(np.abs(y-x))/np.sum(x)*100 42 | r,p = st.pearsonr(x,y) 43 | return mu_d,mu_p,nmb,nme,r,p 44 | 45 | def pull_winds(dirwrf,fnames,xx,yy): 46 | fws,fwd = [],[] 47 | for q in range(len(fnames)): 48 | wrfout = wrf.g_uvmet.get_uvmet10_wspd_wdir(Dataset(dirwrf + fnames[q]),wrf.ALL_TIMES) 49 | winds = [[wrfout.data[0][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 50 | winddir = [[wrfout.data[1][hour][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 51 | fws.append(winds) 52 | fwd.append(winddir) 53 | # return 54 | return fws,fwd 55 | 56 | 57 | # start 58 | out = [] 59 | out2 = [] 60 | indnames = ['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 61 | 62 | for i in range(len(fnames)): 63 | f = pd.read_csv(dir+'/CMAQcheck/'+fnames[i]) 64 | if i>3 and i<8: 65 | s = stats(f['Sample Measurement']*1000,f['CMAQ']) 66 | s2 = stats_normalized(f['Sample Measurement']*1000,f['CMAQ']) 67 | else: 68 | s = stats(f['Sample Measurement'],f['CMAQ']) 69 | s2 = stats_normalized(f['Sample Measurement'],f['CMAQ']) 70 | out.append(s) 71 | out2.append(s2) 72 | #if len(f[f['level_0']=='2018-08-01 00:00:00']) >0: print(indnames[i]+'| number of stations = %i'%len(f[f['level_0']=='2018-08-01 00:00:00'])) 73 | #if len(f[f['level_0']=='2019-01-02 00:00:00']) >0: print(indnames[i]+'winter| number of stations = %i'%len(f[f['level_0']=='2019-01-02 00:00:00'])) 74 | print('%s| number of stations = %.1f'%(indnames[i],len(f['Longitude'].unique())-1)) 75 | 76 | out = pd.DataFrame(out) 77 | out.columns=['mu_d','mu_p','bias','rmse','r','p'] 78 | 79 | out.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 80 | 81 | out 82 | 83 | out2 = pd.DataFrame(out2) 84 | out2.columns=['mu_d','mu_p','MB','NME','r','p'] 85 | out2.index=['NO2_d03_Summer','NO2_d03_Winter','NO2_d02_Summer','NO2_d02_Winter','O3_d03_Summer','O3_d03_Winter','O3_d02_Summer','O3_d02_Winter','PM25_d03_Summer','PM25_d03_Winter','PM25_d02_Summer','PM25_d02_Winter',] 86 | 87 | out2.to_csv('~/chemicals_normalized.csv') 88 | 89 | 90 | 91 | # model validation name for meteorology 92 | #getting wrf windspeed/directions: 93 | # NEED TO DO FOR WINTER 94 | # knots to m/s - knots/1.9438444924406 95 | sim = 'output_BASE_FINAL_wint_1.33km_sf_rrtmg_5_8_1_v3852' 96 | 97 | windstn = pd.read_csv(dir+'/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_Wind.csv',index_col=0) 98 | windDirstn = pd.read_csv(dir+'WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_WindDir.csv',index_col=0) 99 | times = 
pd.read_csv(dir+'WRFcheck/'+sim+'/completeddata_mini_extras2.csv') 100 | #check winter and summer times for station index 101 | 102 | xx_d02,yy_d02 = np.array(windstn['xx_d02']),np.array(windDirstn['yy_d02']) 103 | xx_d03,yy_d03 = np.array(windstn[windstn['in_d03']==True]['xx_d03']),np.array(windstn[windstn['in_d03']==True]['yy_d03']) 104 | 105 | # 10*24+1:-24-9 106 | # :744 107 | 108 | fws_stn_d03 = np.array(windstn[windstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 109 | fwd_stn_d03 = np.array(windDirstn[windDirstn['in_d03']==True].T[10*24+1:-24-9],dtype='float32') 110 | 111 | #fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[:744],dtype='float32') 112 | #fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[:744],dtype='float32') 113 | fws_stn_d02 = np.array(windstn[windstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 114 | fwd_stn_d02 = np.array(windDirstn[windDirstn['in_d02']==True].T[10*24+1:-24-9],dtype='float32') 115 | 116 | 117 | filenames_d02=[] 118 | os.chdir(dirToWRF_d02) 119 | for file in glob.glob("wrfout_d01_*"): 120 | filenames_d02.append(file) 121 | 122 | filenames_d02.sort() 123 | 124 | # $1 Get WRF file names 125 | filenames_d03=[] 126 | os.chdir(dirToWRF_d03) 127 | for file in glob.glob("wrfout_d01_*"): 128 | filenames_d03.append(file) 129 | 130 | filenames_d03.sort() 131 | 132 | # pull wind and dir 133 | fws_d02,fwd_d02 = pull_winds(dirToWRF_d02,filenames_d02[10:-1],xx_d02,yy_d02) 134 | fws_d03,fwd_d03 = pull_winds(dirToWRF_d03,filenames_d03[10:-1],xx_d03,yy_d03) 135 | 136 | 137 | # make array and reshape 138 | fws_d03= np.asarray(fws_d03) 139 | fws_d03 = np.array([fws_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 140 | fwd_d03 = np.array([fwd_d03[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 141 | 142 | fws_d02=np.asarray(fws_d02) 143 | fws_d02 = np.array([fws_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 144 | fwd_d02 = np.array([fwd_d02[i][x] for i in range(len(filenames_d02[10:-1])) for x in range(24)]) 145 | 146 | #d03 147 | 148 | ld03= windstn[windstn['in_d03']==True] 149 | ld03 = ld03[['lat','lon']] 150 | ld03.reset_index(inplace=True,drop=True) 151 | 152 | ld02= windstn[windstn['in_d02']==True] 153 | ld02 = ld02[['lat','lon']] 154 | ld02.reset_index(inplace=True,drop=True) 155 | 156 | 157 | 158 | #write out files 159 | if write ==True: 160 | pd.DataFrame(fws_d03/1.9438444924406).append(ld03.T).to_csv('~/wrf_winds_jan_d03.csv') 161 | pd.DataFrame(fws_d02/1.9438444924406).append(ld02.T).to_csv('~/wrf_winds_jan_d02.csv') 162 | pd.DataFrame(fwd_d03).append(ld03.T).to_csv('~/wrf_winddir_jan_d03.csv') 163 | pd.DataFrame(fwd_d02).append(ld02.T).to_csv('~/wrf_winddir_jan_d02.csv') 164 | 165 | 166 | b=fws_d03.ravel() 167 | a=fws_stn_d03.ravel()/1.9438444924406 168 | stwspd_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 169 | 170 | b=fwd_d03.ravel() 171 | a=fwd_stn_d03.ravel() 172 | stwdir_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 173 | 174 | # d02 175 | 176 | b=fws_d02.ravel() 177 | a=fws_stn_d02.ravel()/1.9438444924406 178 | stwspd_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 179 | 180 | b=fwd_d02.T.ravel() 181 | a=fwd_stn_d02.ravel() 182 | stwdir_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 183 | 184 | dfout=pd.DataFrame([stwspd_d02,stwdir_d02,stwspd_d03,stwdir_d03]) 185 | dfout.index = ['speed_d02','dir_d02','speed_d03','dir_d03'] 186 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 187 | 188 | 189 | 
dfout.to_csv('~/windmetrics_summer_normalized.csv') 190 | 191 | ##----------- 192 | # get temperature and RH shit from combine aconc 193 | # pull station data again 194 | tmpstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_012019.csv',index_col=0) 195 | rhstn = pd.read_csv('/home/asm0384/WRFcheck/'+sim+'/wrfcheck_withstations_'+sim+'_RH.csv',index_col=0) 196 | 197 | xx_d02,yy_d02 = np.array(tmpstn['xx_d02']),np.array(tmpstn['yy_d02']) 198 | xx_d03,yy_d03 = np.array(tmpstn[tmpstn['in_d03']==True]['xx_d03']),np.array(tmpstn[tmpstn['in_d03']==True]['yy_d03']) 199 | 200 | # check completedatamini for the times associated with the indices 201 | wint_ind = ':744' 202 | sum_ind = '11*24+1:-9' 203 | 204 | temp_stn_d03 = np.array(tmpstn[tmpstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 205 | rh_stn_d03 = np.array(rhstn[rhstn['in_d03']==True].T[11*24+1:-9],dtype='float32') 206 | 207 | temp_stn_d02 = np.array(tmpstn[tmpstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 208 | rh_stn_d02 = np.array(rhstn[rhstn['in_d02']==True].T[11*24+1:-9],dtype='float32') 209 | 210 | # pull aconc files 211 | filenames_d02=[] 212 | os.chdir(dirToWRF_d02+'/postprocess/') 213 | for file in glob.glob("COMBINE_ACONC*"): 214 | filenames_d02.append(file) 215 | 216 | filenames_d02.sort() 217 | 218 | # $1 Get WRF file names 219 | filenames_d03=[] 220 | os.chdir(dirToWRF_d03+'/postprocess/') 221 | for file in glob.glob("COMBINE_ACONC*"): 222 | filenames_d03.append(file) 223 | 224 | filenames_d03.sort() 225 | 226 | 227 | def get_temp_rh(dirToWRF_d02,filenames_d02,var,xx,yy): 228 | d2=[] 229 | for q in range(len(filenames_d02)): 230 | nc = Dataset(dirToWRF_d02 +'/postprocess/'+ filenames_d02[q]) 231 | d=[[nc[var][hour][0][xx[i]][yy[i]] for i in range(len(xx))] for hour in range(24)] 232 | d2.append(d) 233 | # 234 | d2=np.asarray(d2) 235 | d2 = np.array([d2[i][x] for i in range(len(filenames_d02)) for x in range(24)]) 236 | # 237 | return d2 238 | 239 | temp_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'SFC_TMP',xx_d02,yy_d02) 240 | temp_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],"SFC_TMP",xx_d03,yy_d03) 241 | rh_d02 = get_temp_rh(dirToWRF_d02,filenames_d02[0:],'RH',xx_d02,yy_d02) 242 | rh_d03 = get_temp_rh(dirToWRF_d03,filenames_d03[0:],'RH',xx_d03,yy_d03) 243 | 244 | #d03 245 | 246 | ld03= tmpstn[tmpstn['in_d03']==True] 247 | ld03 = ld03[['lat','lon']] 248 | ld03.reset_index(inplace=True,drop=True) 249 | 250 | ld02= rhstn[rhstn['in_d02']==True] 251 | ld02 = ld02[['lat','lon']] 252 | ld02.reset_index(inplace=True,drop=True) 253 | 254 | 255 | #write out files 256 | if write ==True: 257 | pd.DataFrame(temp_d03).append(ld03.T).to_csv('~/wrf_t2_jan_d03.csv') 258 | pd.DataFrame(temp_d02).append(ld02.T).to_csv('~/wrf_t2_jan_d02.csv') 259 | pd.DataFrame(rh_d03).append(ld03.T).to_csv('~/wrf_rh_jan_d03.csv') 260 | pd.DataFrame(rh_d02).append(ld02.T).to_csv('~/wrf_rh_jan_d02.csv') 261 | 262 | # do stats 263 | 264 | b=temp_d02.ravel() 265 | a=(temp_stn_d02.ravel()-32)*5/9 266 | st_temp_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 267 | 268 | b=temp_d03.ravel() 269 | a=(temp_stn_d03.ravel()-32)*5/9 270 | st_temp_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 271 | 272 | 273 | b=rh_d02.ravel() 274 | a=rh_stn_d02.ravel() 275 | st_rh_d02 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 276 | 277 | b=rh_d03.ravel() 278 | a=rh_stn_d03.ravel() 279 | st_rh_d03 = stats_normalized(a[~np.isnan(b)],b[~np.isnan(b)]) 280 | 281 | 282 | 
dfout=pd.DataFrame([st_temp_d02,st_rh_d02,st_temp_d03,st_rh_d03]) 283 | dfout.index = ['temp_d02','rh_d02','temp_d03','rh_d03'] 284 | dfout.columns = ['mu_d','mu_p','MB','MSE','r','p'] 285 | 286 | 287 | dfout.to_csv('~/temp_rh_summermetrics_normalized.csv') 288 | 289 | 290 | # now group by each station and get correlation 291 | 292 | fnames = ['NO2_d03_2018_8_EPA_CMAQ_Combine.csv','NO2_d03_2019_1_EPA_CMAQ_Combine.csv', 293 | 'O3_d03_2018_8_EPA_CMAQ_Combine.csv','O3_d03_2019_1_EPA_CMAQ_Combine.csv', 294 | 'PM25_TOT_d03_2018_8_EPA_CMAQ_Combine.csv','PM25_TOT_d03_2019_1_EPA_CMAQ_Combine.csv'] 295 | 296 | level_2 = ['NO2_Sum','NO2_Wint','O3_Sum','O3_Wint','PM25_Sum','PM25_Wint'] 297 | 298 | for i in range(len(fnames)): 299 | fname = fnames[i] 300 | f = pd.read_csv(dir+'/CMAQcheck/'+fname) 301 | fl = f.groupby(['Latitude','Longitude'])[['CMAQ','Sample Measurement']].corr().iloc[0::2,-1] 302 | fl = fl.reset_index() 303 | fl['level_2']=level_2[i] 304 | print(fl) 305 | if i==0: final = fl 306 | else: final = final.append(fl) 307 | # using 308 | 309 | final.to_csv('~/Station_correlations_EPA.csv') 310 | 311 | 312 | 313 | 314 | -------------------------------------------------------------------------------- /chi_map_cropped.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------ 2 | # Fun figure for AGU CVD 3 | # Stacy Montgomery, Sept. 2019 4 | # 5 | # I made some AQ figures, the interesting part is "mpath" 6 | # and using the "exterior" of the shapefile to crop the figure 7 | #------------------------------------------ 8 | 9 | #------------------------------------------ 10 | # Libraries 11 | #-------------- 12 | from matplotlib import pyplot as plt 13 | from mpl_toolkits import basemap as bm 14 | from matplotlib import colors 15 | import numpy as np 16 | import numpy.ma as ma 17 | from matplotlib.patches import Path, PathPatch 18 | import pandas as pd 19 | from shapely.geometry import Point, shape, Polygon 20 | import fiona 21 | from shapely.ops import unary_union, cascaded_union 22 | from geopandas.tools import sjoin 23 | import geopandas as gpd 24 | import geoplot 25 | import glob 26 | import os 27 | from datetime import timedelta, date; 28 | from netCDF4 import Dataset 29 | import scipy.ndimage 30 | from cartopy import crs as ccrs 31 | from cartopy.io.shapereader import Reader 32 | import matplotlib.path as mpath 33 | import seaborn as sns 34 | 35 | #------------------------------------------ 36 | # Find index of points on a gridded array 37 | # stn_lon,stn_lat = list of lat lon points --> lat_list, lon_list = [x1,x2][y1,y2] 38 | # wrf_lon, wrf_lat = np.array of gridded lat lon --> grid_x= np.array([x1,x2,x3],[x4,x5,x6]) 39 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 40 | # stn -- points in a list (list, can be a list of just 1) 41 | # wrf -- gridded wrf lat/lon (np.array) 42 | #for iz in range(1): 43 | xx=[];yy=[] 44 | for i in range(len(stn_lat)): 45 | abslat = np.abs(wrf_lat-stn_lat[i]) 46 | abslon= np.abs(wrf_lon-stn_lon[i]) 47 | c = np.maximum(abslon,abslat) 48 | latlon_idx = np.argmin(c) 49 | x, y = np.where(c == np.min(c)) 50 | #add indices of nearest wrf point station 51 | xx.append(x) 52 | yy.append(y) 53 | # 54 | xx=[xx[i][0] for i in range(len(xx))];yy=[yy[i][0] for i in range(len(yy))] 55 | #return indices list 56 | return xx, yy 57 | 58 | #------------------------------------------ 59 | # USER INPUT 60 | fout_dir_l3='/home/asm0384/tempfiles/practice/NO2_l3_big/' 61 | 
plot_file='L3_averaged_Chicago_L2_Chicago_2018-8-1_through_L2_Chicago_2018-8-30_made_1567447847_nx_1250_ny_1000.csv' 62 | varname= 'no2' 63 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 64 | 65 | # Start pulling and cropping data 66 | chi = gpd.GeoDataFrame.from_file(path) 67 | 68 | #data frame with all data 69 | finalgrid = pd.read_csv(fout_dir_l3+plot_file, index_col =0) 70 | varname ='nitrogendioxide_tropospheric_column' 71 | 72 | #Pull information from title 73 | filename= plot_file 74 | ymd= plot_file.split('_made_') 75 | nxny=ymd[1].split('_nx_')[1].split('_ny_') 76 | nx=int(nxny[0]) 77 | ny=int(nxny[1].split('.csv')[0]) 78 | startdate=ymd[0].split('L3_averaged_Chicago_L2_Chicago_')[1].split('_through')[0] 79 | enddate=ymd[0].split('L3_averaged_Chicago_L2_Chicago_')[1].split('_through_L2_Chicago_')[1] 80 | 81 | finalgrid.describe() 82 | 83 | # NOW CROP OVER CHICAGO 84 | # Initialize grid 85 | grid_nlat=np.zeros((ny,nx)); grid_nlon=np.zeros((ny,nx)); grid_no2=np.zeros((ny,nx)) 86 | 87 | # Return back to grid form 88 | for i in range(ny): 89 | for j in range(nx): 90 | l=i*nx+j 91 | grid_nlat[i][j]=finalgrid['nlats'][l] 92 | grid_nlon[i][j]=finalgrid['nlons'][l] 93 | grid_no2[i][j]=finalgrid[varname][l] 94 | 95 | # Check 96 | #plt.scatter(finalgrid['nlons'],finalgrid['nlats'],c=finalgrid['nitrogendioxide_tropospheric_column']) 97 | #plt.show() 98 | 99 | # Make box around chicago to cut data -- specific for satellite, check to make sure the arrays are increasing in size 100 | x1,y1=find_index([min(chi.bounds.minx)],[min(chi.bounds.miny)], np.array(grid_nlon), np.array (grid_nlat)) 101 | x2,y2=find_index([max(chi.bounds.maxx)],[max(chi.bounds.maxy+.05)], np.array(grid_nlon), np.array (grid_nlat)) 102 | x3,y3=find_index([min(chi.bounds.minx)],[max(chi.bounds.maxy)], np.array(grid_nlon), np.array (grid_nlat)) 103 | x4,y4=find_index([max(chi.bounds.maxx)+.05],[min(chi.bounds.miny)], np.array(grid_nlon), np.array (grid_nlat)) 104 | 105 | #set up zeros array given the bound of chicago 106 | diffy =max(y1,y2,y3,y4)[0]-min(y1,y2,y3,y4)[0] 107 | diffx=max(x1,x2,x3,x4)[0]-min(x1,x2,x3,x4)[0] 108 | 109 | zlon,zlat,z=np.zeros([diffx, diffy]), np.zeros([diffx, diffy]), np.zeros([diffx, diffy]) 110 | 111 | # fill out zeros array from the gridded data 112 | for i in range(diffx): 113 | for j in range(diffy): 114 | z[i][j]= grid_no2[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 115 | zlat[i][j]= grid_nlat[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 116 | zlon[i][j]= grid_nlon[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 117 | 118 | # Check 119 | #ax= chi.plot() 120 | #plt.scatter(zlon,zlat,c=z) 121 | 122 | #plt.show() 123 | 124 | # Make the contour plot 125 | # make finer 126 | import scipy.ndimage 127 | 128 | from cartopy import crs as ccrs 129 | from cartopy.io.shapereader import Reader 130 | import matplotlib.path as mpath 131 | import seaborn as sns 132 | 133 | crs_new = ccrs. 
AlbersEqualArea(central_longitude=(chi.bounds.mean().minx+chi.bounds.mean().maxx)/2) 134 | 135 | #get data at higher resolution for contouring 136 | lat,lon,data=scipy.ndimage.zoom(zlat, 3),scipy.ndimage.zoom(zlon, 3),scipy.ndimage.zoom(z, 3) 137 | data=data*10e4 138 | 139 | # merge polygons using unary union and get the outside values 140 | # NOTE -- the union makes a multipolygon, but if you reference the largest of the polygons you actually get the outside 141 | union=gpd.GeoSeries(unary_union(chi.geometry)) 142 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 143 | 144 | # make fig object 145 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new},figsize=(5, 5)) 146 | 147 | #set up data for plotting via levels 148 | vmax=pd.DataFrame(data).max().max()+1.5 149 | vmin= int(pd.DataFrame(data).min().min())+2 150 | levels = np.linspace(vmin, int(vmax), int(vmax)+10) 151 | 152 | #locate outside 153 | #plt.scatter(list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]) 154 | 155 | # set boundary as outer extent by making a matplotlib path object and adding that geometry 156 | # i think setting the boundary before you plot the data actually crops the data to the shape, so set ax first 157 | axs.set_boundary(mpath.Path(outsideofunion.T,closed=True), transform= crs_new, use_as_clip_path=True) 158 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 159 | 160 | #plot the gridded data by using contourf 161 | cs=plt.contourf(lon,lat,data,cmap= "inferno_r", transform=crs_new, levels=levels) 162 | 163 | # add landmarks with scatterplot 164 | midway= 41.7868, -87.7522 165 | ohare = 41.9742, -87.9073 166 | loop = 41.8786, -87.6251 167 | plt.scatter(pd.DataFrame([midway,ohare,loop])[1],pd.DataFrame([midway,ohare,loop])[0],marker = '*',color='white') 168 | 169 | # set axes extents from shapefile 170 | x=[min(chi.bounds.minx), max(chi.bounds.maxx)] 171 | y=[min(chi.bounds.miny), max(chi.bounds.maxy)] 172 | axs.set_extent([x[0]-.03,x[1]+.03,y[0]-.03,y[1]+.03],crs= crs_new) 173 | axs.set_title('1 PM TropOMI NO$_{2}$ Column Density') 174 | 175 | #add colorbar and label 176 | cbar=plt.colorbar(cs,boundaries=np.arange(vmin,11)) 177 | cbar.ax.set_ylabel('10$^{-2}$ molecules m$^{2}$') 178 | cbar.set_ticks(np.arange(vmin, int(vmax),1)) 179 | 180 | # save and show 181 | plt.savefig('/home/asm0384/tropomi_no2_neighbs_1_star.pdf',format='pdf') 182 | plt.show() 183 | 184 | 185 | #------------------------------------------------------------------------------------ 186 | # CMAQ Processing 187 | #-------------------------------- 188 | 189 | print('-----------') 190 | print('Starting CMAQ PROCESSING....') 191 | print('-----------') 192 | 193 | # Directories for cmaq + EPA 194 | dir_cmaq='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_BC_4km_v0/postprocess/' 195 | 196 | # CMAQ things 197 | domain='d02' 198 | time='hourly' 199 | year='2018' 200 | #epa_code=['42401','42602','44201']; var=['SO2','NO2','O3'] #numerical identifiers and corresponding vars 201 | epa_code=['42602']; var=['NO2'] 202 | 203 | # Get CMAQ file names 204 | cmaq_files=[] 205 | os.chdir(dir_cmaq) 206 | for file in glob.glob("COMBINE_ACONC_*"): 207 | cmaq_files.append(file) 208 | 209 | # Find dates from cmaq 210 | cmaq_files.sort(); 211 | cmaq_files=cmaq_files[0:-2] #get rid of september 212 | 213 | dates=[cmaq_files[z].split("COMBINE_ACONC_")[1].split(".nc")[0] for z in range(len(cmaq_files))] 214 | 
start_dt=date(int(dates[0][0:4]),int(dates[0][4:6]),int(dates[0][6:8])) 215 | end_dt=date(int(dates[-1][0:4]),int(dates[-1][4:6]),int(dates[-1][6:8])) 216 | 217 | #pull data 218 | cmaq=[Dataset(dir_cmaq+cmaq_files[i]) for i in range(len(cmaq_files))] 219 | t_index = pd.DatetimeIndex(start=start_dt, end=end_dt, freq='1h') 220 | dates_ft=[str(date(int(dates[i][0:4]),int(dates[i][4:6]),int(dates[i][6:8]))) for i in range(len(dates))] 221 | 222 | #get monthly avg of CMAQ data 223 | monthly_avg_no2=[cmaq[i]['NO2'][h] for i in range(len(cmaq)) for h in range(24)] 224 | monthly_avg_no2= sum(monthly_avg_no2)/(len(cmaq)*24) 225 | 226 | # get 1 pm avg no2 227 | pm_avg_no2=[cmaq[i]['NO2'][13] for i in range(len(cmaq))] 228 | pm_avg_no2=sum(monthly_avg_no2)/(len(cmaq)) 229 | 230 | monthly_avg_no2= pm_avg_no2 #stupid 231 | 232 | # get 1 pm avg o3 233 | #monthly_avg_o3=[cmaq[i]['O3'][h] for i in range(len(cmaq)) for h in range(24)] 234 | #monthly_avg_o3= sum(monthly_avg_no2)/(len(cmaq)*24) 235 | 236 | #Pull cmaq grid 237 | grid='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/mcip/PXLSM/ChicagoLADCO_d02/GRIDCRO2D_Chicago_LADCO_2018-08-20.nc' 238 | cmaq_lat,cmaq_lon=Dataset(grid)['LAT'][0][0],Dataset(grid)['LON'][0][0] 239 | 240 | #check the extent and that everything looks right 241 | #plt.scatter(cmaq_lon, cmaq_lat, c= monthly_avg_no2[0]) 242 | 243 | # Find indices of the greatest outside points of the data 244 | x1,y1=find_index([min(chi.bounds.minx)],[min(chi.bounds.miny)], np.array(cmaq_lon), np.array (cmaq_lat)) 245 | x2,y2=find_index([max(chi.bounds.maxx)],[max(chi.bounds.maxy)], np.array(cmaq_lon), np.array (cmaq_lat)) 246 | x3,y3=find_index([min(chi.bounds.minx)],[max(chi.bounds.maxy)], np.array(cmaq_lon), np.array (cmaq_lat)) 247 | x4,y4=find_index([max(chi.bounds.maxx)+.05],[min(chi.bounds.miny)], np.array(cmaq_lon), np.array (cmaq_lat)) 248 | 249 | # make an array the size of the bounds of the shapefile 250 | diffy =max(y1,y2,y3,y4)[0]-min(y1,y2,y3,y4)[0] 251 | diffx=max(x1,x2,x3,x4)[0]-min(x1,x2,x3,x4)[0] 252 | zlon,zlat,z=np.zeros([diffx, diffy]), np.zeros([diffx, diffy]), np.zeros([diffx, diffy]) 253 | 254 | for i in range(diffx): 255 | for j in range(diffy): 256 | z[i][j]= monthly_avg_no2[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 257 | zlat[i][j]= cmaq_lat[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 258 | zlon[i][j]= cmaq_lon[min(x1,x2,x3,x4)[0]+i][min(y1,y2,y3,y4)[0]+j] 259 | 260 | # Make Contour plot 261 | # make finer 262 | 263 | 264 | crs_new = ccrs. 
AlbersEqualArea(central_longitude=(chi.bounds.mean().minx+chi.bounds.mean().maxx)/2) 265 | 266 | #get data at higher resolution for contouring 267 | lat,lon,data=scipy.ndimage.zoom(zlat, 3),scipy.ndimage.zoom(zlon, 3),scipy.ndimage.zoom(z, 3) 268 | 269 | #merge polygons and get the outside valules 270 | b=gpd.GeoSeries(unary_union(chi.geometry)) 271 | v=pd.DataFrame([list(b[0][2].exterior.xy)[0], list(b[0][2].exterior.xy)[1]]) 272 | 273 | # make fig object 274 | fig, axs = plt.subplots(subplot_kw={'projection': crs_new}, 275 | figsize=(5, 5)) 276 | 277 | #set up data for plotting via levels 278 | vmax=pd.DataFrame(data).max().max() 279 | vmin= pd.DataFrame(data).min().min()-.007 280 | vmin=.3 281 | levels = np.arange(vmin, vmax+.1, .10) 282 | 283 | #locate outside 284 | #plt.scatter(list(b[0][2].exterior.xy)[0], list(b[0][2].exterior.xy)[1]) 285 | 286 | #set boundary as outer extent 287 | axs.set_boundary(mpath.Path(v.T,closed=True), transform= crs_new, use_as_clip_path=True) 288 | 289 | axs.add_geometries(Reader(path).geometries(), crs=crs_new,facecolor='None', edgecolor='black') 290 | cs=plt.contourf(lon,lat,data,cmap= "inferno_r", transform=crs_new, levels=levels) 291 | 292 | x=[min(chi.bounds.minx), max(chi.bounds.maxx)] 293 | y=[min(chi.bounds.miny), max(chi.bounds.maxy)] 294 | 295 | axs.set_extent([x[0]-.03,x[1]+.03,y[0]-.03,y[1]+.03],crs= crs_new) 296 | axs.set_title('1 PM CMAQ NO$_{2}$ Ground Level') 297 | 298 | cbar=plt.colorbar(cs,boundaries=np.arange(vmin,11)) 299 | cbar.ax.set_ylabel('ppbV') 300 | cbar.set_ticks(np.arange(vmin, vmax,.2)) 301 | 302 | midway= 41.7868, -87.7522 303 | ohare = 41.9742, -87.9073 304 | loop = 41.8786, -87.6251 305 | 306 | plt.scatter(pd.DataFrame([midway,ohare,loop])[1],pd.DataFrame([midway,ohare,loop])[0],marker = '*',color='white') 307 | 308 | plt.savefig('/home/asm0384/cmaq_no2_neighbs_1pm.pdf',format='pdf') 309 | 310 | plt.show() 311 | 312 | 313 | #------------------------------------------------------------------------------------ 314 | # INCOME Processing 315 | # This could be done better, I will in the future sort my own chloropleth, the geoplot funciton is 316 | # not broad enough, but it's quick enough to work for me 317 | #-------------------------------- 318 | import geoplot.crs as gcrs 319 | 320 | fname='/home/asm0384/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv' 321 | income=pd.read_csv(fname) 322 | 323 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 324 | 325 | chi = gpd.GeoDataFrame.from_file(path) 326 | 327 | income['COMMUNITY AREA NAME'][75]="OHARE" 328 | 329 | income['community']=[income['COMMUNITY AREA NAME'][i].upper() for i in range(len(income))] 330 | 331 | 332 | dfmerge=pd.merge(chi,income,on='community') 333 | 334 | # dropna cloropleth graph ... 335 | c=geoplot.choropleth(dfmerge, hue = dfmerge['HARDSHIP INDEX'], 336 | cmap='Blues', figsize=(5, 5), k=None, legend=True) 337 | #legend_values=np.arange(10000,90000,10000)) 338 | 339 | 340 | plt.title('Hardship Index') 341 | 342 | # scatter landmarks 343 | midway= 41.7868, -87.7522 344 | ohare = 41.9742, -87.9073 345 | loop = 41.8786, -87.6251 346 | 347 | #oops doesnt work ... add in post processing ... 
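# (hedged sketch, not part of the original script) geoplot.choropleth returns a matplotlib
# axes, so an alternative to adding the stars in post-processing is to draw them on that axes
# directly, for example:
#   landmarks = pd.DataFrame([midway, ohare, loop])
#   c.scatter(landmarks[1], landmarks[0], marker='*', color='white', zorder=5)
# If a cartopy projection were passed to geoplot, the call would also need
# transform=ccrs.PlateCarree(); whether this renders correctly depends on the geoplot version.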
348 | plt.scatter(pd.DataFrame([midway,ohare,loop])[1],pd.DataFrame([midway,ohare,loop])[0],marker = '*',color='white') 349 | 350 | plt.savefig('/home/asm0384/HI_neighbs_hot_1.5.pdf',format='pdf') 351 | 352 | plt.show() 353 | -------------------------------------------------------------------------------- /plot_CMAQ_diff.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | #------------------------------------------ 4 | # Libraries 5 | #-------------- 6 | from matplotlib import pyplot as plt ; from matplotlib import colors 7 | import numpy as np; import numpy.ma as ma; from matplotlib.patches import Path, PathPatch 8 | import pandas as pd; from shapely.geometry import Point, shape, Polygon;import fiona 9 | from shapely.ops import unary_union, cascaded_union; from geopandas.tools import sjoin 10 | import geopandas as gpd; import geoplot; import glob; import os; from datetime import timedelta, date; 11 | from netCDF4 import Dataset 12 | import scipy.ndimage; from cartopy import crs as ccrs; from cartopy.io.shapereader import Reader 13 | import matplotlib.path as mpath; import seaborn as sns 14 | #------------------------------------------ 15 | 16 | # USER INPUT 17 | 18 | # shapes and directories == https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=State+Legislative+Districts 19 | path='/home/asm0384/shapefiles/commareas/geo_export_77af1a6a-f8ec-47f4-977c-40956cd94f97.shp' 20 | chi_shapefile = gpd.GeoDataFrame.from_file(path) 21 | 22 | # dir to grid file 23 | dir='/projects/b1045/jschnell/ForStacy/' 24 | ll='latlon_ChicagoLADCO_d03.nc' 25 | 26 | # dir to model files 27 | dir_CMAQ = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 28 | 29 | 30 | dir_CMAQ_BASE = '/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' # experimental choice 31 | 32 | dir_WRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_BASE_FINAL_1.33km_sf_rrtmg_5_8_1_v3852/' 33 | 34 | dir_EPA = '/home/asm0384/ChicagoStudy/inputs/EPA_hourly_station_data/' 35 | 36 | #write out monthly files to csv? 37 | # read in monthly files from csv? 
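# (hedged sketch, not part of the original script) only the writeoutcsv branch is visible in
# pull_CMAQ below; if writeincsv were wired up, the cached monthly means could be reloaded
# instead of re-averaging every COMBINE_ACONC file, e.g. (filename pattern assumed from the
# write call inside pull_CMAQ):
#   cached = pd.read_csv(cmaq_var[i] + '_' + version + '_BASE_2018_aug.csv', header=None).values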
38 | writeoutcsv = True 39 | writeincsv = False 40 | show = True 41 | 42 | #variables of interest 43 | cmaq_var=['O3','NO2','NO','NOX','CO','ISOP','SO2','FORM','PM25_TOT','PM10'] 44 | cmaq_var=['O3','NO2','NO','CO','ISOP','SO2','FORM'] 45 | 46 | startswith = "COMBINE_ACONC_" 47 | 48 | # --------------------------------------------------------------------- 49 | # USER DEF FUNCTIONS 50 | # --------------------------------------------------------------------- 51 | 52 | #pull in cmaq 53 | startswith = "COMBINE_ACONC" 54 | 55 | 56 | #writeoutcsv = false 57 | def pull_CMAQ(dir_CMAQ_BASE,startswith,cmaq_var,version): 58 | #pull files from given directoy 59 | onlyfiles = next(os.walk(dir_CMAQ_BASE))[2] 60 | onlyfiles.sort() # so that searching for dates are easier 61 | fnames_CMAQ_BASE = [x for x in onlyfiles if x.startswith(startswith)] 62 | numfiles=(len(fnames_CMAQ)) 63 | ncfile_CMAQ_base = [Dataset(dir_CMAQ_BASE+ fnames_CMAQ_BASE[i],'r') for i in range(len(fnames_CMAQ_BASE))] 64 | units_cmaq = [ncfile_CMAQ_base[0][cmaq_var[i]].units for i in range(len(cmaq_var))] 65 | #full day conc 66 | cmaq_avgs_BASE = [] 67 | cmaq_avgs_daily_BASE = [] 68 | cmaq_avgs_hourly_BASE = [] 69 | # make averages for cmaq base 70 | for i in range(len(cmaq_var)): 71 | tmp = np.asarray([ncfile_CMAQ_base[j][cmaq_var[i]] for j in range(len(ncfile_CMAQ_base))]) 72 | hourly = np.average(tmp,axis=0) # hour by hour concs 73 | daily = np.average(tmp,axis=1) # daily average concs 74 | # 75 | monthly = np.average(daily,axis=0) 76 | if writeoutcsv == True: pd.DataFrame(monthly[0]).to_csv(cmaq_var[i]+'_'+version+'_BASE_2018_aug.csv', header=False,index=False) 77 | cmaq_avgs_BASE.append(monthly[0]) 78 | cmaq_avgs_daily_BASE.append(daily) 79 | cmaq_avgs_hourly_BASE.append(hourly) 80 | #return 81 | print('Done with ' +cmaq_var[i]) 82 | #return 83 | return cmaq_avgs_BASE, cmaq_avgs_daily_BASE, cmaq_avgs_hourly_BASE, units_cmaq 84 | 85 | 86 | #writeoutcsv = false 87 | 88 | 89 | 90 | #plot cmaq 91 | 92 | #plotting loop 93 | def plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped): 94 | for i in range(0,len(monthly_tot)): 95 | #for i in range(1): 96 | # set var for plot 97 | var= var_tot[i] 98 | data= np.asarray(monthly_tot[i]) 99 | if var == 'RAINC': pass 100 | else: 101 | if i= pd.to_datetime(dates)[0]) & (tmp['datetime'] <= pd.to_datetime(dates)[-1]) 212 | tmp = tmp.loc[mask] 213 | epa_lat,epa_lon= tmp['Latitude'].unique(), tmp['Longitude'].unique() 214 | tmp_avg = tmp.groupby(['Longitude','Latitude','datetime'])['Arithmetic Mean'].mean().reset_index() 215 | tmp_avg.to_csv(dir_EPA + cmaq_var[i]+ '_'+ version+ '_daily_avg_by_ChiMonitor_Aug2018.csv') 216 | tmp_avg = tmp.groupby(['Longitude','Latitude'])['Arithmetic Mean'].mean().reset_index() 217 | tmp_avg.to_csv(dir_EPA + cmaq_var[i]+ '_' + version +'_by_ChiMonitor_Aug2018.csv') 218 | epa_avgs_latlon.append(tmp_avg) 219 | except: 220 | print('No EPA file for ' + cmaq_var[i]) 221 | epa_avgs_latlon.append(np.nan) 222 | #epa_avgs.append(np.nan) 223 | 224 | # ppm to ppb 225 | epa_avgs_latlon[4]['Arithmetic Mean'] = epa_avgs_latlon[4]['Arithmetic Mean']*1000 226 | epa_avgs_latlon[0]['Arithmetic Mean'] = epa_avgs_latlon[0]['Arithmetic Mean']*1000 227 | 228 | 229 | #---------- PULL IN CMAQ 230 | 231 | #pull in cmaq 232 | 233 | startswith = "COMBINE_ACONC" 234 | version = '_aug2018_monthly_nobusdiff' 235 | base_monthly, base_daily, base_hourly, units = pull_CMAQ(dir_CMAQ_BASE,startswith,cmaq_var,version) 236 | 237 | #pull in cmaq 238 | startswith = 
"COMBINE_ACONC" 239 | version2 = '_aug2018_monthly_nomunidiff' 240 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noMUNI_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 241 | nomuni_monthly, nomuni_daily, nomuni_hourly, units = pull_CMAQ(dir_CMAQ,startswith,cmaq_var,version2) 242 | 243 | #pull in cmaq 244 | 245 | #pull in cmaq 246 | startswith = "COMBINE_ACONC" 247 | version3 = '_aug2018_monthly_nobusdiff' 248 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noBUS_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 249 | nobus_monthly, nobus_daily, nobus_hourly, units = pull_CMAQ(dir_CMAQ,startswith,cmaq_var,version3) 250 | 251 | 252 | 253 | #pull in cmaq 254 | startswith = "COMBINE_ACONC" 255 | version4 = '_aug2018_monthly_noschooldiff' 256 | dir_CMAQ='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/output_Amy_noSCHOOL_1.33km_sf_rrtmg_5_8_1_v3852/postprocess/' 257 | noschool_monthly, noschool_daily, noschool_hourly, units = pull_CMAQ(dir_CMAQ,startswith,cmaq_var,version4) 258 | 259 | 260 | #---------- START PLOTTING 261 | import cartopy.feature as cfeature 262 | 263 | # projection 264 | crs_new = ccrs.PlateCarree()# get shape outside 265 | union=gpd.GeoSeries(unary_union(chi_shapefile.geometry)) 266 | outsideofunion=pd.DataFrame([list(union[0][2].exterior.xy)[0], list(union[0][2].exterior.xy)[1]]) 267 | 268 | 269 | # version and title STUFF 270 | title_2 = " %diff no MUNI, Aug. 2018" 271 | var_tot = cmaq_var cmap = 'magma_r' 272 | version = '_nomuniDIFF_aug2018_' 273 | #monthly_tot = [nomuni_monthly[i]-base_monthly[i] for i in range(len(base_monthly))] 274 | monthly_tot = [(nomuni_monthly[i]-base_monthly[i])/base_monthly[i]*100 for i in range(len(base_monthly))] 275 | 276 | # SET RANGES 277 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.999),2) for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],0.01),2) for i in range(len(monthly_tot))] 278 | 279 | # DO WE EPA SCATTER 280 | add_epa = False #True 281 | div = True 282 | if div==True: cmap = 'RdBu_r' 283 | shaped= True 284 | show=False 285 | 286 | version = '_nomuni_PERCENTDIFF_aug2018_' 287 | 288 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 289 | 290 | #================ no bus percent diff 291 | # version and title STUFF 292 | title_2 = " %diff no BUS, Aug. 2018" 293 | var_tot = cmaq_var cmap = 'magma_r' 294 | version = '_nobus_PERCENTDIFF_aug2018_' 295 | #monthly_tot = [nobus_monthly[i]-base_monthly[i] for i in range(len(base_monthly))] 296 | monthly_tot = [(nobus_monthly[i]-base_monthly[i])/base_monthly[i]*100 for i in range(len(base_monthly))] 297 | 298 | # SET RANGES 299 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.999),2) for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],0.01),2) for i in range(len(monthly_tot))] 300 | 301 | # DO WE EPA SCATTER 302 | add_epa = False #True 303 | div = True 304 | if div==True: cmap = 'RdBu_r' 305 | 306 | shaped= True 307 | show=False 308 | 309 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 310 | 311 | 312 | #================ no SCHOOL percent diff 313 | 314 | 315 | # version and title STUFF 316 | title_2 = " %diff no SCHOOL, Aug. 
2018" 317 | var_tot = cmaq_var cmap = 'magma_r' 318 | version = '_noSCHOOL_PERCENTDIFF_aug2018_' 319 | #monthly_tot = [noschool_monthly[i]-base_monthly[i] for i in range(len(base_monthly))] 320 | monthly_tot = [(noschool_monthly[i]-base_monthly[i])/base_monthly[i]*100 for i in range(len(base_monthly))] 321 | 322 | # SET RANGES 323 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.999),2) for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],0.01),2) for i in range(len(monthly_tot))] 324 | 325 | # DO WE EPA SCATTER 326 | add_epa = False #True 327 | div = True 328 | if div==True: cmap = 'RdBu_r' 329 | 330 | shaped= True 331 | show=False 332 | 333 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 334 | 335 | 336 | 337 | #---- base case 338 | monthly_tot = base_monthly #[nbase_monthly[i] for i in range(len(base_monthly))] 339 | version = '_SPOT_' 340 | title_2 = " , Aug. 2018" 341 | vmaxs,vmins = [round(np.percentile(monthly_tot[i],99.99),5)*1000 for i in range(len(monthly_tot))],[round(np.percentile(monthly_tot[i],98),5)*1000 for i in range(len(monthly_tot))] 342 | vmaxs[0],vmins[0] =40,30 343 | shaped = False 344 | add_epa = True 345 | plot_cmaq(monthly_tot,var_tot,title_2,cmap,vmaxs,vmins,crs_new,show,add_epa,version,div,shaped) 346 | 347 | -------------------------------------------------------------------------------- /validation/compareHourlyWrfToClimateStations.py: -------------------------------------------------------------------------------- 1 | 2 | # Stacy Montgomery, April 2019 3 | #Single day WRF output 4 | 5 | # Future work == $[num] 6 | # $1: -- separate out so it can do d01, d02, d03 7 | 8 | # Notes -- NOTE[num] 9 | 10 | # Data for comparison 11 | # LCD data from noaa: https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/ 12 | # LCD station names -- metadata of stations -- must make CSV: https://www.ncdc.noaa.gov/homr/file/lcd-stations.txt 13 | # Currently the UTC offset calculator is for negative offsets, simple loop fix to do positive offsets 14 | 15 | # ~~~~~~ IMPORT PACKAGES ~~~~~~~~~~~~ 16 | #Station 17 | import glob, os 18 | import pandas as pd, numpy as np, matplotlib.pyplot as plt, cartopy.crs as crs, cartopy.feature as cpf 19 | from netCDF4 import Dataset 20 | from matplotlib.cm import get_cmap 21 | from cartopy.feature import NaturalEarthFeature 22 | from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim, cartopy_ylim, latlon_coords) 23 | import time 24 | from timezonefinder import TimezoneFinder 25 | from pytz import timezone 26 | import pytz 27 | from datetime import datetime,date, timedelta 28 | import dateutil.parser as dparser 29 | 30 | tf = TimezoneFinder(in_memory=True) 31 | 32 | # ~~~~~~ CUSTOM FUNCTIONS ~~~~~~~~~~~~ 33 | # adapted from : http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 34 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 35 | # stn -- points 36 | # wrf -- list 37 | xx=[];yy=[] 38 | for i in range(len(stn_lat)): 39 | abslat = np.abs(wrf_lat-stn_lat[i]) 40 | abslon= np.abs(wrf_lon-stn_lon[i]) 41 | c = np.maximum(abslon,abslat) 42 | latlon_idx = np.argmin(c) 43 | x, y = np.where(c == np.min(c)) 44 | #add indices of nearest wrf point station 45 | xx.append(x) 46 | yy.append(y) 47 | #return indices list 48 | return xx, yy 49 | 50 | # modified from https://stackoverflow.com/questions/16685384/finding-the-indices-of-matching-elements-in-list-in-python 51 | def find(lst, a): 52 | return [i for i, x in enumerate(lst) 
if x==a] 53 | 54 | # modified from ----- 55 | utc = pytz.utc 56 | def offset(lat,lon): 57 | #returns a location's time zone offset from UTC in minutes. 58 | today = datetime.now() 59 | tz_target = timezone(tf.certain_timezone_at(lat=lat, lng=lon)) 60 | # ATTENTION: tz_target could be None! handle error case 61 | today_target = tz_target.localize(today) 62 | today_utc = utc.localize(today) 63 | return (today_utc - today_target).total_seconds() / 3600 64 | 65 | 66 | # pull in real data, apply UTC, and average and remove hourly values 67 | def getRealData(LCD): 68 | date_noTime=[]; time_noDate=[] 69 | date_noTime= [LCD['DATE'][z].split('T')[0] for z in range(len(LCD['DATE']))] 70 | time_noDate=[LCD['DATE'][z].split('T')[1] for z in range(len(LCD['DATE']))] 71 | UTC_offset=offset(lon=LCD['LONGITUDE'][0], lat=LCD['LATITUDE'][0]) 72 | #get day before and after for UTC offset sake 73 | date_onedaybefore=(dparser.parse(dates[0])-timedelta(days=1)).isoformat().split('T')[0] 74 | date_onedayafter=(dparser.parse(dates[-1])+timedelta(days=1)).isoformat().split('T')[0] 75 | start_ind_dataset = find(date_noTime, date_onedaybefore)[0] 76 | end_ind_dataset= find(date_noTime, date_onedayafter)[-1] 77 | if Chatty: print('-> Adding UTC offset to timestamp and averaging repeated values') 78 | # UTC offset calculator 79 | # Get the time and round up or round down, also add the UTC offset such that correct time is in UTC 80 | correctedTime=[]; correctedRain=[]; correctedTemp =[];correctedDate=[] 81 | for i in range(len(LCD[start_ind_dataset: end_ind_dataset])): 82 | datetimeLCD=dparser.parse(LCD['DATE'][start_ind_dataset+i]) 83 | datetimeLCD_UTC = datetimeLCD + timedelta(hours=UTC_offset) 84 | try: 85 | rainz = float(LCD['HourlyPrecipitation'][start_ind_dataset+i]) 86 | except ValueError: 87 | rainz =float('nan') 88 | try: 89 | tempz= float(LCD['HourlyDryBulbTemperature'][start_ind_dataset+i]) 90 | except ValueError: 91 | tempz=float('nan') 92 | if datetimeLCD_UTC.minute >= 30: 93 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 94 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 95 | correctedRain.append(rainz) 96 | correctedTemp.append(tempz) 97 | elif datetimeLCD_UTC.minute < 30: 98 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 99 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 100 | correctedRain.append(rainz) 101 | correctedTemp.append(tempz) 102 | else: 103 | correctedTime.append((datetimeLCD_UTC).isoformat().split('T')[1]) 104 | correctedDate.append((datetimeLCD_UTC).isoformat().split('T')[0]) 105 | correctedRain.append(rainz) 106 | correctedTemp.append(tempz) 107 | #Now filter LCD so that it only uses UTC date times 108 | start_ind_dataset2 = find(correctedDate, dates[0])[0] 109 | end_ind_dataset2 = find(correctedDate, dates[-1])[-1] 110 | correctedRain=correctedRain[start_ind_dataset2: end_ind_dataset2] 111 | correctedTemp= correctedTemp[start_ind_dataset2: end_ind_dataset2] 112 | correctedTime = correctedTime[start_ind_dataset2: end_ind_dataset2] 113 | correctedDate = correctedDate[start_ind_dataset2: end_ind_dataset2] 114 | #Now nan-average repeating values 115 | correctedRain_noRepeats=[]; correctedTemp_noRepeats =[]; timeCorrected_noRepeats=[]; i=0; dateCorrected_noRepeats=[] 116 | while i < len(correctedTime): 117 | j=0; tmpRain=[];tmpTemp=[] 118 
| try:
119 | while i+j < len(correctedTime)-1 and correctedTime[i] == correctedTime[i+j]:
120 | tmpTemp.append(correctedTemp[i+j])
121 | tmpRain.append(correctedRain[i+j])
122 | j=j+1
123 | timeCorrected_noRepeats.append(correctedTime[i])
124 | dateCorrected_noRepeats.append(correctedDate[i])
125 | if j == 0 and i < len(correctedTime)-1:
126 | correctedRain_noRepeats.append(correctedRain[i])
127 | correctedTemp_noRepeats.append(correctedTemp[i])
128 | i=i+1
129 | elif j > 0 and i < len(correctedTime)-1:
130 | correctedRain_noRepeats.append(np.nanmean(tmpRain))
131 | correctedTemp_noRepeats.append(np.nanmean(tmpTemp))
132 | i=i+j
133 | else:
134 | i=i+1
135 | except IndexError:
136 | #if the record ends mid-hour, keep the index moving so the loop terminates
137 | i=i+1
138 | if Chatty: print('-> Finished averaging duplicate values in station %s dataset'% str(station))
139 | #VERY quick check to see if all data is available, if not, flag it for later
140 | missing_dates=[];missing_hours=[]
141 | if len(dates) == len(list(set(dateCorrected_noRepeats))):
142 | if Chatty: print('-> No missing dates at station %s' %(stationList[station],))
143 | else:
144 | if Chatty: print('-> Missing dates at %s' %(stationList[station],))
145 | missing_dates.append(stationList[station])
146 | #next
147 | if len(list(set(timeCorrected_noRepeats))) == 24:
148 | if Chatty: print('-> No missing hours at station %s' %(stationList[station],))
149 | else:
150 | if Chatty: print('-> Missing hours at %s' %(stationList[station],))
151 | missing_hours.append(stationList[station])
152 | #return
153 | return correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats
154 |
155 |
156 | # Easy bounding box checker
157 | def checkbounds(x,y,x1,y1,x2,y2):
158 | if (x<x2 and y<y2 and x>x1 and y>y1):
159 | return True
160 | else:
161 | return False
162 |
163 |
164 | def findStations():
165 | # Get station names -- NOTE1: LCD station names has no header... may cause index errors if format is different!
166 | stationList=[]; tmp=[]
167 | listOfStations= pd.read_csv(listOfStationsFile, header=None)
168 | listOfStations = listOfStations[np.isfinite(listOfStations[5])] #remove missing station data
169 | listOfStations =listOfStations.reset_index() #be able to index the stations properly
170 | tmp= [format(listOfStations[0][i],'06') for i in range(len(listOfStations))] #add leading zeroes to stations
171 | listOfStations['format'] = tmp; del tmp # add string names to list of stations
172 | stationList=[str(int(listOfStations[5][i]))+listOfStations['format'][i]+".csv" for i in range(len(listOfStations))]
173 | stn_lat =listOfStations[15].to_list(); stn_lon =listOfStations[16].to_list()
174 | stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy()
175 | lenOriginalStations=len(stn_lat)
176 | #check domain
177 | #plt.scatter(stn_lon , stn_lat)
178 | #xd03=[lond03min, lond03min, lond03max, lond03max]
179 | #yd03=[latd03min, latd03max, latd03min, latd03max]
180 | #plt.scatter(xd03, yd03)
181 | stnListCpy = [x for x in stationList]
182 | in_d01=[]
183 | #Check bounds and remove from non d01 domains
184 | for z in range(lenOriginalStations):
185 | x,y= stn_lat[z],stn_lon[z]
186 | if checkbounds(x,y,latd01min, lond01min, latd01max, lond01max):
187 | in_d01.append(True)
188 | else:
189 | stnListCpy.remove(stationList[z])
190 | stn_latCopy.remove(stn_lat[z])
191 | stn_lonCopy.remove(stn_lon[z])
192 | #get rid of duplicates
193 | stationList = [x for x in stnListCpy]
194 | stn_lat = [x for x in stn_latCopy]
195 | stn_lon = [x for x in stn_lonCopy]
196 | del stnListCpy, stn_latCopy, stn_lonCopy
197 | # [in]Sanity check
198 | #plt.scatter(stn_lon , stn_lat,c= in_d03)
199 | #xd03=[lond03min, lond03min, lond03max, lond03max]
200 | #yd03=[latd03min, latd03max, latd03min, latd03max]
201 | #plt.scatter(xd03, yd03)
202 | #plt.show()
203 | #Check if stations exist and are in domain bounds, if not remove the station
204 | import requests
205 | stnListCpy = [x for x in stationList]; stn_latCopy=
stn_lat.copy(); stn_lonCopy= stn_lon.copy() 206 | for station in range(len(stationList)): 207 | LCD = requests.get(NOAAdataLink + stationList[station]) 208 | #LCD.connect() 209 | if LCD.status_code > 200: 210 | if Chatty: print("-> Link does not exist for %s, removing station" %(stationList[station],)) 211 | stnListCpy.remove(stationList[station]) 212 | stn_latCopy.remove(stn_lat[station]) 213 | stn_lonCopy.remove(stn_lon[station]) 214 | #Remove copies again 215 | stationList = [x for x in stnListCpy] 216 | stn_lat = [x for x in stn_latCopy] 217 | stn_lon = [x for x in stn_lonCopy] 218 | del stnListCpy, stn_latCopy, stn_lonCopy 219 | # now check to see which of these are within d02, d03 domains 220 | in_d02=[False for z in range(len(stn_lat))]; in_d03=[False for z in range(len(stn_lat))] 221 | for z in range(len(stationList)): 222 | x,y= stn_lat[z],stn_lon[z] 223 | if (checkbounds(x,y,latd02min, lond02min, latd02max, lond02max)): 224 | in_d02[z]=True 225 | if (checkbounds(x,y,latd03min, lond03min, latd03max, lond03max)): 226 | in_d03[z]=True 227 | # !!!!!!!!!!---------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 228 | # write out station list so we don't need to do this again: 229 | # !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 230 | station_out=pd.DataFrame(stationList) 231 | station_out.columns = ['stn'] 232 | station_out['lat']= stn_lat 233 | station_out['lon']= stn_lon 234 | station_out['in_d02']= in_d02 235 | station_out['in_d02']=in_d03 236 | station_out.to_csv('./station_out_removedmissing.csv') 237 | 238 | 239 | # ~~~~~~ START USER INPUT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 | monthNum=[i for i in range(12)] 241 | daysOfMonths=[31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] 242 | 243 | # variables of interest 244 | minTemp = 242; maxTemp = 294; 245 | 246 | # US Data 247 | NOAAdataLink="https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/" 248 | listOfStationsFile="~/lcd-stations.csv" #metadata of stations 249 | dirToWRF="/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_NoahLSM/" 250 | listOfStationsFile = "~/lcd-stations.csv" 251 | 252 | Chatty= True # false if you want to remove print statements 253 | written= True 254 | 255 | if Chatty: print('Starting ....') 256 | 257 | # ~~~~~~ START MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 258 | #------------------------------ load in wrf file names ---------- 259 | # $1 Get WRF file names 260 | filenames_d01=[] 261 | os.chdir(dirToWRF) 262 | for file in glob.glob("wrfout_d01_*"): 263 | filenames_d01.append(file) 264 | 265 | filenames_d01.sort() #files are now sorted by date and time 266 | 267 | # $1 Get WRF file names 268 | filenames_d02=[] 269 | os.chdir(dirToWRF) 270 | for file in glob.glob("wrfout_d02_*"): 271 | filenames_d02.append(file) 272 | 273 | filenames_d02.sort() #files are now sorted by date and time 274 | 275 | # $1 Get WRF file names 276 | filenames_d03=[] 277 | os.chdir(dirToWRF) 278 | for file in glob.glob("wrfout_d03_*"): 279 | filenames_d03.append(file) 280 | 281 | filenames_d03.sort() #files are now sorted by date and time 282 | 283 | dates=[filenames_d01[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d01))] 284 | 285 | runname='wrf_pure_PXLSM_v0' 286 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_PXLSM_v0/' 287 | listOfStationsFile = "~/lcd-stations.csv" 288 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 
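# A minimal optional sketch (editorial addition, not part of the original script), assuming
# the run-specific output directory may not exist yet; the CSVs defined just below are all
# written into dirout, so it can be created up front:
import os
os.makedirs(dirout, exist_ok=True)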
289 | 290 | comp_dataset_name = dirout+'station_data_complete_'+runname+'.csv' # name and directory to write out to 291 | comp_dataset_extra = dirout+'completeddata_mini_extras2.csv' 292 | station_out_name = dirout+'station_out_removedmissing.csv' #name of intermediate file 293 | comp_dataset_name2= dirout+'station_complete_rain.csv' 294 | 295 | 296 | 297 | # pull indices for d0# 298 | #assuming all files with d0# are in the same grid 299 | wrf_latd01, wrf_lond01 = latlon_coords(getvar(Dataset(filenames_d01[1]),"RAINNC")) 300 | wrf_latd02, wrf_lond02 = latlon_coords(getvar(Dataset(filenames_d02[1]),"RAINNC")) 301 | wrf_latd03, wrf_lond03 = latlon_coords(getvar(Dataset(filenames_d03[1]),"RAINNC")) 302 | 303 | #get corners of wrf files 304 | latd01min, latd01max, lond01min, lond01max = wrf_latd01.to_pandas().min().min(), wrf_latd01.to_pandas().max().max(),wrf_lond01.to_pandas().min().min(),wrf_lond01.to_pandas().max().max() 305 | latd02min, latd02max, lond02min, lond02max = wrf_latd02.to_pandas().min().min(), wrf_latd02.to_pandas().max().max(),wrf_lond02.to_pandas().min().min(),wrf_lond02.to_pandas().max().max() 306 | latd03min, latd03max, lond03min, lond03max = wrf_latd03.to_pandas().min().min(), wrf_latd03.to_pandas().max().max(),wrf_lond03.to_pandas().min().min(),wrf_lond03.to_pandas().max().max() 307 | 308 | 309 | #------------------------------ station parsing so we get lat lons ---------------- 310 | #------------------------ check to see if you must do this again --------- 311 | 312 | #if written out already 313 | if written: 314 | station_out=pd.read_csv('./station_out_removedmissing.csv') 315 | stn_lat= station_out['lat'] 316 | stn_lon= station_out['lon'] 317 | stationList =station_out['stn'] 318 | in_d02= station_out['in_d02'] 319 | in_d03= station_out['in_d02'] 320 | else: 321 | findStations() 322 | station_out=pd.read_csv('./station_out_removedmissing.csv') 323 | stn_lat= station_out['lat'] 324 | stn_lon= station_out['lon'] 325 | stationList =station_out['stn'] 326 | in_d02= station_out['in_d02'] 327 | in_d03= station_out['in_d02'] 328 | 329 | # ------------------------------------------------------------------# ------------------------------------------------------------------ 330 | 331 | # ----------------------- get Station data ------------------------------------------- 332 | # get indices for wrf given stn lat lon 333 | xx_d01=[]; xx_d02=[]; xx_d03=[]; yy_d01=[]; yy_d02=[]; yy_d03=[] 334 | # pull indices for d0# 335 | # assuming all files with d0# are in the same grid 336 | xx_d01,yy_d01=find_index(stn_lon, stn_lat, wrf_lond01, wrf_latd01) 337 | xx_d02,yy_d02=find_index(stn_lon, stn_lat, wrf_lond02, wrf_latd02) 338 | xx_d03,yy_d03=find_index(stn_lon, stn_lat, wrf_lond03, wrf_latd03) 339 | 340 | # Start pulling station data to compare 341 | # Output is a list of values for each station 342 | 343 | if Chatty: print('-'*70+'\n Starting processing station data \n' + '-'*70) 344 | 345 | # Pull out station data ... 
each rain[box] is a long list of 346 | rain_real=[[] for t in range(len(yy_d01))] 347 | temp_real=[[] for t in range(len(yy_d01))] 348 | 349 | start_out=time.time() 350 | for station in range(len(yy_d01)): 351 | start=time.time() 352 | LCD = pd.read_csv(NOAAdataLink + stationList[station]) 353 | #new loop 354 | if Chatty: print('-'*70) 355 | #letemknow 356 | correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats = getRealData(LCD) 357 | #make variable with all station data so we can compare to wrfout 358 | if Chatty: print('-> Completed loop %s (%s) in %.2f seconds' %(str(station),stationList[station],(time.time()-start))) 359 | rain_real[station]=correctedRain_noRepeats 360 | temp_real[station]=correctedTemp_noRepeats 361 | if int(len(yy_d01)/4)==station: 362 | print('25% complete, %.2f' %(time.time()-start_out,)) 363 | elif int(len(yy_d01)/2)==station: 364 | print('50% complete, %.2f' %(time.time()-start_out,)) 365 | elif int(3*len(yy_d01)/4)==station: 366 | print('75% complete, %.2f' %(time.time()-start_out,)) 367 | 368 | xx_d01_list=[xx_d01[i][0] for i in range(len(yy_d01))]; yy_d01_list=[yy_d01[i][0] for i in range(len(yy_d01))] 369 | xx_d02_list=[xx_d02[i][0] for i in range(len(yy_d02))]; yy_d02_list=[yy_d02[i][0] for i in range(len(yy_d02))] 370 | xx_d03_list=[xx_d03[i][0] for i in range(len(yy_d03))]; yy_d03_list=[yy_d03[i][0] for i in range(len(yy_d03))] 371 | 372 | #compare station data to wrf station data 373 | writeout_real = pd.DataFrame(temp_real) 374 | #writeout_real.columns = ['xx_d01'] 375 | writeout_real['xx_d01']= xx_d01_list 376 | writeout_real['yy_d01']= yy_d01_list 377 | writeout_real['lat']=stn_lat 378 | writeout_real['lon']=stn_lon 379 | writeout_real['in_d02']= in_d02 380 | writeout_real['in_d03']= in_d03 381 | writeout_real['dates']="%s"% dateCorrected_noRepeats 382 | writeout_real['times']="%s"% timeCorrected_noRepeats 383 | 384 | writeout_real.to_csv('./completed_dataset.csv') 385 | 386 | 387 | 388 | -------------------------------------------------------------------------------- /DataPreprocessing/compareHourlyWrfToClimateStations.py: -------------------------------------------------------------------------------- 1 | 2 | # Stacy Montgomery, April 2019 3 | #Single day WRF output 4 | 5 | # Future work == $[num] 6 | # $1: -- separate out so it can do d01, d02, d03 7 | 8 | # Notes -- NOTE[num] 9 | 10 | # Data for comparison 11 | # LCD data from noaa: https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/ 12 | # LCD station names -- metadata of stations -- must make CSV: https://www.ncdc.noaa.gov/homr/file/lcd-stations.txt 13 | # Currently the UTC offset calculator is for negative offsets, simple loop fix to do positive offsets 14 | 15 | # ~~~~~~ IMPORT PACKAGES ~~~~~~~~~~~~ 16 | #Station 17 | import glob, os 18 | import pandas as pd, numpy as np, matplotlib.pyplot as plt, cartopy.crs as crs, cartopy.feature as cpf 19 | from netCDF4 import Dataset 20 | from matplotlib.cm import get_cmap 21 | from cartopy.feature import NaturalEarthFeature 22 | from wrf import (to_np, getvar, smooth2d, get_cartopy, cartopy_xlim, cartopy_ylim, latlon_coords) 23 | import time 24 | from timezonefinder import TimezoneFinder 25 | from pytz import timezone 26 | import pytz 27 | from datetime import datetime,date, timedelta 28 | import dateutil.parser as dparser 29 | 30 | tf = TimezoneFinder(in_memory=True) 31 | 32 | # ~~~~~~ CUSTOM FUNCTIONS ~~~~~~~~~~~~ 33 | # adapted from : 
http://kbkb-wx-python.blogspot.com/2016/08/find-nearest-latitude-and-longitude.html 34 | def find_index(stn_lon, stn_lat, wrf_lon, wrf_lat): 35 | # stn -- points 36 | # wrf -- list 37 | xx=[];yy=[] 38 | for i in range(len(stn_lat)): 39 | abslat = np.abs(wrf_lat-stn_lat[i]) 40 | abslon= np.abs(wrf_lon-stn_lon[i]) 41 | c = np.maximum(abslon,abslat) 42 | latlon_idx = np.argmin(c) 43 | x, y = np.where(c == np.min(c)) 44 | #add indices of nearest wrf point station 45 | xx.append(x) 46 | yy.append(y) 47 | #return indices list 48 | return xx, yy 49 | 50 | # modified from https://stackoverflow.com/questions/16685384/finding-the-indices-of-matching-elements-in-list-in-python 51 | def find(lst, a): 52 | return [i for i, x in enumerate(lst) if x==a] 53 | 54 | # modified from ----- 55 | utc = pytz.utc 56 | def offset(lat,lon): 57 | #returns a location's time zone offset from UTC in minutes. 58 | today = datetime.now() 59 | tz_target = timezone(tf.certain_timezone_at(lat=lat, lng=lon)) 60 | # ATTENTION: tz_target could be None! handle error case 61 | today_target = tz_target.localize(today) 62 | today_utc = utc.localize(today) 63 | return (today_utc - today_target).total_seconds() / 3600 64 | 65 | 66 | # pull in real data, apply UTC, and average and remove hourly values 67 | def getRealData(LCD): 68 | date_noTime=[]; time_noDate=[] 69 | date_noTime= [LCD['DATE'][z].split('T')[0] for z in range(len(LCD['DATE']))] 70 | time_noDate=[LCD['DATE'][z].split('T')[1] for z in range(len(LCD['DATE']))] 71 | UTC_offset=offset(lon=LCD['LONGITUDE'][0], lat=LCD['LATITUDE'][0]) 72 | #get day before and after for UTC offset sake 73 | date_onedaybefore=(dparser.parse(dates[0])-timedelta(days=1)).isoformat().split('T')[0] 74 | date_onedayafter=(dparser.parse(dates[-1])+timedelta(days=1)).isoformat().split('T')[0] 75 | start_ind_dataset = find(date_noTime, date_onedaybefore)[0] 76 | end_ind_dataset= find(date_noTime, date_onedayafter)[-1] 77 | if Chatty: print('-> Adding UTC offset to timestamp and averaging repeated values') 78 | # UTC offset calculator 79 | # Get the time and round up or round down, also add the UTC offset such that correct time is in UTC 80 | correctedTime=[]; correctedRain=[]; correctedTemp =[];correctedDate=[] 81 | for i in range(len(LCD[start_ind_dataset: end_ind_dataset])): 82 | datetimeLCD=dparser.parse(LCD['DATE'][start_ind_dataset+i]) 83 | datetimeLCD_UTC = datetimeLCD + timedelta(hours=UTC_offset) 84 | try: 85 | rainz = float(LCD['HourlyPrecipitation'][start_ind_dataset+i]) 86 | except ValueError: 87 | rainz =float('nan') 88 | try: 89 | tempz= float(LCD['HourlyDryBulbTemperature'][start_ind_dataset+i]) 90 | except ValueError: 91 | tempz=float('nan') 92 | if datetimeLCD_UTC.minute >= 30: 93 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 94 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=60-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 95 | correctedRain.append(rainz) 96 | correctedTemp.append(tempz) 97 | elif datetimeLCD_UTC.minute < 30: 98 | correctedTime.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[1]) 99 | correctedDate.append((datetimeLCD_UTC+timedelta(minutes=-datetimeLCD_UTC.minute)).isoformat().split('T')[0]) 100 | correctedRain.append(rainz) 101 | correctedTemp.append(tempz) 102 | else: 103 | correctedTime.append((datetimeLCD_UTC).isoformat().split('T')[1]) 104 | correctedDate.append((datetimeLCD_UTC).isoformat().split('T')[0]) 105 | correctedRain.append(rainz) 
106 | correctedTemp.append(tempz)
107 | #Now filter LCD so that it only uses UTC date times
108 | start_ind_dataset2 = find(correctedDate, dates[0])[0]
109 | end_ind_dataset2 = find(correctedDate, dates[-1])[-1]
110 | correctedRain=correctedRain[start_ind_dataset2: end_ind_dataset2]
111 | correctedTemp= correctedTemp[start_ind_dataset2: end_ind_dataset2]
112 | correctedTime = correctedTime[start_ind_dataset2: end_ind_dataset2]
113 | correctedDate = correctedDate[start_ind_dataset2: end_ind_dataset2]
114 | #Now nan-average repeating values
115 | correctedRain_noRepeats=[]; correctedTemp_noRepeats =[]; timeCorrected_noRepeats=[]; i=0; dateCorrected_noRepeats=[]
116 | while i < len(correctedTime):
117 | j=0; tmpRain=[];tmpTemp=[]
118 | try:
119 | while i+j < len(correctedTime)-1 and correctedTime[i] == correctedTime[i+j]:
120 | tmpTemp.append(correctedTemp[i+j])
121 | tmpRain.append(correctedRain[i+j])
122 | j=j+1
123 | timeCorrected_noRepeats.append(correctedTime[i])
124 | dateCorrected_noRepeats.append(correctedDate[i])
125 | if j == 0 and i < len(correctedTime)-1:
126 | correctedRain_noRepeats.append(correctedRain[i])
127 | correctedTemp_noRepeats.append(correctedTemp[i])
128 | i=i+1
129 | elif j > 0 and i < len(correctedTime)-1:
130 | correctedRain_noRepeats.append(np.nanmean(tmpRain))
131 | correctedTemp_noRepeats.append(np.nanmean(tmpTemp))
132 | i=i+j
133 | else:
134 | i=i+1
135 | except IndexError:
136 | #if the record ends mid-hour, keep the index moving so the loop terminates
137 | i=i+1
138 | if Chatty: print('-> Finished averaging duplicate values in station %s dataset'% str(station))
139 | #VERY quick check to see if all data is available, if not, flag it for later
140 | missing_dates=[];missing_hours=[]
141 | if len(dates) == len(list(set(dateCorrected_noRepeats))):
142 | if Chatty: print('-> No missing dates at station %s' %(stationList[station],))
143 | else:
144 | if Chatty: print('-> Missing dates at %s' %(stationList[station],))
145 | missing_dates.append(stationList[station])
146 | #next
147 | if len(list(set(timeCorrected_noRepeats))) == 24:
148 | if Chatty: print('-> No missing hours at station %s' %(stationList[station],))
149 | else:
150 | if Chatty: print('-> Missing hours at %s' %(stationList[station],))
151 | missing_hours.append(stationList[station])
152 | #return
153 | return correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats
154 |
155 |
156 | # Easy bounding box checker
157 | def checkbounds(x,y,x1,y1,x2,y2):
158 | if (x<x2 and y<y2 and x>x1 and y>y1):
159 | return True
160 | else:
161 | return False
162 |
163 |
164 | def findStations():
165 | # Get station names -- NOTE1: LCD station names has no header... may cause index errors if format is different!
166 | stationList=[]; tmp=[] 167 | listOfStations= pd.read_csv(listOfStationsFile, header=None) 168 | listOfStations = listOfStations[np.isfinite(listOfStations[5])] #remove missing station data 169 | listOfStations =listOfStations.reset_index() #be able to index the stations properly 170 | tmp= [format(listOfStations[0][i],'06') for i in range(len(listOfStations))] #add leading zeroes to stations 171 | listOfStations['format'] = tmp; del tmp # add string names to list of stations 172 | stationList=[str(int(listOfStations[5][i]))+listOfStations['format'][i]+".csv" for i in range(len(listOfStations))] 173 | stn_lat =listOfStations[15].to_list(); stn_lon =listOfStations[16].to_list() 174 | stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy() 175 | lenOriginalStations=len(stn_lat) 176 | #check domain 177 | #plt.scatter(stn_lon , stn_lat) 178 | #xd03=[lond03min, lond03min, lond03max, lond03max] 179 | #yd03=[latd03min, latd03max, latd03min, latd03max] 180 | #plt.scatter(xd03, yd03) 181 | stnListCpy = [x for x in stationList] 182 | in_d01=[] 183 | #Check bounds and remove from non d01 domains 184 | for z in range(lenOriginalStations): 185 | x,y= stn_lat[z],stn_lon[z] 186 | if checkbounds(x,y,latd01min, lond01min, latd01max, lond01max): 187 | in_d01.append(True) 188 | else: 189 | stnListCpy.remove(stationList[z]) 190 | stn_latCopy.remove(stn_lat[z]) 191 | stn_lonCopy.remove(stn_lon[z]) 192 | #get rid of duplicates 193 | stationList = [x for x in stnListCpy] 194 | stn_lat = [x for x in stn_latCopy] 195 | stn_lon = [x for x in stn_lonCopy] 196 | del stnListCpy, stn_latCopy, stn_lonCopy 197 | # [in]Sanity check 198 | #plt.scatter(stn_lon , stn_lat,c= in_d03) 199 | #xd03=[lond03min, lond03min, lond03max, lond03max] 200 | #yd03=[latd03min, latd03max, latd03min, latd03max] 201 | #plt.scatter(xd03, yd03) 202 | #plt.show() 203 | #Check if stations exist and are in domain bounds, if not remove the station 204 | import requests 205 | stnListCpy = [x for x in stationList]; stn_latCopy= stn_lat.copy(); stn_lonCopy= stn_lon.copy() 206 | for station in range(len(stationList)): 207 | LCD = requests.get(NOAAdataLink + stationList[station]) 208 | #LCD.connect() 209 | if LCD.status_code > 200: 210 | if Chatty: print("-> Link does not exist for %s, removing station" %(stationList[station],)) 211 | stnListCpy.remove(stationList[station]) 212 | stn_latCopy.remove(stn_lat[station]) 213 | stn_lonCopy.remove(stn_lon[station]) 214 | #Remove copies again 215 | stationList = [x for x in stnListCpy] 216 | stn_lat = [x for x in stn_latCopy] 217 | stn_lon = [x for x in stn_lonCopy] 218 | del stnListCpy, stn_latCopy, stn_lonCopy 219 | # now check to see which of these are within d02, d03 domains 220 | in_d02=[False for z in range(len(stn_lat))]; in_d03=[False for z in range(len(stn_lat))] 221 | for z in range(len(stationList)): 222 | x,y= stn_lat[z],stn_lon[z] 223 | if (checkbounds(x,y,latd02min, lond02min, latd02max, lond02max)): 224 | in_d02[z]=True 225 | if (checkbounds(x,y,latd03min, lond03min, latd03max, lond03max)): 226 | in_d03[z]=True 227 | # !!!!!!!!!!---------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 228 | # write out station list so we don't need to do this again: 229 | # !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!!----------- !!!!!!!!!! 
230 | station_out=pd.DataFrame(stationList) 231 | station_out.columns = ['stn'] 232 | station_out['lat']= stn_lat 233 | station_out['lon']= stn_lon 234 | station_out['in_d02']= in_d02 235 | station_out['in_d02']=in_d03 236 | station_out.to_csv('./station_out_removedmissing.csv') 237 | 238 | 239 | # ~~~~~~ START USER INPUT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 | monthNum=[i for i in range(12)] 241 | daysOfMonths=[31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] 242 | 243 | # variables of interest 244 | minTemp = 242; maxTemp = 294; 245 | 246 | # US Data 247 | NOAAdataLink="https://www.ncei.noaa.gov/data/local-climatological-data/access/2018/" 248 | listOfStationsFile="~/lcd-stations.csv" #metadata of stations 249 | dirToWRF="/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_NoahLSM/" 250 | listOfStationsFile = "~/lcd-stations.csv" 251 | 252 | Chatty= True # false if you want to remove print statements 253 | written= True 254 | 255 | if Chatty: print('Starting ....') 256 | 257 | # ~~~~~~ START MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 258 | #------------------------------ load in wrf file names ---------- 259 | # $1 Get WRF file names 260 | filenames_d01=[] 261 | os.chdir(dirToWRF) 262 | for file in glob.glob("wrfout_d01_*"): 263 | filenames_d01.append(file) 264 | 265 | filenames_d01.sort() #files are now sorted by date and time 266 | 267 | # $1 Get WRF file names 268 | filenames_d02=[] 269 | os.chdir(dirToWRF) 270 | for file in glob.glob("wrfout_d02_*"): 271 | filenames_d02.append(file) 272 | 273 | filenames_d02.sort() #files are now sorted by date and time 274 | 275 | # $1 Get WRF file names 276 | filenames_d03=[] 277 | os.chdir(dirToWRF) 278 | for file in glob.glob("wrfout_d03_*"): 279 | filenames_d03.append(file) 280 | 281 | filenames_d03.sort() #files are now sorted by date and time 282 | 283 | dates=[filenames_d01[z].split("wrfout_d01_")[1].split("_00:00:00")[0] for z in range(len(filenames_d01))] 284 | 285 | runname='wrf_pure_PXLSM_v0' 286 | dirToWRF='/projects/b1045/wrf-cmaq/output/Chicago_LADCO/wrf_pure_PXLSM_v0/' 287 | listOfStationsFile = "~/lcd-stations.csv" 288 | dirout='/home/asm0384/WRFcheck/'+runname+'/' 289 | 290 | comp_dataset_name = dirout+'station_data_complete_'+runname+'.csv' # name and directory to write out to 291 | comp_dataset_extra = dirout+'completeddata_mini_extras2.csv' 292 | station_out_name = dirout+'station_out_removedmissing.csv' #name of intermediate file 293 | comp_dataset_name2= dirout+'station_complete_rain.csv' 294 | 295 | 296 | 297 | # pull indices for d0# 298 | #assuming all files with d0# are in the same grid 299 | wrf_latd01, wrf_lond01 = latlon_coords(getvar(Dataset(filenames_d01[1]),"RAINNC")) 300 | wrf_latd02, wrf_lond02 = latlon_coords(getvar(Dataset(filenames_d02[1]),"RAINNC")) 301 | wrf_latd03, wrf_lond03 = latlon_coords(getvar(Dataset(filenames_d03[1]),"RAINNC")) 302 | 303 | #get corners of wrf files 304 | latd01min, latd01max, lond01min, lond01max = wrf_latd01.to_pandas().min().min(), wrf_latd01.to_pandas().max().max(),wrf_lond01.to_pandas().min().min(),wrf_lond01.to_pandas().max().max() 305 | latd02min, latd02max, lond02min, lond02max = wrf_latd02.to_pandas().min().min(), wrf_latd02.to_pandas().max().max(),wrf_lond02.to_pandas().min().min(),wrf_lond02.to_pandas().max().max() 306 | latd03min, latd03max, lond03min, lond03max = wrf_latd03.to_pandas().min().min(), 
wrf_latd03.to_pandas().max().max(),wrf_lond03.to_pandas().min().min(),wrf_lond03.to_pandas().max().max() 307 | 308 | 309 | #------------------------------ station parsing so we get lat lons ---------------- 310 | #------------------------ check to see if you must do this again --------- 311 | 312 | #if written out already 313 | if written: 314 | station_out=pd.read_csv('./station_out_removedmissing.csv') 315 | stn_lat= station_out['lat'] 316 | stn_lon= station_out['lon'] 317 | stationList =station_out['stn'] 318 | in_d02= station_out['in_d02'] 319 | in_d03= station_out['in_d02'] 320 | else: 321 | findStations() 322 | station_out=pd.read_csv('./station_out_removedmissing.csv') 323 | stn_lat= station_out['lat'] 324 | stn_lon= station_out['lon'] 325 | stationList =station_out['stn'] 326 | in_d02= station_out['in_d02'] 327 | in_d03= station_out['in_d02'] 328 | 329 | # ------------------------------------------------------------------# ------------------------------------------------------------------ 330 | 331 | # ----------------------- get Station data ------------------------------------------- 332 | # get indices for wrf given stn lat lon 333 | xx_d01=[]; xx_d02=[]; xx_d03=[]; yy_d01=[]; yy_d02=[]; yy_d03=[] 334 | # pull indices for d0# 335 | # assuming all files with d0# are in the same grid 336 | xx_d01,yy_d01=find_index(stn_lon, stn_lat, wrf_lond01, wrf_latd01) 337 | xx_d02,yy_d02=find_index(stn_lon, stn_lat, wrf_lond02, wrf_latd02) 338 | xx_d03,yy_d03=find_index(stn_lon, stn_lat, wrf_lond03, wrf_latd03) 339 | 340 | # Start pulling station data to compare 341 | # Output is a list of values for each station 342 | 343 | if Chatty: print('-'*70+'\n Starting processing station data \n' + '-'*70) 344 | 345 | # Pull out station data ... each rain[box] is a long list of 346 | rain_real=[[] for t in range(len(yy_d01))] 347 | temp_real=[[] for t in range(len(yy_d01))] 348 | 349 | start_out=time.time() 350 | for station in range(len(yy_d01)): 351 | start=time.time() 352 | LCD = pd.read_csv(NOAAdataLink + stationList[station]) 353 | #new loop 354 | if Chatty: print('-'*70) 355 | #letemknow 356 | correctedRain_noRepeats, correctedTemp_noRepeats, dateCorrected_noRepeats, timeCorrected_noRepeats = getRealData(LCD) 357 | #make variable with all station data so we can compare to wrfout 358 | if Chatty: print('-> Completed loop %s (%s) in %.2f seconds' %(str(station),stationList[station],(time.time()-start))) 359 | rain_real[station]=correctedRain_noRepeats 360 | temp_real[station]=correctedTemp_noRepeats 361 | if int(len(yy_d01)/4)==station: 362 | print('25% complete, %.2f' %(time.time()-start_out,)) 363 | elif int(len(yy_d01)/2)==station: 364 | print('50% complete, %.2f' %(time.time()-start_out,)) 365 | elif int(3*len(yy_d01)/4)==station: 366 | print('75% complete, %.2f' %(time.time()-start_out,)) 367 | 368 | xx_d01_list=[xx_d01[i][0] for i in range(len(yy_d01))]; yy_d01_list=[yy_d01[i][0] for i in range(len(yy_d01))] 369 | xx_d02_list=[xx_d02[i][0] for i in range(len(yy_d02))]; yy_d02_list=[yy_d02[i][0] for i in range(len(yy_d02))] 370 | xx_d03_list=[xx_d03[i][0] for i in range(len(yy_d03))]; yy_d03_list=[yy_d03[i][0] for i in range(len(yy_d03))] 371 | 372 | #compare station data to wrf station data 373 | writeout_real = pd.DataFrame(temp_real) 374 | #writeout_real.columns = ['xx_d01'] 375 | writeout_real['xx_d01']= xx_d01_list 376 | writeout_real['yy_d01']= yy_d01_list 377 | writeout_real['lat']=stn_lat 378 | writeout_real['lon']=stn_lon 379 | writeout_real['in_d02']= in_d02 380 | 
writeout_real['in_d03']= in_d03 381 | writeout_real['dates']="%s"% dateCorrected_noRepeats 382 | writeout_real['times']="%s"% timeCorrected_noRepeats 383 | 384 | writeout_real.to_csv('./completed_dataset.csv') 385 | 386 | 387 | 388 | --------------------------------------------------------------------------------
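The offset() helper used in both copies of compareHourlyWrfToClimateStations.py flags that tf.certain_timezone_at() can return None but never handles that case, and its docstring says "minutes" although the value it returns is in hours. A minimal guarded sketch, assuming a fallback zone of 'America/Chicago' purely for illustration (the script does not specify one):

from datetime import datetime
import pytz
from timezonefinder import TimezoneFinder

tf = TimezoneFinder(in_memory=True)
utc = pytz.utc

def offset(lat, lon, default_tz='America/Chicago'):
    """Return a location's offset from UTC in hours (negative west of Greenwich)."""
    tz_name = tf.certain_timezone_at(lat=lat, lng=lon)
    if tz_name is None:  # e.g. a point over open water; fall back to the assumed default zone
        tz_name = default_tz
    tz_target = pytz.timezone(tz_name)
    now = datetime.now()
    return (utc.localize(now) - tz_target.localize(now)).total_seconds() / 3600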