├── .gitignore
├── LICENSE
├── Pictures
│   ├── LSU_FullName_Purple_RGB.jpg
│   ├── Northern_Illinois_University_logo.jpg
│   ├── downloads_logos_other_csu_logos.jpg
│   ├── nsf1.jpg
│   └── nsf1.tif
├── README.md
├── mcs
│   ├── notebooks
│   │   ├── MCS_Swath_Occurrence.ipynb
│   │   ├── Slice_data_and_spatial_occurrence.ipynb
│   │   ├── Slice_segmentation.ipynb
│   │   ├── Swath_building.ipynb
│   │   ├── Test_Track_Performance.ipynb
│   │   └── Testing_and_training_data.ipynb
│   └── utils
│       ├── colors.py
│       ├── mapping_help.py
│       ├── multiprocess_tracks.py
│       ├── refl_std_calc.py
│       ├── segmentation.py
│       ├── singleprocess_tracks.py
│       └── tracking.py
├── mcs_climo
│   └── notebooks
│       ├── Basic_Stats.ipynb
│       └── Spatial_Climatology.ipynb
└── mcs_future
    ├── notebooks
    │   ├── MCS_QLCS_NonQLCS_Stats.ipynb
    │   ├── Mapping_Spatial_Information.ipynb
    │   └── checksum.jpg
    └── utils
        └── mapping_help.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ~*
2 | .DS_Store
3 | *.swp
4 | .idea/
5 | *.csv
6 | *.pyc
7 | *.png
8 | *.pkl
9 | *.npz
10 | *.npy
11 | *.webm
12 | *.gif
13 | *.prj
14 | *.sbx
15 | *.sbn
16 | *.xml
17 | *.tif
18 | *.jpeg
19 | *.txt
20 | *.db
21 | *.nc
22 | *.hdf
23 | *.bin
24 | *.h5
25 | *.zip
26 | *.xlsx
27 | *.job
28 | *.tar
29 | *.tar.gz
30 | *.lock
31 | *.shp
32 | *.cpg
33 | *.dbf
34 | *.shx
35 | *.pdf
36 | *.mp4
37 | *.ini
38 | 
39 | .coverage
40 | .cache
41 | 
42 | .ipynb_checkpoints/
43 | 
44 | dist/*
45 | build/*
46 | *.egg-info
47 | MANIFEST
48 | .eggs/
49 | 
50 | docs/build/
51 | docs/source/examples
52 | examples/scripts

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Alex Haberlie
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/Pictures/LSU_FullName_Purple_RGB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahaberlie/MCS/5ac4f38975b360b1c882beeb6f03f8193bb97127/Pictures/LSU_FullName_Purple_RGB.jpg

--------------------------------------------------------------------------------
/Pictures/Northern_Illinois_University_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahaberlie/MCS/5ac4f38975b360b1c882beeb6f03f8193bb97127/Pictures/Northern_Illinois_University_logo.jpg

--------------------------------------------------------------------------------
/Pictures/downloads_logos_other_csu_logos.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahaberlie/MCS/5ac4f38975b360b1c882beeb6f03f8193bb97127/Pictures/downloads_logos_other_csu_logos.jpg

--------------------------------------------------------------------------------
/Pictures/nsf1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahaberlie/MCS/5ac4f38975b360b1c882beeb6f03f8193bb97127/Pictures/nsf1.jpg

--------------------------------------------------------------------------------
/Pictures/nsf1.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ahaberlie/MCS/5ac4f38975b360b1c882beeb6f03f8193bb97127/Pictures/nsf1.tif

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MCS
2 | 
3 | MCS Segmentation, Classification, and Tracking Project: Data, Analyses, and Results
4 | 
5 | 
6 | Requirements (packages used by the notebooks and utilities in this repository):
7 | 
8 | 
9 | - Python 3.x
10 | - numpy
11 | - scipy
12 | - pandas
13 | - matplotlib
14 | - scikit-image
15 | - scikit-learn
16 | - cartopy
17 | - shapely
18 | - geopandas
19 | - netCDF4
20 | 
21 | 
22 | 
23 | Public access to data will be available soon.
24 | 
25 | 
26 | 
27 | MCS Climatology Paper Citation:
28 | 
29 | ```
30 | Haberlie, A. M., and W. S. Ashley, 2019: A radar-based climatology of mesoscale convective systems in the United States. Journal of Climate.
31 | ```
32 | 
33 | 
34 | If using *slice* data generated by this project or the described methods, please cite:
35 | 
36 | ```
37 | Haberlie, A. M., and W. S. Ashley, 2018: Identifying mesoscale convective systems in radar mosaics. Part I. Segmentation and classification. Journal of Applied Meteorology and Climatology, 57, 1575-1598.
38 | ```
39 | 
40 | If using *swath* data generated by this project or the described methods, please cite the following paper in addition to Part I:
41 | 
42 | ```
43 | Haberlie, A. M., and W. S. Ashley, 2018: Identifying mesoscale convective systems in radar mosaics. Part II. Tracking and application. Journal of Applied Meteorology and Climatology, 57, 1599-1621.
44 | ```
45 | 
46 | Dynamical Downscaling Paper:
47 | 
48 | ```
49 | Haberlie, A. M., and W. S. Ashley, 2018: Climatological representation of mesoscale convective systems in a dynamically downscaled climate simulation. International Journal of Climatology. In Press.
50 | ```
51 | 
52 | This research is supported by National Science Foundation Grant ATM-1637225,
53 | an NIU Division of Research and Innovation Partnerships Research and Artistry Grant,
54 | and an NIU Graduate School Dissertation Completion Fellowship.
55 | 
56 | 

57 | <!-- institution and funding agency logos (see Pictures/) -->
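Example usage (a minimal, hypothetical sketch rather than a supported API: it assumes `mcs/utils` is on your `PYTHONPATH` and that a NOWrad-style mosaic is available as a 2-D array of 5-dBZ levels):

```python
import numpy as np
from skimage.measure import regionprops

from segmentation import find_lines

# img: 2-D uint8 array of reflectivity in 5-dBZ levels (e.g., 8 -> 40 dBZ);
# the file name below is a placeholder.
img = np.load("mosaic_levels.npy")

# Merge intense cells within the convective search radius and keep regions
# at least 100 km long (50 pixels at 2 km per pixel).
labeled = find_lines(img, conv_buffer=3, min_length=50)

for region in regionprops(labeled):
    print(region.label, region.area, region.major_axis_length)
```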

--------------------------------------------------------------------------------
/mcs/utils/colors.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 | 
3 | 
4 | def radar_colormap():
5 |     nws_reflectivity_colors = [
6 |     "#646464", # 0
7 |     "#04e9e7", # 5
8 |     "#019ff4", # 10
9 |     "#0300f4", # 15
10 |     "#02fd02", # 20
11 |     "#01c501", # 25
12 |     "#008e00", # 30
13 |     "#fdf802", # 35
14 |     "#e5bc00", # 40
15 |     "#fd9500", # 45
16 |     "#fd0000", # 50
17 |     "#d40000", # 55
18 |     "#bc0000", # 60
19 |     "#f800fd", # 65
20 |     "#9854c6", # 70
21 |     "#fdfdfd", # 75
22 |     "#000000"
23 |     ]
24 | 
25 |     cmap = mpl.colors.ListedColormap(nws_reflectivity_colors)
26 | 
27 |     return cmap
28 | 
29 | def quantize(img, s=4, c=8, i=10):
30 |     """Collapse a reflectivity image (in 5-dBZ levels) into four classes:
31 |     0 = below stratiform, 1 = stratiform (>= s, 20 dBZ),
32 |     2 = convective (>= c, 40 dBZ), 3 = intense (>= i, 50 dBZ)."""
33 | 
34 |     strat = 1*(img >= s)
35 |     conv = 1*(img >= c)
36 |     ints = 1*(img >= i)
37 | 
38 |     return strat+conv+ints
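The colormap above pairs naturally with matplotlib's `BoundaryNorm`, since the mosaics are stored as discrete 5-dBZ levels. A minimal, hypothetical usage sketch (not part of the repository; it assumes `mcs/utils` is importable):

```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import BoundaryNorm

from colors import radar_colormap, quantize  # assumes mcs/utils on sys.path

# Stand-in for a real NOWrad-style mosaic in 5-dBZ levels (0-16).
img = np.random.randint(0, 16, size=(200, 200))

cmap = radar_colormap()
norm = BoundaryNorm(np.arange(0, cmap.N + 1), cmap.N)  # one bin per color

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.imshow(img, cmap=cmap, norm=norm)      # reflectivity levels
ax2.imshow(quantize(img), vmin=0, vmax=3)  # stratiform/convective/intense
plt.show()
```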
--------------------------------------------------------------------------------
/mcs/utils/mapping_help.py:
--------------------------------------------------------------------------------
1 | 
2 | from math import floor
3 | from matplotlib import patheffects
4 | from matplotlib import patches as mpatches
5 | import cartopy.io.shapereader as shpreader
6 | import cartopy
7 | import cartopy.crs as ccrs
8 | 
9 | import pickle
10 | import numpy as np
11 | import pandas as pd
12 | 
13 | import matplotlib.pyplot as plt
14 | 
15 | def get_track_centroids(group):
16 | 
17 |     xc = [np.mean([x, y]) for (x, y) in zip(group['xmin'].values, group['xmax'].values)]
18 |     yc = [np.mean([x, y]) for (x, y) in zip(group['ymin'].values, group['ymax'].values)]
19 | 
20 |     return xc, yc
21 | 
22 | def running_ave(arr, N):
23 | 
24 |     data = []
25 |     length = len(arr)
26 | 
27 |     n = int(N/2)
28 | 
29 |     for i in range(length):
30 | 
31 |         if i + n + 1 > length:
32 |             data.append(np.mean(arr[i-n:]))
33 | 
34 |         elif i - n < 0:
35 |             data.append(np.mean(arr[:i+n+1]))
36 | 
37 |         else:
38 |             data.append(np.mean(arr[i-n:i+n+1]))
39 | 
40 |     return np.array(data)
41 | 
42 | def quantize(img):
43 | 
44 |     strat = 1*(img>=4)
45 |     conv = 1*(img>=8)
46 |     ints = 1*(img>=10)
47 | 
48 |     return strat+conv+ints
49 | 
50 | def utm_from_lon(lon):
51 |     """
52 |     utm_from_lon - UTM zone for a longitude
53 | 
54 |     Not right for some polar regions (Norway, Svalbard, Antarctica)
55 | 
56 |     :param float lon: longitude
57 |     :return: UTM zone number
58 |     :rtype: int
59 |     """
60 |     return floor( ( lon + 180 ) / 6) + 1
61 | 
62 | def scale_bar(ax, proj, length, location=(0.5, 0.05), linewidth=3,
63 |               units='km', m_per_unit=1000, fontsize=8):
64 |     """
65 | 
66 |     http://stackoverflow.com/a/35705477/1072212
67 |     ax is the axes to draw the scalebar on.
68 |     proj is the projection the axes are in
69 |     location is center of the scalebar in axis coordinates ie. 0.5 is the middle of the plot
70 |     length is the length of the scalebar in km.
71 |     linewidth is the thickness of the scalebar.
72 |     units is the name of the unit
73 |     m_per_unit is the number of meters in a unit
74 |     """
75 |     # find lat/lon center to find best UTM zone
76 |     x0, x1, y0, y1 = ax.get_extent(proj.as_geodetic())
77 |     # Projection in metres
78 |     utm = ccrs.UTM(utm_from_lon((x0+x1)/2))
79 |     # Get the extent of the plotted area in coordinates in metres
80 |     x0, x1, y0, y1 = ax.get_extent(utm)
81 |     # Turn the specified scalebar location into coordinates in metres
82 |     sbcx, sbcy = x0 + (x1 - x0) * location[0], y0 + (y1 - y0) * location[1]
83 |     # Generate the x coordinate for the ends of the scalebar
84 |     bar_xs = [sbcx - length * m_per_unit/2, sbcx + length * m_per_unit/2]
85 |     # buffer for scalebar
86 |     buffer = [patheffects.withStroke(linewidth=5, foreground="w")]
87 |     # Plot the scalebar with buffer
88 |     ax.plot(bar_xs, [sbcy, sbcy], transform=utm, color='k',
89 |             linewidth=linewidth, path_effects=buffer)
90 |     # buffer for text
91 |     buffer = [patheffects.withStroke(linewidth=3, foreground="w")]
92 |     # Plot the scalebar label
93 |     t0 = ax.text(sbcx, sbcy+10000, str(length) + ' ' + units, transform=utm,
94 |                  horizontalalignment='center', verticalalignment='bottom',
95 |                  path_effects=buffer, zorder=2, fontsize=fontsize)
96 |     left = x0+(x1-x0)*0.05
97 |     # Plot the N arrow
98 |     t1 = ax.text(left, sbcy, u'\u25B2\nN', transform=utm,
99 |                  horizontalalignment='center', fontsize=20, verticalalignment='bottom',
100 |                  path_effects=buffer, zorder=2)
101 |     # Plot the scalebar without buffer, in case covered by text buffer
102 |     ax.plot(bar_xs, [sbcy, sbcy], transform=utm, color='k',
103 |             linewidth=linewidth, zorder=3)
104 | 
105 | def quantize(img):
106 | 
107 |     strat = 1*(img>=4)
108 |     conv = 1*(img>=8)
109 |     ints = 1*(img>=10)
110 | 
111 |     return strat+conv+ints
112 | 
113 | def running_ave(arr, N):
114 | 
115 |     data = []
116 |     length = len(arr)
117 | 
118 |     n = int(N/2)
119 | 
120 |     for i in range(length):
121 | 
122 |         if i + n + 1 > length:
123 |             data.append(np.mean(arr[i-n:]))
124 | 
125 |         elif i - n < 0:
126 |             data.append(np.mean(arr[:i+n+1]))
127 | 
128 |         else:
129 |             data.append(np.mean(arr[i-n:i+n+1]))
130 | 
131 |     return np.array(data)
132 | 
133 | 
134 | def generate_view(w_lon, e_lon, n_lat, s_lat, from_proj, to_proj):
135 | 
136 |     view = plt.axes([0,0,1,1], projection=to_proj)
137 | 
138 |     view.set_extent([w_lon, e_lon, s_lat, n_lat])
139 | 
140 |     shapename = 'admin_1_states_provinces_lakes_shp'
141 |     states_shp = shpreader.natural_earth(resolution='50m',
142 |                                          category='cultural', name=shapename)
143 | 
144 |     for state, info in zip(shpreader.Reader(states_shp).geometries(), shpreader.Reader(states_shp).records()):
145 |         if info.attributes['admin'] == 'United States of America':
146 | 
147 |             view.add_geometries([state], ccrs.PlateCarree(),
148 |                                 facecolor='None', edgecolor='k')
149 | 
150 |     return view
151 | 
152 | 
153 | def NOWrad_to_lon_lat(xpoints, ypoints, xMin=0, yMin=0):
154 |     """Convert NOWrad x,y grid coordinates to latitude,
155 |     longitude coordinates.
156 | 
157 |     Can also convert a subset of the image if you know
158 |     the west and north edges.
159 | 160 | Parameters 161 | ---------- 162 | xpoints: (N, ) ndarray 163 | Array of x coordinates to be converted 164 | ypoints: (N, ) ndarray 165 | Array of y coordinates to be converted 166 | xMin: int 167 | Relative most westward x coordinate if image is clipped 168 | yMin: int 169 | Relative most northward y coordinate if image is clipped 170 | """ 171 | 172 | #See: NOWrad Technical Note 173 | lats = 53 - (yMin + ypoints) * .0180 174 | lons = (xMin + xpoints) * .0191 - 130 175 | 176 | return lons, lats 177 | 178 | def get_NOWrad_conus_lon_lat(): 179 | 180 | x = np.array(list(range(0,3661))) 181 | y = np.array(list(range(0,1837))) 182 | 183 | return NOWrad_to_lon_lat(x, y) -------------------------------------------------------------------------------- /mcs/utils/multiprocess_tracks.py: -------------------------------------------------------------------------------- 1 | import sys, getopt 2 | 3 | from tracking import rematch_tracks, create_tracks 4 | 5 | from scipy.ndimage import imread 6 | import numpy as np 7 | import pickle 8 | import pandas as pd 9 | from scipy.spatial.distance import pdist 10 | import datetime 11 | from functools import partial 12 | 13 | 14 | from multiprocessing import Pool 15 | 16 | from sklearn.linear_model import LinearRegression 17 | from sklearn.metrics import mean_squared_error 18 | 19 | def get_date_range(argv): 20 | 21 | start_date = '' 22 | end_date = '' 23 | 24 | try: 25 | opts, args = getopt.getopt(argv, "hs:e:", ["sdate=","edate="]) 26 | except getopt.GetoptError: 27 | print('make_slices.py -s -e ') 28 | sys.exit(2) 29 | 30 | for opt, args in opts: 31 | 32 | if opt == '-s': 33 | 34 | start_date = args 35 | 36 | elif opt == '-e': 37 | 38 | end_date = args 39 | 40 | print("Starting date:", start_date) 41 | print("Ending date:", end_date) 42 | 43 | return start_date, end_date 44 | 45 | def creation_of_tracks(p, df, dt): 46 | 47 | years = df.groupby(df.index.year) 48 | for year, year_rows in years: 49 | 50 | print(year) 51 | 52 | rng = pd.date_range(datetime.datetime(year, 5, 1, 0, 0), datetime.datetime(year, 10, 1, 0, 0), freq=dt) 53 | 54 | year_rows = year_rows[year_rows.index.isin(rng.values)] 55 | 56 | year_rows.loc[:, 'datetime'] = year_rows.index.values 57 | 58 | year_rows.reset_index(drop=True, inplace=True) 59 | 60 | matching = [] 61 | 62 | pobj = Pool(6) 63 | print("initializing async..") 64 | for crsr in [6, 12, 24, 48]: 65 | for ssr in [48, 96, 192]: 66 | 67 | matching.append(pobj.apply_async(create_tracks, (year_rows.copy(), rng, str(year), crsr, ssr, p, ""))) 68 | 69 | pobj.close() 70 | pobj.join() 71 | 72 | for i in matching: 73 | res = i.get() 74 | 75 | def rematching_tracks(p, dt): 76 | 77 | print("reading year files...") 78 | 79 | for year in range(2016, 2017): 80 | 81 | pobj = Pool(6) 82 | results = [] 83 | 84 | for crsr in [6, 12, 24, 48]: 85 | for ssr in [48, 96, 192]: 86 | 87 | fname = "../data/track_data/unmatched/" + str(year) + "/" + str(year) + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl" 88 | print("reading file:", fname) 89 | 90 | try: 91 | df = pickle.load(open(fname, "rb")) 92 | 93 | df.loc[:, 'datetime'] = pd.to_datetime(df.datetime) 94 | 95 | df = df.set_index('datetime') 96 | 97 | rng = pd.date_range(datetime.datetime(year, 5, 1, 0, 0), datetime.datetime(year, 10, 1, 0, 0), freq=dt) 98 | 99 | df.loc[:, 'datetime'] = df.index.values 100 | 101 | df.reset_index(drop=True, inplace=True) 102 | 103 | results.append(pobj.apply_async(rematch_tracks, (df.copy(), rng, str(year), crsr, 
ssr, p)))
104 | 
105 |             except Exception as e:
106 |                 print(e)
107 | 
108 |         pobj.close()
109 |         pobj.join()
110 | 
111 |         for i in results:
112 |             print(i.get())
113 | 
114 | 
115 | 
116 | if __name__ == "__main__":
117 | 
118 |     print("reading index file...")
119 | 
120 |     start_date, end_date = get_date_range(sys.argv[1:])
121 | 
122 |     df = pd.read_csv("../data/slice_data/labeled_slices_020618.csv")
123 | 
124 |     df = df.apply(partial(pd.to_numeric, errors='ignore'))
125 | 
126 |     df.info()
127 | 
128 |     df.loc[:, 'datetime'] = pd.to_datetime(df.datetime)
129 | 
130 |     df = df.set_index('datetime')
131 | 
132 |     df = df[start_date:end_date]
133 | 
134 |     df = df.drop('Unnamed: 0', axis=1)
135 |     df = df.drop('Unnamed: 0.1', axis=1)
136 |     df = df.drop('Unnamed: 0.1.1', axis=1)
137 | 
138 |     print("finished reading index file..")
139 | 
140 |     print("Calculating attributes..")
141 |     fname = []
142 | 
143 |     for rid, row in df.iterrows():
144 | 
145 |         if rid.year == 2015:
146 | 
147 |             fn = "E:/p12_slices/92017_slices/2015/" + row.filename
148 | 
149 |         if rid.year == 2016:
150 | 
151 |             fn = "F:/2016/" + row.filename
152 | 
153 |         fname.append(fn)
154 | 
155 |     df.loc[:, 'filename'] = fname
156 | 
157 |     print("finished calculating attributes..")
158 | 
159 | 
160 |     for p in [0.0, 0.5, 0.90, 0.95]:
161 |         creation_of_tracks(p, df, '15T')
162 |         rematching_tracks(p, '15T')
163 | 

--------------------------------------------------------------------------------
/mcs/utils/refl_std_calc.py:
--------------------------------------------------------------------------------
1 | import sys, getopt
2 | 
3 | from scipy.ndimage import imread
4 | import numpy as np
5 | import pickle
6 | import pandas as pd
7 | 
8 | from multiprocessing import Pool
9 | 
10 | from sklearn.linear_model import LinearRegression
11 | from sklearn.metrics import mean_squared_error
12 | 
13 | feature_list = ['area', 'convex_area', 'eccentricity',
14 |                 'intense_area', 'convection_area',
15 |                 'convection_stratiform_ratio', 'intense_stratiform_ratio',
16 |                 'intense_convection_ratio', 'mean_intensity', 'max_intensity',
17 |                 'intensity_variance', 'major_axis_length', 'minor_axis_length',
18 |                 'solidity']
19 | 
20 | def get_mean_dur(crsr, ssr, p, pref, year):
21 | 
22 |     entry = {'crsr':[], 'ssr':[], 'p':[], 'mean_size':[], 'dist':[]}
23 |     entry['crsr'].append(crsr)
24 |     entry['ssr'].append(ssr)
25 |     entry['p'].append(float(p))
26 | 
27 |     fn = "../data/track_data/" + pref + "/" + str(year) + "/" + str(year) + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl"
28 | 
29 | 
30 |     bg = pickle.load(open(fn, 'rb'))
31 |     df1 = bg[(bg.CRSR==crsr) & (bg.SSR==ssr)]
32 | 
33 |     grouped = df1.groupby('storm_num')
34 | 
35 |     tdata = []
36 |     size = []
37 |     for gid, group in grouped:
38 |         duration = (pd.to_datetime(group.iloc[-1]['datetime']) - pd.to_datetime(group.iloc[0]['datetime'])).total_seconds() / 3600
39 |         if duration >= 0.5:
40 | 
41 |             tdata.append(duration)
42 | 
43 |     entry['dist'].append(np.array(tdata))
44 | 
45 |     print("CRSR: ", crsr, "SSR:", ssr, "MCS_P:", p, "Mean length:", np.mean(tdata))
46 | 
47 |     return entry
48 | 
49 | 
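# ------------------------------------------------------------------
# Added illustration (not in the original file): get_lin_err below
# scores track linearity by fitting the y centroids of a track on the
# x centroids with ordinary least squares and reporting the RMSE of
# that fit, so a perfectly straight track scores 0. A toy example with
# invented centroids:
#
#     xc, yc = [0, 1, 2, 3], [0, 1, 2, 10]
#     clf = LinearRegression().fit([[v] for v in xc], yc)
#     rmse = np.sqrt(mean_squared_error(yc, clf.predict([[v] for v in xc])))
#
# LinearRegression and mean_squared_error are already imported above.
# ------------------------------------------------------------------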
50 | def get_lin_err(crsr, ssr, p, pref, year):
51 | 
52 |     entry = {'crsr':[], 'ssr':[], 'p':[], 'mean_size':[], 'dist':[]}
53 |     entry['crsr'].append(crsr)
54 |     entry['ssr'].append(ssr)
55 |     entry['p'].append(float(p))
56 | 
57 |     fn = "../data/track_data/" + pref + "/" + str(year) + "/" + str(year) + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl"
58 | 
59 | 
60 |     bg = pickle.load(open(fn, 'rb'))
61 |     df1 = bg[(bg.CRSR==crsr) & (bg.SSR==ssr)]
62 | 
63 |     grouped = df1.groupby('storm_num')
64 | 
65 |     tdata = []
66 |     for gid, group in grouped:
67 |         duration = (pd.to_datetime(group.iloc[-1]['datetime']) - pd.to_datetime(group.iloc[0]['datetime'])).total_seconds() / 3600
68 |         if duration >= 1:
69 | 
70 |             xc = [np.mean([x, y])*2 for (x, y) in zip(group['xmin'].values, group['xmax'].values)]
71 |             yc = [np.mean([x, y])*2 for (x, y) in zip(group['ymin'].values, group['ymax'].values)]
72 | 
73 |             xcmin = np.min(xc)
74 |             xcmax = np.max(xc)
75 | 
76 |             ycmin = np.min(yc)
77 |             ycmax = np.max(yc)
78 | 
79 |             x = [[x1] for x1 in xc]
80 | 
81 |             clf = LinearRegression()
82 | 
83 |             clf.fit(x, np.array(yc))
84 | 
85 |             y = clf.predict(x)
86 | 
87 |             rmse = np.sqrt(mean_squared_error(yc, y))
88 | 
89 |             tdata.append(rmse)
90 | 
91 |     entry['dist'].append(np.array(tdata))
92 | 
93 |     print("CRSR: ", crsr, "SSR:", ssr, "MCS_P:", p, "Mean Linearity Error:", np.mean(tdata))
94 | 
95 |     return entry
96 | 
97 | 
98 | def get_std_refl(crsr, ssr, p, pref, year):
99 | 
100 |     entry = {'crsr':[], 'ssr':[], 'p':[], 'mean_size':[], 'dist':[]}
101 |     entry['crsr'].append(crsr)
102 |     entry['ssr'].append(ssr)
103 |     entry['p'].append(float(p))
104 | 
105 |     fn = "../data/track_data/" + pref + "/" + str(year) + "/" + str(year) + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl"
106 | 
107 |     #fn = "2015/" + str(crsr).zfill(2) + "_" + str(ssr).zfill(2) + "_p" + str(int(p*100)) + "_" + pref + "_tracks.pkl"
108 | 
109 |     print(crsr, ssr, p, pref, year, fn)
110 |     bg = pickle.load(open(fn, 'rb'))
111 |     df1 = bg[(bg.CRSR==crsr) & (bg.SSR==ssr)]
112 | 
113 |     gb = []
114 | 
115 |     for col in feature_list:
116 |         gb.append(np.max(df1[col].values))
117 | 
118 |     grouped = df1.groupby('storm_num')
119 | 
120 | 
121 |     tdata = []
122 |     size = []
123 |     for gid, group in grouped:
124 |         duration = (pd.to_datetime(group.iloc[-1]['datetime']) - pd.to_datetime(group.iloc[0]['datetime'])).total_seconds() / 3600
125 |         if duration >= 1:
126 | 
127 |             xmin = np.min(group['xmin'])
128 |             xmax = np.max(group['xmax'])
129 |             ymin = np.min(group['ymin'])
130 |             ymax = np.max(group['ymax'])
131 | 
132 |             res = np.zeros(shape=(len(group), 1+ymax-ymin, 1+xmax-xmin), dtype=np.uint8)
133 | 
134 |             for idx, (rid, row) in enumerate(group.iterrows()):
135 | 
136 |                 img = imread(row['filename'], mode='P')
137 | 
138 |                 y, x = np.where(img>0)
139 | 
140 |                 res[idx, y, x] = 5*img[y, x]
141 | 
142 |             a = res.flatten()
143 |             tdata.append(np.std(a[a>0]))
144 | 
145 |     entry['dist'].append(np.array(tdata, dtype=float))
146 | 
147 |     print("CRSR: ", crsr, "SSR:", ssr, "MCS_P:", p, "Mean std:", np.mean(tdata))
148 | 
149 |     return entry
150 | 
151 | 
152 | 
153 | if __name__ == "__main__":
154 | 
155 |     metric = None
156 | 
157 |     argv = sys.argv[1:]
158 | 
159 |     try:
160 |         opts, args = getopt.getopt(argv, "hm:n", ["metric="])
161 |     except getopt.GetoptError as e:
162 |         print(e)
163 |         sys.exit(2)
164 |     for opt, arg in opts:
165 |         print("arg:", arg, "opt:", opt)
166 |         if opt in ("-m", "--metric"):
167 | 
168 |             metric = arg
169 | 
170 |     print(metric)
171 | 
172 |     crsr_ = [6, 6, 6, 12, 12, 12, 24, 24, 24, 48, 48, 48]
173 |     ssr_ = [48, 96, 192, 48, 96, 192, 48, 96, 192, 48, 96, 192]
174 | 
175 |     entries = []
176 | 
177 |     pref = "rematched"
178 |     year = 2016
179 | 
180 |     for p in [0.0, 0.5, 0.9, 0.95]:
181 | 
182 |         pobj = Pool(12)
183 | 
184 |         if metric == 'std_refl':
185 |             result = [pobj.apply_async(get_std_refl, (crsr, ssr, p, pref, year)) for (crsr, ssr) in zip(crsr_, ssr_)]
186 | 
187 |         elif
metric == 'lin_err': 188 | result = [pobj.apply_async(get_lin_err, (crsr, ssr, p, pref, year)) for (crsr, ssr) in zip(crsr_, ssr_)] 189 | 190 | elif metric == 'mean_dur': 191 | result = [pobj.apply_async(get_mean_dur, (crsr, ssr, p, pref, year)) for (crsr, ssr) in zip(crsr_, ssr_)] 192 | 193 | else: 194 | print("metric isn't available") 195 | sys.exit(2) 196 | break 197 | 198 | pobj.close() 199 | pobj.join() 200 | 201 | for i in result: 202 | entry = i.get() 203 | 204 | df = pd.DataFrame(columns=['CRSR', 'SSR', 'MCS_proba', 'Distribution']) 205 | 206 | df['CRSR'] = entry['crsr'] 207 | df['SSR'] = entry['ssr'] 208 | df['MCS_proba'] = entry['p'] 209 | df['Distribution'] = entry['dist'] 210 | 211 | df['mean'] = [np.mean(x) for x in df['Distribution'].values] 212 | df['median'] = [np.median(x) for x in df['Distribution'].values] 213 | df['sd'] = [np.std(x) for x in df['Distribution'].values] 214 | 215 | entries.append(df) 216 | 217 | df = pd.concat(entries) 218 | 219 | pickle.dump(df, open(str(year) + "_" + metric + "_" + pref + "_master.pkl", "wb")) 220 | 221 | 222 | -------------------------------------------------------------------------------- /mcs/utils/segmentation.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import os 3 | 4 | import numpy as np 5 | 6 | from scipy import ndimage, sparse 7 | 8 | from scipy.ndimage import binary_closing, binary_dilation 9 | from scipy.ndimage.measurements import label 10 | 11 | from skimage.morphology import disk, watershed, remove_small_objects 12 | from skimage.measure import regionprops 13 | 14 | from skimage.segmentation import find_boundaries 15 | from scipy.ndimage.morphology import binary_fill_holes 16 | 17 | 18 | 19 | def get_qualifying_clusters(rImage, strat_dbz, conv_dbz, int_dbz, min_length, 20 | conv_buffer, min_size=10, strat_buffer=0): 21 | """Combines the logic of get_intense_cells, 22 | connect_intense_cells, and connect_stratiform_to_lines 23 | to return pixels associated with qualifying slices. 24 | 25 | Stratiform >= 4 (20 dBZ) 26 | Convection >= 8 (40 dBZ) 27 | Intense >= 10 (50 dBZ) 28 | 29 | Parameters 30 | ---------- 31 | rImage: (N, M) ndarray 32 | Radar Image from which to extract qualifying lines. 33 | 34 | strat_dbz: int 35 | Threshold used to identify stratiform pixels 36 | (Multiply value by 5 to get dBZ) 37 | 38 | conv_dbz: int 39 | Threshold used to identify convective pixels 40 | (Multiply value by 5 to get dBZ) 41 | 42 | int_dbz: int 43 | Threshold used to identify intense pixels 44 | (Multiply value by 5 to get dBZ) 45 | 46 | min_length: int 47 | Minimum length for a qualifying merged lines 48 | (Multiply value by 2 to get km) 49 | 50 | conv_buffer: int 51 | Distance within which intense cells are merged 52 | (Multiply value by 2 (pixel distance to km) and then 53 | multiply by minimum search disk radius (3) to get 54 | buffer size in km) 55 | 56 | min_size: int 57 | Minimum size for an intense cell to be considered in 58 | line-building process. 59 | 60 | strat_buffer: int 61 | Distance within which stratiform pixels are merged 62 | with qualifying merged lines. 63 | (Multiply value by 2 to account for pixel distance 64 | and then multiply by minimum search disk radius of 3 65 | to get buffer size in km) 66 | 67 | conv_buffer: integer 68 | Distance to search for nearby intense cells. 69 | 70 | Returns 71 | ------- 72 | regions: list 73 | A list of regionprops for each qualifying slice. 74 | See scikit-image.measure.regionprops for more information. 
75 |     """
76 | 
77 |     convection = 1 * (rImage >= conv_dbz)  # binary convective mask
78 | 
79 |     stratiform = 1 * (rImage >= strat_dbz)  # binary stratiform mask
80 | 
81 |     labeled_image, _ = label(convection, np.ones((3,3), dtype=int))
82 | 
83 |     remove_small_objects(labeled_image, min_size=min_size, connectivity=2, in_place=True)
84 | 
85 |     regions = regionprops(labeled_image, intensity_image=rImage)
86 | 
87 |     for region in regions:
88 |         if np.max(region.intensity_image) < int_dbz:  # no intense core -> discard
89 | 
90 |             ymin, xmin = np.min(region.coords[:, 0]), np.min(region.coords[:, 1])
91 |             y, x = np.where(region.intensity_image > 0)
92 |             labeled_image[ymin+y, xmin+x] = 0
93 | 
94 |     thresholded_image = 1 * binary_closing(labeled_image > 0, structure=disk(3), iterations=int(conv_buffer))  # merge nearby intense cells
95 | 
96 |     labeled_image, _ = label(thresholded_image, np.ones((3,3)))
97 | 
98 |     regions = regionprops(labeled_image, intensity_image=rImage)
99 | 
100 |     for region in regions:
101 |         if region.major_axis_length < min_length:  # too short to qualify
102 | 
103 |             ymin, xmin = np.min(region.coords[:, 0]), np.min(region.coords[:, 1])
104 |             y, x = np.where(region.intensity_image > 0)
105 |             labeled_image[ymin+y, xmin+x] = 0
106 | 
107 |     strat_mask = 1 * stratiform * (binary_dilation(1*(labeled_image > 0), structure=disk(3), iterations=strat_buffer))  # stratiform echo within the search radius
108 | 
109 |     thresholded_image = 1*(labeled_image>0) + strat_mask
110 | 
111 |     #thresholded_image = watershed(strat_mask, labeled_image, mask=strat_mask)
112 | 
113 |     labeled_image, _ = label(1*(thresholded_image > 0), np.ones((3,3)))
114 | 
115 |     labeled_image *= stratiform  # keep labels only where stratiform echo exists
116 | 
117 |     regions = regionprops(labeled_image, intensity_image=thresholded_image)
118 | 
119 |     for region in regions:
120 |         if np.max(region.intensity_image) < 2:  # region not attached to a line
121 | 
122 |             ymin, xmin = np.min(region.coords[:, 0]), np.min(region.coords[:, 1])
123 |             y, x = np.where(region.intensity_image > 0)
124 |             labeled_image[ymin+y, xmin+x] = 0
125 | 
126 |     return regionprops(labeled_image, intensity_image=rImage)
127 | 
128 | 
129 | def find_lines(rImage, conv_buffer, min_length=50):
130 |     """Combines the logic of get_intense_cells and
131 |     connect_intense_cells to return pixels associated
132 |     with qualifying merged lines.
133 | 
134 |     Stratiform >= 4 (20 dBZ)
135 |     Convection >= 8 (40 dBZ)
136 |     Intense >= 10 (50 dBZ)
137 | 
138 |     Parameters
139 |     ----------
140 |     rImage: (N, M) ndarray
141 |         Radar Image from which to extract qualifying lines.
142 | 
143 |     conv_buffer: integer
144 |         Distance to search for nearby intense cells.
145 | 
146 |     min_length: integer
147 |         Minimum length requirement to be considered an MCS.
148 |         Default is 50 (100 km with 2 km pixels)
149 | 
150 |     Returns
151 |     -------
152 |     labeled_image: (N, M) ndarray
153 |         Binary image of pixels in qualifying merged lines.
154 |         Same dimensions as rImage.
155 | """ 156 | 157 | convection = 1 * (rImage >= 8) 158 | 159 | stratiform = 1 * (rImage >= 4) 160 | 161 | labeled_image, _ = label(convection, np.ones((3,3), dtype=int)) 162 | 163 | remove_small_objects(labeled_image, min_size=10, connectivity=2, in_place=True) 164 | 165 | regions = regionprops(labeled_image, intensity_image=rImage) 166 | 167 | for region in regions: 168 | if np.max(region.intensity_image) < 10: 169 | 170 | ymin, xmin = np.min(region.coords[:, 0]), np.min(region.coords[:, 1]) 171 | y, x = np.where(region.intensity_image > 0) 172 | labeled_image[ymin+y, xmin+x] = 0 173 | 174 | thresholded_image = 1 * binary_closing(labeled_image > 0, structure=disk(3), iterations=int(conv_buffer)) 175 | 176 | labeled_image, _ = label(thresholded_image, np.ones((3,3))) 177 | 178 | regions = regionprops(labeled_image, intensity_image=rImage) 179 | 180 | for region in regions: 181 | if region.major_axis_length < min_length: 182 | 183 | ymin, xmin = np.min(region.coords[:, 0]), np.min(region.coords[:, 1]) 184 | y, x = np.where(region.intensity_image > 0) 185 | labeled_image[ymin+y, xmin+x] = 0 186 | 187 | return labeled_image 188 | 189 | 190 | def get_intense_cells(rImage, min_size=10): 191 | """Return pixel coordinates and unique labels associated 192 | with intense thunderstorm cells. 193 | 194 | Convection >= 8 (40 dBZ) 195 | Intense >= 10 (50 dBZ) 196 | 197 | Parameters 198 | ---------- 199 | rImage: (N, M) ndarray 200 | Radar Image from which to extract intense cells. 201 | 202 | Returns 203 | ------- 204 | labeled_image1: (N, M) ndarray 205 | Labeled image of intense cells. Same dimensions as rImage. 206 | """ 207 | 208 | convection = np.uint8(rImage >= 8) 209 | 210 | labeled_image, _ = label(convection, np.ones((3,3))) 211 | 212 | remove_small_objects(labeled_image, min_size=min_size, connectivity=2, in_place=True) 213 | 214 | regions = regionprops(labeled_image, intensity_image=rImage) 215 | 216 | labeled_image1 = np.zeros(labeled_image.shape, dtype=int) 217 | 218 | for region in regions: 219 | if np.max(region.intensity_image) >= 10: 220 | labeled_image1 += (labeled_image == region.label) * rImage 221 | 222 | return labeled_image1 223 | 224 | 225 | def connect_intense_cells(int_cells, conv_buffer): 226 | """Merge nearby intense cells if they are within a given 227 | convective region search radius. 228 | 229 | Parameters 230 | ---------- 231 | int_cells: (N, M) ndarray 232 | Pixels associated with intense cells. 233 | 234 | conv_buffer: integer 235 | Distance to search for nearby intense cells. 236 | 237 | Returns 238 | ------- 239 | labeled_image1: (N, M) ndarray 240 | Binary image of merged intense cells. Same dimensions as int_cells. 241 | """ 242 | 243 | return binary_closing(int_cells>0, structure=disk(3), iterations=conv_buffer) 244 | 245 | 246 | def connect_stratiform_to_lines(lines, stratiform, strat_buffer): 247 | """Connect pixels with values of 20 dBZ or greater surrounding 248 | merged lines within a given stratiform search radius. 249 | 250 | Parameters 251 | ---------- 252 | lines: (N, M) ndarray 253 | Pixels associated with merged lines. 254 | 255 | stratiform: (N, M) ndarray 256 | Binary image using a threshold of 20 dBZ. 257 | 258 | strat_buffer: integer 259 | Distance to search for stratiform pixels to 260 | connect to merged lines. 261 | 262 | Returns 263 | ------- 264 | labeled_image: (N, M) ndarray 265 | Labeled image where each slice has a unique value. 266 | Has same dimensions as lines and stratiform. 
267 | """ 268 | 269 | strat_mask = 1 * stratiform * (binary_dilation(1*(lines > 0), structure=disk(3), iterations=strat_buffer)) 270 | 271 | thresholded_image = 1*(lines>0) + strat_mask 272 | 273 | labeled_image, _ = label(1*(thresholded_image > 0), np.ones((3,3))) 274 | 275 | labeled_image *= stratiform 276 | 277 | regions = regionprops(labeled_image, intensity_image=thresholded_image) 278 | 279 | for region in regions: 280 | if np.max(region.intensity_image) < 2: 281 | 282 | ymin, xmin = np.min(region.coords[:, 0]), np.min(region.coords[:, 1]) 283 | y, x = np.where(region.intensity_image > 0) 284 | labeled_image[ymin+y, xmin+x] = 0 285 | 286 | return labeled_image 287 | -------------------------------------------------------------------------------- /mcs/utils/singleprocess_tracks.py: -------------------------------------------------------------------------------- 1 | import sys, getopt 2 | 3 | from tracking import rematch_tracks, create_tracks 4 | 5 | from scipy.ndimage import imread 6 | import numpy as np 7 | import pickle 8 | import pandas as pd 9 | from scipy.spatial.distance import pdist 10 | import datetime 11 | from functools import partial 12 | 13 | 14 | from multiprocessing import Pool 15 | 16 | from sklearn.linear_model import LinearRegression 17 | from sklearn.metrics import mean_squared_error 18 | 19 | def get_date_range(argv): 20 | 21 | start_date = '' 22 | end_date = '' 23 | 24 | try: 25 | opts, args = getopt.getopt(argv, "hs:e:", ["sdate=","edate="]) 26 | except getopt.GetoptError: 27 | print('make_slices.py -s -e ') 28 | sys.exit(2) 29 | 30 | for opt, args in opts: 31 | 32 | if opt == '-s': 33 | 34 | start_date = args 35 | 36 | elif opt == '-e': 37 | 38 | end_date = args 39 | 40 | print("Starting date:", start_date) 41 | print("Ending date:", end_date) 42 | 43 | return start_date, end_date 44 | 45 | def creation_of_tracks(p, df, dt): 46 | 47 | years = df.groupby(df.index.year) 48 | for year, year_rows in years: 49 | 50 | print(year) 51 | 52 | rng = pd.date_range(datetime.datetime(year, 5, 1, 0, 0), datetime.datetime(year, 10, 1, 0, 0), freq=dt) 53 | 54 | year_rows = year_rows[year_rows.index.isin(rng.values)] 55 | 56 | year_rows.loc[:, 'datetime'] = year_rows.index.values 57 | 58 | year_rows.reset_index(drop=True, inplace=True) 59 | 60 | print("initializing async..") 61 | for crsr in [6, 12, 24, 48]: 62 | for ssr in [48, 96, 192]: 63 | 64 | create_tracks(year_rows.copy(), rng, str(year), crsr, ssr, p, "") 65 | 66 | def rematching_tracks(p, dt): 67 | 68 | print("reading year files...") 69 | 70 | for year in range(2015, 2016): 71 | 72 | for crsr in [6, 12, 24, 48]: 73 | for ssr in [48, 96, 192]: 74 | 75 | fname = "../data/track_data/unmatched/" + str(year) + "/" + str(year) + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl" 76 | print("reading file:", fname) 77 | 78 | try: 79 | df = pickle.load(open(fname, "rb")) 80 | 81 | df.loc[:, 'datetime'] = pd.to_datetime(df.datetime) 82 | 83 | df = df.set_index('datetime') 84 | 85 | rng = pd.date_range(datetime.datetime(year, 5, 1, 0, 0), datetime.datetime(year, 10, 1, 0, 0), freq=dt) 86 | 87 | df.loc[:, 'datetime'] = df.index.values 88 | 89 | df.reset_index(drop=True, inplace=True) 90 | 91 | rematch_tracks(df.copy(), rng, str(year), crsr, ssr, p) 92 | 93 | except Exception as e: 94 | print(e) 95 | 96 | 97 | if __name__ == "__main__": 98 | 99 | print("reading index file...") 100 | 101 | start_date, end_date = get_date_range(sys.argv[1:]) 102 | 103 | df = 
pd.read_csv("../data/slice_data/labeled_slices_020618.csv")
104 | 
105 |     df = df.apply(partial(pd.to_numeric, errors='ignore'))
106 | 
107 |     df.info()
108 | 
109 |     df.loc[:, 'datetime'] = pd.to_datetime(df.datetime)
110 | 
111 |     df = df.set_index('datetime')
112 | 
113 |     df = df[start_date:end_date]
114 | 
115 |     df = df.drop('Unnamed: 0', axis=1)
116 |     df = df.drop('Unnamed: 0.1', axis=1)
117 |     df = df.drop('Unnamed: 0.1.1', axis=1)
118 | 
119 |     print("finished reading index file..")
120 | 
121 |     print("Calculating attributes..")
122 |     fname = []
123 | 
124 |     for rid, row in df.iterrows():
125 | 
126 |         if rid.year == 2015:
127 | 
128 |             fn = "E:/p12_slices/92017_slices/2015/" + row.filename
129 | 
130 |         if rid.year == 2016:
131 | 
132 |             fn = "F:/" + row.filename
133 | 
134 |         fname.append(fn)
135 | 
136 |     df.loc[:, 'filename'] = fname
137 | 
138 |     print("finished calculating attributes..")
139 | 
140 | 
141 |     for p in [0.0, 0.5, 0.90, 0.95]:
142 |         creation_of_tracks(p, df, '15T')
143 |         rematching_tracks(p, '15T')
144 | 

--------------------------------------------------------------------------------
/mcs/utils/tracking.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pickle
3 | import pandas as pd
4 | import os
5 | from scipy.spatial.distance import pdist
6 | import datetime
7 | from geopandas import GeoDataFrame
8 | from shapely.geometry import MultiPoint
9 | from scipy.misc import imread
10 | 
11 | feature_list = ['area', 'convex_area', 'eccentricity',
12 |                 'intense_area', 'convection_area',
13 |                 'convection_stratiform_ratio', 'intense_stratiform_ratio',
14 |                 'intense_convection_ratio', 'mean_intensity', 'max_intensity',
15 |                 'intensity_variance', 'major_axis_length', 'minor_axis_length',
16 |                 'solidity']
17 | 
18 | def to_datetime(time_string):
19 | 
20 |     if int(time_string[0:2]) > 90:
21 |         year = "19" + time_string[0:2]
22 |     else:
23 |         year = "20" + time_string[0:2]
24 | 
25 |     month = time_string[2:4]
26 |     day = time_string[4:6]
27 |     hour = time_string[7:9]
28 |     minute = time_string[9:11]
29 | 
30 |     return datetime.datetime(int(year), int(month), int(day), int(hour), int(minute))
31 | 
32 | 
33 | def set_dates(df):
34 | 
35 |     dates = []
36 | 
37 |     for fn in df['filename']:
38 | 
39 |         dates.append(to_datetime(fn[-21:-10]))
40 | 
41 |     df.loc[:, 'datetime'] = dates
42 | 
43 |     return df
44 | 
45 | def get_geometry(df, slice_location):
46 | 
47 |     geometry = []
48 | 
49 |     for idx, row in df.iterrows():
50 | 
51 |         xmin = row['xmin']
52 |         ymin = row['ymin']
53 | 
54 |         img = imread(slice_location + row['filename'], mode='P')
55 | 
56 |         y, x = np.where(img >= 10)
57 | 
58 |         polt = MultiPoint(np.array(list(zip(xmin+x, ymin+y)))).convex_hull
59 | 
60 |         geometry.append(polt)
61 | 
62 |     return geometry
63 | 
64 | def get_normalization(df):
65 | 
66 |     norm = []
67 | 
68 |     for col in feature_list:
69 |         norm.append(np.max(df[col].values))
70 | 
71 |     return norm
72 | 
73 | 
74 | def create_tracks(df, rng, prefix, crsr, ssr, p, slice_location, norm=None):
75 | 
76 |     #print("Selecting CRSR:", str(crsr), " SSR: ", str(ssr), " Probability: ", str(p))
77 |     big_df = df[(df.CRSR==crsr) & (df.SSR==ssr) & (df.mcs_proba >= p)].copy()
78 | 
79 |     #print("Calculating dates based on filenames")
80 |     big_df = set_dates(big_df)
81 | 
82 |     #print("Cleaning up index")
83 |     big_df = big_df.reset_index()
84 | 
85 |     #print("Calculating normalization factors for each feature")
86 |     if norm is None:
87 |         normalization = get_normalization(big_df)
88 |     else:
89 |         normalization = get_normalization(norm)
90 | 
91 | 
#print("Calculating convex hull geometry") 92 | 93 | geo_df = GeoDataFrame(big_df, geometry=get_geometry(big_df, slice_location)) 94 | 95 | #print("Cleaning up geodataframe index") 96 | geo_df = geo_df.reset_index(drop=True) 97 | 98 | #print("Initializing storm numbers") 99 | geo_df['storm_num'] = np.nan 100 | 101 | storm_num = 0 102 | 103 | #print("finding current times") 104 | cur_time = geo_df[geo_df['datetime'] == rng[0].to_pydatetime()] 105 | 106 | #print("setting initial storm numbers") 107 | #set storm numbers for the first time period 108 | for idx, row in cur_time.iterrows(): 109 | 110 | geo_df.loc[idx, 'storm_num'] = storm_num 111 | 112 | storm_num += 1 113 | 114 | #print("running storm tracking") 115 | #run until the second to last time period 116 | for i in range(len(rng)-1): 117 | 118 | #print(crsr, ssr, p, rng[i]) 119 | 120 | cur_time = geo_df[geo_df['datetime'] == rng[i].to_pydatetime()] 121 | 122 | next_time = geo_df[geo_df['datetime'] == rng[i+1].to_pydatetime()] 123 | 124 | if len(cur_time) > 0 and len(next_time) > 0: 125 | 126 | distance_matrix = np.ones(shape=(len(cur_time), len(next_time)), dtype=np.float) * np.nan 127 | 128 | for cc, (cid, crow) in enumerate(cur_time.iterrows()): 129 | for nc, (nid, nrow) in enumerate(next_time.iterrows()): 130 | 131 | if crow['geometry'].intersects(nrow['geometry']): 132 | 133 | distance_matrix[cc, nc] = pdist([crow[feature_list].values / normalization, 134 | nrow[feature_list].values / normalization]) 135 | 136 | a = np.copy(distance_matrix) 137 | 138 | while np.sum(~np.isnan(a)) > 0: 139 | 140 | track, candidate = np.where(a == np.nanmin(a)) 141 | 142 | c_idx = next_time[candidate[0]:candidate[0]+1].index[0] 143 | t_idx = cur_time[track[0]:track[0]+1].index[0] 144 | 145 | next_time.loc[c_idx, 'storm_num'] = geo_df.loc[t_idx, 'storm_num'] 146 | 147 | geo_df.loc[c_idx, 'storm_num'] = geo_df.loc[t_idx, 'storm_num'] 148 | 149 | a[track[0], :] = np.nan 150 | 151 | a[:, candidate[0]] = np.nan 152 | 153 | new_storms = next_time[next_time['storm_num'].isnull()] 154 | 155 | for idx, row in new_storms.iterrows(): 156 | 157 | geo_df.loc[idx, 'storm_num'] = storm_num 158 | 159 | storm_num += 1 160 | 161 | out_folder = "../data/track_data/unmatched/" + prefix 162 | 163 | if not os.path.exists(out_folder): 164 | os.makedirs(out_folder) 165 | 166 | filename = out_folder + "/" + prefix + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl" 167 | pickle.dump(geo_df, open(filename, "wb")) 168 | 169 | print("Finished ", filename) 170 | 171 | 172 | def rematch_tracks(df, rng, prefix, crsr, ssr, p, buffer_size=25, norm=None): 173 | 174 | df['storm_loc'] = 'm' 175 | 176 | grouped = df.groupby('storm_num') 177 | 178 | for gid, group in grouped: 179 | 180 | if len(group) >= 2: 181 | 182 | idx_s = group.index[0] 183 | idx_e = group.index[-1] 184 | 185 | df.loc[idx_s, 'storm_loc'] = 's' 186 | df.loc[idx_e, 'storm_loc'] = 'f' 187 | 188 | df['rematched'] = False 189 | 190 | if norm is None: 191 | normalization = get_normalization(df) 192 | else: 193 | normalization = get_normalization(norm) 194 | 195 | starts = [] 196 | ends = [] 197 | 198 | for d in rng: 199 | 200 | dfs = df[(pd.to_datetime(df.datetime)==d) & (df.storm_loc == 'f')] 201 | 202 | dff = df[(pd.to_datetime(df.datetime)>(d + datetime.timedelta(minutes=15))) & \ 203 | (pd.to_datetime(df.datetime)<=(d + datetime.timedelta(minutes=60))) & \ 204 | (df.storm_loc == 's') & (~df.rematched)] 205 | 206 | if len(dfs) > 0 and len(dff) > 0: 207 | 208 | distance_matrix 
= np.ones(shape=(len(dfs), len(dff)), dtype=np.float) * np.nan 209 | 210 | for cc, (cid, crow) in enumerate(dfs.iterrows()): 211 | for nc, (nid, nrow) in enumerate(dff.iterrows()): 212 | 213 | if crow['geometry'].buffer(buffer_size).intersects(nrow['geometry']): 214 | 215 | distance_matrix[cc, nc] = pdist([crow[feature_list].values / normalization, 216 | nrow[feature_list].values / normalization]) 217 | 218 | a = np.copy(distance_matrix) 219 | 220 | while np.sum(~np.isnan(a)) > 0: 221 | 222 | track, candidate = np.where(a == np.nanmin(a)) 223 | 224 | c_idx = dff[candidate[0]:candidate[0]+1].index[0] 225 | t_idx = dfs[track[0]:track[0]+1].index[0] 226 | 227 | cur_stormnum = dfs.loc[t_idx, 'storm_num'] 228 | nex_stormnum = dff.loc[c_idx, 'storm_num'] 229 | 230 | c_idx = df[df.storm_num==nex_stormnum].index.values 231 | 232 | df.loc[c_idx, 'storm_num'] = cur_stormnum 233 | df.loc[c_idx, 'rematched'] = True 234 | 235 | t_idx = df[df.storm_num==cur_stormnum].index.values 236 | 237 | df.loc[t_idx, 'rematched'] = True 238 | 239 | a[track[0], :] = np.nan 240 | 241 | a[:, candidate[0]] = np.nan 242 | 243 | out_folder = "../data/track_data/rematched/" + prefix 244 | 245 | if not os.path.exists(out_folder): 246 | os.makedirs(out_folder) 247 | 248 | filename = out_folder + "/" + prefix + "_" + str(crsr).zfill(2) + "_" + str(ssr).zfill(3) + "_p" + str(int(p*100)).zfill(2) + ".pkl" 249 | pickle.dump(df, open(filename, "wb")) 250 | 251 | print("Finished", filename) -------------------------------------------------------------------------------- /mcs_future/notebooks/checksum.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahaberlie/MCS/5ac4f38975b360b1c882beeb6f03f8193bb97127/mcs_future/notebooks/checksum.jpg -------------------------------------------------------------------------------- /mcs_future/utils/mapping_help.py: -------------------------------------------------------------------------------- 1 | 2 | from math import floor 3 | from matplotlib import patheffects 4 | from matplotlib import patches as mpatches 5 | import cartopy.io.shapereader as shpreader 6 | import shapely.geometry as sgeom 7 | from netCDF4 import Dataset 8 | import numpy as np 9 | from shapely.geometry import Point 10 | import geopandas as gpd 11 | from math import log 12 | from math import exp 13 | from copy import copy 14 | import cartopy 15 | import cartopy.crs as ccrs 16 | 17 | import pickle 18 | import numpy as np 19 | import pandas as pd 20 | 21 | from matplotlib import cm 22 | 23 | def quantize_perc_agw(value): 24 | 25 | if value < 5: 26 | return 0 27 | elif value >= 5 and value < 15: 28 | return 1 29 | elif value >= 15 and value < 30: 30 | return 2 31 | elif value >= 30 and value < 60: 32 | return 3 33 | elif value >= 60 and value < 80: 34 | return 4 35 | elif value >= 80 and value < 120: 36 | return 5 37 | elif value >= 120 and value < 150: 38 | return 6 39 | elif value >= 150 and value < 200: 40 | return 7 41 | elif value >= 200 and value < 250: 42 | return 8 43 | elif value >= 250 and value < 300: 44 | return 9 45 | elif value >= 300: 46 | return 10 47 | elif np.isnan(value): 48 | return None 49 | 50 | def draw_perc_agw(ax, geom, vals, debug=False): 51 | 52 | for g, val in zip(geom, vals): 53 | 54 | if np.isfinite(val): 55 | 56 | if debug: 57 | x = g.centroid.x 58 | y = g.centroid.y 59 | 60 | ax.text(x, y, str(int(val)), transform=ccrs.PlateCarree()) 61 | 62 | quant = quantize_perc_agw(val) 63 | 64 | facecolor = cm.BrBG(quant/10) 65 | 66 | else: 
67 | facecolor='grey' 68 | 69 | 70 | ax.add_geometries([g], ccrs.PlateCarree(), 71 | facecolor=facecolor, edgecolor=facecolor) 72 | 73 | def quantize_perc_agw_prop_raw(value): 74 | 75 | val = np.zeros(shape=value.shape, dtype=int) 76 | 77 | y, x = np.where((value >= 10)) 78 | val[y, x] = 1 79 | y, x = np.where((value >= 20)) 80 | val[y, x] = 2 81 | y, x = np.where((value >= 30)) 82 | val[y, x] = 3 83 | y, x = np.where((value >= 40)) 84 | val[y, x] = 4 85 | y, x = np.where((value >= 50)) 86 | val[y, x] = 5 87 | y, x = np.where((value >= 60)) 88 | val[y, x] = 6 89 | y, x = np.where((value >= 70)) 90 | val[y, x] = 7 91 | y, x = np.where((value >= 80)) 92 | val[y, x] = 8 93 | y, x = np.where((value >= 90)) 94 | val[y, x] = 9 95 | 96 | return val 97 | 98 | def quantize_perc_agw_prop(value): 99 | 100 | if value < 10: 101 | return 0 102 | elif value >= 10 and value < 20: 103 | return 1 104 | elif value >= 20 and value < 30: 105 | return 2 106 | elif value >= 30 and value < 40: 107 | return 3 108 | elif value >= 40 and value < 50: 109 | return 4 110 | elif value >= 50 and value < 60: 111 | return 5 112 | elif value >= 60 and value < 70: 113 | return 6 114 | elif value >= 70 and value < 80: 115 | return 7 116 | elif value >= 80 and value < 90: 117 | return 8 118 | elif value >= 90: 119 | return 9 120 | elif np.isnan(value): 121 | return None 122 | 123 | def draw_perc_agw_prop(ax, geom, vals, debug=False): 124 | 125 | for g, val in zip(geom, vals): 126 | 127 | if np.isfinite(val): 128 | 129 | if debug: 130 | x = g.centroid.x 131 | y = g.centroid.y 132 | 133 | ax.text(x, y, str(int(val)), transform=ccrs.PlateCarree()) 134 | 135 | quant = quantize_perc_agw_prop(val) 136 | 137 | facecolor = cm.BrBG(quant/9) 138 | 139 | else: 140 | facecolor='grey' 141 | 142 | 143 | ax.add_geometries([g], ccrs.PlateCarree(), 144 | facecolor=facecolor, edgecolor=facecolor) 145 | 146 | def wrf_to_lon_lat(lons, lats, x, y): 147 | 148 | longs = [lons[x1, y1] for (x1,y1) in zip(y, x)] 149 | latis = [lats[x1, y1] for (x1,y1) in zip(y, x)] 150 | 151 | return longs, latis 152 | 153 | 154 | def get_point_subset(df, outline, wrf_ref=None, within=True): 155 | 156 | xc = np.array([np.mean([x, y]) for (x, y) in zip(df.xmin.values, df.xmax.values)]) 157 | yc = np.array([np.mean([x, y]) for (x, y) in zip(df.ymin.values, df.ymax.values)]) 158 | 159 | if wrf_ref is not None: 160 | 161 | nc = Dataset(wrf_ref) 162 | 163 | lons = nc.variables['XLONG'][:,:] 164 | lats = nc.variables['XLAT'][:,:] 165 | 166 | lo, la = wrf_to_lon_lat(lons, lats, xc.astype(int), yc.astype(int)) 167 | 168 | else: 169 | 170 | lo, la = NOWrad_to_lon_lat(np.array(xc), np.array(yc)) 171 | 172 | df['lats'] = la 173 | df['lons'] = lo 174 | 175 | geometry = [Point(xy) for xy in zip(df.lons, df.lats)] 176 | df = df.drop(['lons', 'lats'], axis=1) 177 | crs = {'init': 'epsg:4326'} 178 | points = gpd.GeoDataFrame(df, crs=crs, geometry=geometry) 179 | 180 | points_ = gpd.sjoin(points, outline, how="inner", op='within') 181 | 182 | return points_ 183 | 184 | def draw_states(ax): 185 | 186 | shapename = 'admin_1_states_provinces_lakes_shp' 187 | states_shp = shpreader.natural_earth(resolution='50m', 188 | category='cultural', name=shapename) 189 | 190 | for state, info in zip(shpreader.Reader(states_shp).geometries(), shpreader.Reader(states_shp).records()): 191 | if info.attributes['admin'] == 'United States of America': 192 | 193 | ax.add_geometries([state], ccrs.PlateCarree(), 194 | facecolor='None', edgecolor='k') 195 | 196 | def find_side(ls, side): 197 | """From 
http://nbviewer.jupyter.org/gist/ajdawson/dd536f786741e987ae4e
198 |     Given a shapely LineString which is assumed to be rectangular, return the
199 |     line corresponding to a given side of the rectangle."""
200 | 
201 |     minx, miny, maxx, maxy = ls.bounds
202 |     points = {'left': [(minx, miny), (minx, maxy)],
203 |               'right': [(maxx, miny), (maxx, maxy)],
204 |               'bottom': [(minx, miny), (maxx, miny)],
205 |               'top': [(minx, maxy), (maxx, maxy)],}
206 |     return sgeom.LineString(points[side])
207 | 
208 | 
209 | def lambert_xticks(ax, ticks, fontsize=12):
210 |     """From http://nbviewer.jupyter.org/gist/ajdawson/dd536f786741e987ae4e
211 |     Draw ticks on the bottom x-axis of a Lambert Conformal projection."""
212 | 
213 |     te = lambda xy: xy[0]
214 |     lc = lambda t, n, b: np.vstack((np.zeros(n) + t, np.linspace(b[2], b[3], n))).T
215 |     xticks, xticklabels = _lambert_ticks(ax, ticks, 'bottom', lc, te)
216 |     ax.xaxis.tick_bottom()
217 |     ax.set_xticks(xticks)
218 |     ax.set_xticklabels([ax.xaxis.get_major_formatter()(xtick) for xtick in xticklabels], fontsize=fontsize)
219 | 
220 | 
221 | def lambert_yticks(ax, ticks, fontsize=12):
222 |     """From http://nbviewer.jupyter.org/gist/ajdawson/dd536f786741e987ae4e
223 |     Draw ticks on the left y-axis of a Lambert Conformal projection."""
224 | 
225 |     te = lambda xy: xy[1]
226 |     lc = lambda t, n, b: np.vstack((np.linspace(b[0], b[1], n), np.zeros(n) + t)).T
227 |     yticks, yticklabels = _lambert_ticks(ax, ticks, 'left', lc, te)
228 |     ax.yaxis.tick_left()
229 |     ax.set_yticks(yticks)
230 |     ax.set_yticklabels([ax.yaxis.get_major_formatter()(ytick) for ytick in yticklabels], fontsize=fontsize)
231 | 
232 | def _lambert_ticks(ax, ticks, tick_location, line_constructor, tick_extractor):
233 |     """From http://nbviewer.jupyter.org/gist/ajdawson/dd536f786741e987ae4e
234 |     Get the tick locations and labels for an axis of a Lambert Conformal projection."""
235 | 
236 |     outline_patch = sgeom.LineString(ax.outline_patch.get_path().vertices.tolist())
237 |     axis = find_side(outline_patch, tick_location)
238 |     n_steps = 30
239 |     extent = ax.get_extent(ccrs.PlateCarree())
240 |     _ticks = []
241 |     for t in ticks:
242 |         xy = line_constructor(t, n_steps, extent)
243 |         proj_xyz = ax.projection.transform_points(ccrs.Geodetic(), xy[:, 0], xy[:, 1])
244 |         xyt = proj_xyz[..., :2]
245 |         ls = sgeom.LineString(xyt.tolist())
246 |         locs = axis.intersection(ls)
247 |         if not locs:
248 |             tick = [None]
249 |         else:
250 |             tick = tick_extractor(locs.xy)
251 |         _ticks.append(tick[0])
252 |     # Remove ticks that aren't visible:
253 |     ticklabels = copy(ticks)
254 |     while True:
255 |         try:
256 |             index = _ticks.index(None)
257 |         except ValueError:
258 |             break
259 |         _ticks.pop(index)
260 |         ticklabels.pop(index)
261 |     return _ticks, ticklabels
262 | 
263 | def quantize_season_raw(value):
264 | 
265 |     val = np.zeros(shape=value.shape, dtype=int)
266 | 
267 |     y, x = np.where((value >= 5))
268 |     val[y, x] = 1
269 |     y, x = np.where((value >= 10))
270 |     val[y, x] = 2
271 |     y, x = np.where((value >= 20))
272 |     val[y, x] = 3
273 |     y, x = np.where((value >= 30))
274 |     val[y, x] = 4
275 |     y, x = np.where((value >= 60))
276 |     val[y, x] = 5
277 |     y, x = np.where((value >= 90))
278 |     val[y, x] = 6
279 |     y, x = np.where((value >= 120))
280 |     val[y, x] = 7
281 |     y, x = np.where((value >= 150))
282 |     val[y, x] = 8
283 | 
284 |     return val
285 | 
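# Added aside (not in the original source): the masked thresholding in
# quantize_season_raw above is equivalent to binning with np.digitize,
# which may be easier to extend with new class breaks:
#
#     bins = np.array([5, 10, 20, 30, 60, 90, 120, 150])
#     val = np.digitize(value, bins)   # 0..8, same classes as the masks
#
# The explicit masks are kept in this module; this is illustration only.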
286 | def quantize_season(value):
287 | 
288 |     if value > 0 and value < 5:
289 |         return 0
290 |     elif value >= 5 and value < 10:
291 |         return 1
292 |     elif value >= 10 and value < 20:
293 |         return 2
294 |     elif value >= 20 and value < 30:
295 |         return 3
296 |     elif value >= 30 and value < 60:
297 |         return 4
298 |     elif value >= 60 and value < 90:
299 |         return 5
300 |     elif value >= 90 and value < 120:
301 |         return 6
302 |     elif value >= 120:
303 |         return 7
304 | 
305 | def draw_midwest(ax):
306 | 
307 |     shapename = "../data/shapefiles/map/midwest_outline_latlon_grids"
308 |     shp = shpreader.Reader(shapename)
309 |     for outline, info in zip(shp.geometries(), shp.records()):
310 |         ax.add_geometries([outline], ccrs.PlateCarree(),
311 |                           facecolor='None', edgecolor='k', linewidth=4)
312 | 
313 | def get_season_mcs(run, season, dbz, mw=False):
314 | 
315 |     shapename = "../data/shapefiles/raw_data/shapefiles_day/" + run + "/" + season + '_' + dbz + '_pgw'
316 |     shp = shpreader.Reader(shapename)
317 |     geom = shp.geometries()
318 | 
319 |     if mw == False:
320 |         mcs_vals = np.array([a.attributes['count'] for a in shp.records()])
321 |         mcs_vals[~np.isfinite(mcs_vals)] = 0
322 |         return geom, mcs_vals
323 | 
324 |     else:
325 |         mcs_vals = []
326 |         for a in shp.records():
327 |             if a.attributes['midwest'] == True:
328 |                 mcs_vals.append(a.attributes['count'])
329 |         mcs_vals = np.array(mcs_vals)
330 |         return mcs_vals
331 | 
332 | def generate_view(plt, w_lon, e_lon, n_lat, s_lat, from_proj, to_proj):
333 | 
334 |     view = plt.axes([0,0,1,1], projection=to_proj)
335 | 
336 |     view.set_extent([w_lon, e_lon, s_lat, n_lat])
337 | 
338 |     shapename = 'admin_1_states_provinces_lakes_shp'
339 |     states_shp = shpreader.natural_earth(resolution='50m',
340 |                                          category='cultural', name=shapename)
341 | 
342 |     for state, info in zip(shpreader.Reader(states_shp).geometries(), shpreader.Reader(states_shp).records()):
343 |         if info.attributes['admin'] == 'United States of America':
344 | 
345 |             view.add_geometries([state], ccrs.PlateCarree(),
346 |                                 facecolor='None', edgecolor='k')
347 | 
348 |     return view
349 | 
350 | def draw_grids_season(ax, geom, vals, classes, debug=False):
351 | 
352 |     for g, val in zip(geom, vals):
353 | 
354 |         if np.isfinite(val) and val > 0:
355 | 
356 |             if debug:
357 |                 x = g.centroid.x
358 |                 y = g.centroid.y
359 | 
360 |                 ax.text(x, y, str(int(val)), transform=ccrs.PlateCarree())
361 | 
362 |             quant = quantize_season(val)
363 | 
364 |             facecolor = cm.viridis(quant/classes)
365 | 
366 |         else:
367 |             facecolor='grey'
368 | 
369 |         ax.add_geometries([g], ccrs.PlateCarree(),
370 |                           facecolor=facecolor, edgecolor=facecolor)
--------------------------------------------------------------------------------
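A minimal end-to-end sketch of how these mapping helpers fit together (hypothetical usage, not part of the repository: the `run`/`season`/`dbz` tokens are guesses at the expected shapefile naming, and `mcs_future/utils` is assumed to be on `sys.path`):

```python
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

from mapping_help import generate_view, get_season_mcs, draw_grids_season

# Lambert Conformal view over the central United States, with state borders.
to_proj = ccrs.LambertConformal(central_longitude=-95, central_latitude=38)
view = generate_view(plt, -105, -75, 50, 25, ccrs.PlateCarree(), to_proj)

# Seasonal MCS counts from a pre-built grid shapefile, shaded by class.
geom, vals = get_season_mcs('ctrl', 'jja', '40', mw=False)
draw_grids_season(view, geom, vals, classes=7)

plt.show()
```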