├── 1_create_mat_eqCat_file.py ├── 1b_plot_eqLocs.py ├── 2_eta_0.py ├── 3_NND.py ├── 4_dist_tau.py ├── 5_plot_lat_t.py ├── 6_createClust.py ├── 7_productivity.py ├── 7b_plot_productivity.py ├── README.md ├── clust_SoCal.ipynb ├── data ├── hs_1981_2011_all.mat ├── hs_1981_2011_all_NND_Mc_3.0.mat ├── hs_1981_2011_all_NND_Mc_4.0.mat ├── hs_1981_2011_all_RT_Mc_3.0.mat └── hs_1981_2011_all_RT_Mc_4.0.mat ├── plots ├── SI_indAS_deficiencyOK_v5.0.pdf ├── T_R_hs_1981_2011_all_Mc_3.0.png ├── T_R_hs_1981_2011_all_Mc_4.0.png ├── hs_1981_2011_all_Mc_3.0_ASprod.png ├── hs_1981_2011_all_Mc_4.0_ASprod.png ├── hs_1981_2011_all_NND_hist_Mc_3.0.png ├── hs_1981_2011_all_NND_hist_Mc_4.0.png ├── hs_1981_2011_all_spanningTree_Mc_3.0.png └── hs_1981_2011_all_spanningTree_Mc_4.0.png ├── src ├── EqCat.py ├── __init__.py ├── clustering.py ├── data_utils.py └── datetime_utils.py └── test_scripts └── 1_2D_density_plots.py /1_create_mat_eqCat_file.py: -------------------------------------------------------------------------------- 1 | #!python3.7 2 | ''' 3 | Created on March 28th, 2019 4 | 5 | - load Hauksson, Shearer 2011 eq catalog from scec data center, alterntive catalogs 6 | - save as .mat binary for fast data I/O 7 | - note that the original catalog is not provided and has to be downloaded from the web: 8 | https://scedc.caltech.edu/eq-catalogs/altcatalogs.html 9 | 10 | @author: tgoebel 11 | ''' 12 | #------------------------------------------------------------------------------ 13 | import os 14 | 15 | #------------------------------my modules-------------------------------------- 16 | from src.EqCat import EqCat 17 | 18 | eqCat = EqCat( ) 19 | 20 | #=================================1============================================== 21 | # dir, file, params 22 | #================================================================================ 23 | # change to local dir where eq. catalogs are saved 24 | # the original catalog can be found here: https://scedc.caltech.edu/research-tools/altcatalogs.html 25 | dir_in = 'data' 26 | file_in= 'hs_1981_2011_all.txt' 27 | 28 | #=================================2============================================== 29 | # load data 30 | #================================================================================ 31 | import numpy as np 32 | # 0-5 (datetime), 6(ID), 7 (lat), 8 (lon), 9 (depth), 10 (mag) 33 | mData = np.loadtxt( f"{dir_in}/{file_in}", usecols=(0,1,2,3,4,5,6,7,8,9, 10)).T 34 | print( mData.shape) 35 | 36 | eqCat.loadEqCat( f"{dir_in}/{file_in}", 'HS_reloc') 37 | 38 | print( 'total no. 
of events: ', eqCat.size())
39 | print( sorted( eqCat.data.keys()))
40 | #=================================3==============================================
41 | # test plot and save to .mat binary
42 | #================================================================================
43 | eqCat.saveMatBin( file_in.replace( 'txt', 'mat'))
44 | newEqCat = EqCat( )
45 | newEqCat.loadMatBin( file_in.replace( 'txt', 'mat'))
46 | print( newEqCat.size())
47 | print( sorted( newEqCat.data.keys()))
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/1b_plot_eqLocs.py:
--------------------------------------------------------------------------------
1 | #!python3.7
2 | '''
3 | Created on March 28th, 2019
4 |
5 | - event selection
6 | - plot earthquake catalog
7 |
8 | TODO: - implement geo-referenced plotting with Basemap
9 |
10 |
11 | @author: tgoebel
12 | '''
13 | #------------------------------------------------------------------------------
14 | import matplotlib.pyplot as plt
15 | import numpy as np
16 | import os
17 |
18 | os.environ["PROJ_LIB"] = f"{os.environ['HOME']}/opt/anaconda3/share/proj"
19 | from mpl_toolkits.basemap import Basemap
20 | #------------------------------my modules--------------------------------------
21 |
22 | from src.EqCat import EqCat
23 |
24 | eqCat = EqCat( )
25 |
26 | #print( dir( dataUtils))
27 | #=================================1==============================================
28 | # dir, file, params
29 | #================================================================================
30 | dir_in = 'data'
31 | file_in= 'hs_1981_2011_all.mat'
32 | #xmin, xmax = -122, -114
33 | #ymin, ymax = 34, 38
34 | Mmin, Mmax = 3, None
35 | tmin, tmax = 1990, 2012
36 |
37 |
38 | #=================================2==============================================
39 | # load data, select events
40 | #================================================================================
41 | os.chdir( dir_in)
42 | eqCat.loadMatBin( file_in)
43 | print( eqCat.methods)
44 |
45 | print( 'total no. of events', eqCat.size())
46 | eqCat.selectEvents( Mmin, Mmax, 'Mag')
47 | eqCat.selectEvents( tmin, tmax, 'Time')
48 | print( 'no. of events after initial selection', eqCat.size())
49 | #=================================3==============================================
50 | # test plot
51 | #================================================================================
52 | projection = 'cyl'
53 | xmin,xmax = eqCat.data['Lon'].min(), eqCat.data['Lon'].max()
54 | ymin,ymax = eqCat.data['Lat'].min(), eqCat.data['Lat'].max()
55 |
56 | # setup equi-distant basemap.
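# If Basemap is not installed, the map below can be skipped (clust_SoCal.ipynb does this
# via a try/except fallback), or a rough, unprojected overview can be drawn directly from
# the catalog columns, e.g.:
#   plt.plot( eqCat.data['Lon'], eqCat.data['Lat'], 'ko', ms = 1)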
57 | m = Basemap( llcrnrlat = ymin,urcrnrlat = ymax,
58 | llcrnrlon = xmin,urcrnrlon = xmax,
59 | projection = projection,lat_0=(ymin+ymax)*.5,lon_0=(xmin+xmax)*.5,
60 | resolution = 'l')
61 | m.drawstates( linewidth = 1)
62 | m.drawcoastlines( linewidth= 2)
63 | a_x, a_y = m( eqCat.data['Lon'], eqCat.data['Lat'])
64 | m.plot( a_x, a_y, 'ko', ms = 1)
65 | sel7 = eqCat.data['Mag'] >=7
66 | m.plot( a_x[sel7], a_y[sel7], 'ro', ms = 8, mew= 1.5, mfc = 'none')
67 |
68 |
69 | m.drawmeridians( np.linspace( int(xmin), xmax, 4),labels=[False,False,False,True],
70 | fontsize = 12, fmt = '%.1f')
71 | m.drawparallels( np.linspace( int(ymin), ymax, 4),labels=[True,False,False,False],
72 | fontsize = 12, fmt = '%.2f')
73 |
74 | plt.savefig( file_in.replace( 'mat', 'png'))
75 | plt.show()
76 |
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/2_eta_0.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on October 9, 2019
3 |
4 | 1) shuffle event magnitudes and times
5 | 2) compute NND values for all event pairs
6 | 3) use 1st percentile as estimate for eta_0
7 | -> note that eta_0 is required to separate clustered
8 | and background events in the following analysis steps
9 | 4) eta_0 is saved in the /data directory
10 | file_out = [file_in]_Mc_[mc]_eta_0.txt
11 | @author: tgoebel
12 | '''
13 | #------------------------------------------------------------------------------
14 | import matplotlib as mpl
15 | #mpl.use( 'Agg') # turn off interactive plot
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | import scipy.io
19 | import os
20 |
21 | #------------------------------my modules--------------------------------------
22 | import src.clustering as clustering
23 | import src.data_utils as data_utils
24 | from src.EqCat import *
25 |
26 | eqCat = EqCat( ) # original cat
27 | ranCat = EqCat() # randomized, Poissonian catalog
28 | eqCatMc = EqCat() # catalog above completeness
29 | np.random.seed( 123456)
30 | #=================================1==============================================
31 | # dir, file, params
32 | #================================================================================
33 | dir_in = 'data'
34 | file_in= 'hs_1981_2011_all.mat'
35 |
36 | #file_b = '%s_b_Mc_D.txt'%(fileIn.split('.')[0])
37 | dPar = { 'aMc' : np.array([3.0, 4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]),
38 | # fractal dimension and b for eq. (1)
39 | 'D' : 1.6, # TODO: - these values should be constrained based on the data
40 | 'b' : 1.0, # use: https://github.com/tgoebel/magnitude-distribution for b-value
41 |
42 | # number of bootstraps for randomized catalogs
43 | 'nBoot' : 100,
44 | #=================plotting==============
45 | 'eta_binsize' : .3,
46 |
47 | 'cmin' : 1,
48 | 'xmin' : -13, 'xmax' : 0,
49 | ## R-T plot
50 | 'binx' : .1, 'biny' : .1,# used for density and gaussian smoothing
51 | 'sigma' : None, #if None: default = n**(-1./(d+4)), or set Gaussian bandwidth
52 | 'Tmin' : -8, 'Tmax' : 0,
53 | 'Rmin' : -5, 'Rmax' : 3,
54 | 'cmap' : plt.cm.RdYlGn_r,
55 | 'showPlot' : False,
56 | }
57 |
58 | #================================================================================
59 | # load data, event selection
60 | #================================================================================
61 | eqCat.loadMatBin( os.path.join( dir_in, file_in))
62 | print( 'total no. 
of events', eqCat.size()) 63 | eqCat.selectEvents( dPar['aMc'][0], None, 'Mag') 64 | #eqCat.selectEvents( tmin, tmax, 'Time') 65 | print( 'no. of events after initial selection', eqCat.size()) 66 | # project to equi-distant coordiante system for cartesian distances 67 | eqCat.toCart_coordinates( projection = 'eqdc')#'eqdc') 68 | for f_Mc in dPar['aMc']: 69 | print( '-------------- current Mc:', f_Mc, '---------------------') 70 | # select magnitude range 71 | eqCatMc.copy( eqCat) 72 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 73 | print( 'catalog size after MAG selection', eqCat.size()) 74 | # this dictionary is used in module: clustering 75 | dConst = {'Mc' : f_Mc, 76 | 'b' : dPar['b'], 77 | 'D' : dPar['D']} 78 | 79 | #=============================2=================================================== 80 | # randomize catalog 81 | #================================================================================= 82 | a_Eta_0 = np.zeros( dPar['nBoot']) 83 | for i_Bs in range( dPar['nBoot']): 84 | 85 | ranCat.copy( eqCatMc) 86 | ranCat.data['X'] = np.random.uniform( eqCatMc.data['X'].min(), eqCatMc.data['X'].max(), size = eqCatMc.size()) 87 | ranCat.data['Y'] = np.random.uniform( eqCatMc.data['Y'].min(), eqCatMc.data['Y'].max(), size = eqCatMc.size()) 88 | ranCat.data['Time'] = clustering.rand_rate_uni( eqCatMc.size(), eqCatMc.data['Time'].min(), eqCatMc.data['Time'].max()) 89 | ranCat.sortCatalog( 'Time') 90 | #==================================3============================================= 91 | # compute space-time-magnitude distance, histogram 92 | #================================================================================ 93 | dNND = clustering.NND_eta( ranCat, dConst, M0 = 0, correct_co_located = True, 94 | verbose = False) 95 | a_Eta_0[i_Bs] = round( np.percentile( np.log10(dNND['aNND']), 1), 5) 96 | print( 'nBoot', i_Bs+1,'out of', dPar['nBoot'], 'eta 0 - 1st', np.percentile( np.log10(dNND['aNND']), 1)) 97 | if dPar['showPlot'] == True: # plots to check if everything is working 98 | #=================================4============================================== 99 | # plot NND histogram 100 | #================================================================================ 101 | plt.figure( 1, figsize = (10,5)) 102 | ax = plt.axes( [.12, .12, .83, .83]) 103 | ax.hist( np.log10( dNND['aNND']), np.arange( dPar['xmin'], dPar['xmax'], dPar['eta_binsize']), 104 | color = '.5', label = 'Mc = %.1f'%( f_Mc), align = 'mid', rwidth=.9) 105 | ax.plot( [-5, -5], ax.get_ylim(), 'w-', lw = 2, ) 106 | ax.plot( [-5, -5], ax.get_ylim(), 'k--', lw = 2, ) 107 | ax.plot( [a_Eta_0[i_Bs], a_Eta_0[i_Bs]], ax.get_ylim(), 'w-', lw = 2, label = '$N_\mathrm{tot}$=%i'%( ranCat.size())) 108 | ax.plot( [a_Eta_0[i_Bs], a_Eta_0[i_Bs]], ax.get_ylim(), 'r--', lw = 2, label = '$N_\mathrm{cl}$=%i'%( dNND['aNND'][dNND['aNND']<1e-5].shape[0])) 109 | 110 | ax.legend( loc = 'upper left') 111 | ax.set_xlabel( 'NND, log$_{10} \eta$') 112 | ax.set_ylabel( 'Number of Events') 113 | ax.grid( 'on') 114 | ax.set_xlim( dPar['xmin'], dPar['xmax']) 115 | 116 | 117 | #==================================4============================================================== 118 | # T-R density plot 119 | #================================================================================================= 120 | catChild = EqCat() 121 | catParent= EqCat() 122 | catChild.copy( ranCat) 123 | catParent.copy( ranCat) 124 | 125 | catChild.selEventsFromID( dNND['aEqID_c'], repeats = True) 126 | catParent.selEventsFromID( dNND['aEqID_p'], repeats = 
True)
127 | print( catChild.size(), catParent.size(), eqCatMc.size())
128 | a_R, a_T = clustering.rescaled_t_r( catChild, catParent, dConst, correct_co_located = True)
129 |
130 | a_Tbin = np.arange( dPar['Tmin'], dPar['Tmax']+2*dPar['binx'], dPar['binx'])
131 | a_Rbin = np.arange( dPar['Rmin'], dPar['Rmax']+2*dPar['biny'], dPar['biny'])
132 | a_log_T = np.log10( a_T)
133 | a_log_R = np.log10( a_R)
134 | XX, YY, ZZ = data_utils.density_2D( a_log_T, a_log_R, a_Tbin, a_Rbin, sigma = dPar['sigma'])
135 |
136 | plt.figure(2, figsize= (8,10))
137 | ax = plt.subplot(111)
138 | ax.set_title( 'Nearest Neighbor Pairs in R-T')
139 | #------------------------------------------------------------------------------
140 | normZZ = ZZ*( dPar['binx']*dPar['biny']*eqCatMc.size())
141 | plot1 = ax.pcolormesh( XX, YY, normZZ, cmap=dPar['cmap'])
142 | cbar = plt.colorbar(plot1, orientation = 'horizontal', shrink = .5, aspect = 20,)
143 | #ax.plot( np.log10( a_T), np.log10( a_R), 'wo', ms = 1.5, alpha = .2)
144 | # plot eta_0 to divide clustered and background mode
145 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+a_Eta_0[i_Bs], '-', lw = 1.5, color = 'w' )
146 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+a_Eta_0[i_Bs],'--', lw = 1.5, color = '.5' )
147 | #-----------------------labels and legends-------------------------------------------------------
148 | #cbar.set_label( 'Event Pair Density [#ev./dRdT]')
149 | cbar.set_label( 'Number of Event Pairs',labelpad=-40)
150 | ax.set_xlabel( 'Rescaled Time')
151 | ax.set_ylabel( 'Rescaled Distance')
152 | ax.set_xlim( dPar['Tmin'], dPar['Tmax'])
153 | ax.set_ylim( dPar['Rmin'], dPar['Rmax'])
154 |
155 | plt.show()
156 | #=================================3==============================================
157 | # save results
158 | #================================================================================
159 | f_eta_0 = a_Eta_0.mean()
160 | print( 'mean eta_0', a_Eta_0.mean())
161 | file_out = '%s/%s_Mc_%.1f_eta_0.txt'%(dir_in, file_in, f_Mc)
162 | np.savetxt( file_out, np.array([f_eta_0]), fmt = '%10.3f', header='eta_0')
163 | print( 'save results', file_out)
164 | scipy.io.savemat(file_out.replace('txt','mat'),
165 | {'eta_0': f_eta_0, 'eta_BS' : a_Eta_0,}, do_compression=True)
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/3_NND.py:
--------------------------------------------------------------------------------
1 | #!python3.7
2 | '''
3 | Created on April 10th, 2019
4 |
5 | - compute nearest-neighbor distance (NND) between all event pairs (see equ. 
1 in Zaliapin & Ben-Zion, 2013) 6 | - test -plot histogram of NNDs: Figure 4c in Zaliapin & Ben-Zion, 2013 7 | 8 | output: 'data/%s_NND_Mc_%.1f.mat'%(dPar['catName'], dPar['Mc']) 9 | which is a python dictionary with: 10 | { 'aNND' : aNND, - nearest neighbor space-time magnitude distance 11 | 'aEqID_p' : np.array - ID of the parent event 12 | 'aEqID_c' : np.array - ID of the child event 13 | } 14 | 15 | TODO: 16 | - constrain Mc, b and D independently through statistical analysis of the actual data 17 | 18 | @author: tgoebel 19 | ''' 20 | import time 21 | #------------------------------------------------------------------------------ 22 | import matplotlib as mpl 23 | #mpl.use( 'Agg') # turn off interactive plot 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | import scipy.io 27 | import os 28 | 29 | #------------------------------my modules-------------------------------------- 30 | import src.clustering as clustering 31 | from src.EqCat import EqCat 32 | 33 | eqCat = EqCat( ) 34 | 35 | #=================================1============================================== 36 | # dir, file, params 37 | #================================================================================ 38 | dir_in = 'data' 39 | file_in= 'hs_1981_2011_all.mat' 40 | 41 | #file_b = '%s_b_Mc_D.txt'%(fileIn.split('.')[0]) 42 | dPar = { 'aMc' : np.array([3.0]), #3.0, 4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 43 | # fractal dimension and b for eq. (1) 44 | 'D' : 1.6, # TODO: - these values should be contrained independently 45 | 'b' : 1.0, # use: https://github.com/tgoebel/magnitude-distribution for b-value 46 | #=================plotting============== 47 | 'eta_binsize' : .3, 48 | 'xmin' : -13, 'xmax' : 0, 49 | } 50 | 51 | #=================================2============================================== 52 | # load data, select events 53 | #================================================================================ 54 | eqCat.loadMatBin( os.path.join( dir_in, file_in)) 55 | print( 'total no. of events', eqCat.size()) 56 | eqCat.selectEvents( dPar['aMc'][0], None, 'Mag') 57 | #eqCat.selectEvents( tmin, tmax, 'Time') 58 | print( 'no. 
of events after initial selection', eqCat.size()) 59 | #=================================1============================================== 60 | # to cartesian coordinates 61 | #================================================================================ 62 | # two ways to do the distance comp: 1 project into equal distance azimuthal , comp Cartersian distance in 3D 63 | # 2 get surface distance from lon, lat (haversine), use pythagoras to include depth 64 | eqCat.toCart_coordinates( projection = 'eqdc') 65 | 66 | for f_Mc in dPar['aMc']: 67 | print( '-------------- current Mc:', f_Mc, '---------------------') 68 | # select magnitude range 69 | eqCat.selectEvents( f_Mc, None, 'Mag') 70 | print( 'catalog size after MAG selection', eqCat.size()) 71 | # this dictionary is used in module: clustering 72 | dConst = {'Mc' : f_Mc, 73 | 'b' : dPar['b'], 74 | 'D' : dPar['D']} 75 | #==================================2============================================= 76 | # compute space-time-magnitude distance, histogram 77 | #================================================================================ 78 | eqCat.data['Z'] = eqCat.data['Depth'] 79 | print('depth range: ', eqCat.data['Z'].min(), eqCat.data['Z'].max()) 80 | dCluster = clustering.NND_eta( eqCat, dConst, distance_3D = False, 81 | correct_co_located = True, verbose= True) 82 | ###histogram 83 | aBins = np.arange( -13, 1, dPar['eta_binsize'], dtype = float) 84 | aHist, aBins = np.histogram( np.log10( dCluster['aNND'][dCluster['aNND']>0]), aBins) 85 | aBins = aBins[0:-1] + dPar['eta_binsize']*.5 86 | # correct for binsize 87 | aHist = aHist/dPar['eta_binsize'] 88 | # to pdf (prob. density) 89 | aHist /= eqCat.size() 90 | #=================================3============================================== 91 | # save results 92 | #================================================================================ 93 | import scipy.io 94 | NND_file = 'data/%s_NND_Mc_%.1f.mat'%( file_in.split('.')[0], f_Mc) 95 | print( 'save file', NND_file) 96 | scipy.io.savemat( NND_file, dCluster, do_compression = True) 97 | 98 | #=================================4============================================== 99 | # plot histogram 100 | #================================================================================ 101 | # load eta_0 value - only for plotting purposes 102 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(dir_in, file_in, f_Mc) 103 | if os.path.isfile( eta_0_file): 104 | print( 'load eta_0 from file'), 105 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 106 | print( 'eta_0',f_eta_0) 107 | else: 108 | f_eta_0 = -5 109 | print( 'could not find eta_0 file', eta_0_file, 'use value: ', f_eta_0) 110 | 111 | fig, ax = plt.subplots() 112 | #ax.plot( vBin, vHist, 'ko') 113 | ax.bar( aBins, aHist, width =.8*dPar['eta_binsize'], align = 'edge', color = '.5', label = 'Mc = %.1f'%( f_Mc)) 114 | ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'w-', lw = 2, label = '$N_\mathrm{tot}$=%i'%( eqCat.size())) 115 | ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'r--', lw = 2, label = '$N_\mathrm{cl}$=%i'%( dCluster['aNND'][dCluster['aNND']<1e-5].shape[0])) 116 | 117 | ax.legend( loc = 'upper left') 118 | ax.set_xlabel( 'NND, log$_{10} \eta$') 119 | ax.set_ylabel( 'Number of Events') 120 | ax.grid( 'on') 121 | ax.set_xlim( dPar['xmin'], dPar['xmax']) 122 | plt.show() 123 | 124 | plotFile = 'plots/%s_NND_hist_Mc_%.1f.png'%( file_in.split('.')[0], f_Mc) 125 | print( 'save plot', plotFile) 126 | #plt.savefig( plotFile) 127 | plt.clf() 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 
| 136 | 137 | -------------------------------------------------------------------------------- /4_dist_tau.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on August 16, 2016 3 | 4 | - compute inter-event times and distance and normalize by magnitude 5 | - create colormap of event-pair density (i.e. NND events) for 6 | corresponding rescaled event times and distances 7 | 8 | @author: tgoebel 9 | ''' 10 | import matplotlib as mpl 11 | mpl.use( 'Agg') # uncomment for interactive plotting 12 | 13 | import os 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | import scipy.io 17 | #------------------------------my modules-------------------------------------- 18 | import src.data_utils as data_utils 19 | import src.clustering as clustering 20 | from src.EqCat import * 21 | 22 | eqCat = EqCat() # original catalog 23 | eqCatMc = EqCat() # this catalog wil be modfied with each Mc iteration 24 | catChild= EqCat() 25 | catParent= EqCat() 26 | np.random.seed( 123456) 27 | #=================================1============================================== 28 | # dir, file, params 29 | #================================================================================ 30 | data_dir = 'data' 31 | file_in = 'hs_1981_2011_all.mat' 32 | 33 | #file_b = '%s_b_Mc_D.txt'%(fileIn.split('.')[0]) 34 | dPar = { 'a_Mc' : np.array([4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 35 | # fractal dimension and b for eq. (1) 36 | 'D' : 1.6, # TODO: - these values should be constrained independently 37 | 'b' : 1.0, 38 | 39 | #===================smoothing parameters============= 40 | 'binx' : .1, 'biny' : .1,# used for density and gaussian smoothing 41 | 'sigma' : None, #if None: default = n**(-1./(d+4)), 42 | #=================plotting============== 43 | 'eta_0' : -5.0, # run: 2_eta_0.py and 44 | # if eta-0-file exists: default = load this value from ASCII file 45 | #'xmin' : -13, 'xmax' : 0, 46 | 'Tmin' : -8, 'Tmax' : 0, 47 | 'Rmin' : -5, 'Rmax' : 3, 48 | 'cmap' : plt.cm.RdYlGn_r, } 49 | 50 | #=================================2============================================== 51 | # load data, select events 52 | #================================================================================ 53 | eqCat.loadMatBin( os.path.join( data_dir, file_in)) 54 | 55 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 56 | #eqCat.selectEvents( tmin, tmax, 'Time') 57 | print( 'no. 
of events after initial selection', eqCat.size()) 58 | 59 | # two ways to do the distance comp: 1 project into equal distance azimuthal , comp Cartersian distance in 3D 60 | # 2 get surface distance from lon, lat (haversine), use pythagoras to include depth 61 | eqCat.toCart_coordinates( projection = 'eqdc') 62 | 63 | for i in range( dPar['a_Mc'].shape[0]): 64 | f_Mc = dPar['a_Mc'][i] 65 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(data_dir, file_in, f_Mc) 66 | # load eta_0 value 67 | if os.path.isfile( eta_0_file): 68 | print( 'load eta_0 from file'), 69 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 70 | else: 71 | print( 'could not find eta_0 file', eta_0_file, 'use value from dPar', dPar['eta_0']) 72 | f_eta_0 = dPar['eta_0'] 73 | # cut below current completeness 74 | eqCatMc.copy( eqCat) 75 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 76 | print( 'current catalog size: ',eqCatMc.size()) 77 | 78 | # load nearest neighbor distances 79 | NND_file = 'data/%s_NND_Mc_%.1f.mat'%( file_in.split('.')[0], f_Mc) 80 | dNND = data_utils.loadmat(NND_file) #, struct_as_record=True) 81 | 82 | #==================================3============================================= 83 | # compute re-scaled interevent times and distances 84 | #================================================================================ 85 | catChild.copy( eqCatMc) 86 | catParent.copy( eqCatMc) 87 | catChild.selEventsFromID( dNND['aEqID_c'], repeats = True) 88 | catParent.selEventsFromID( dNND['aEqID_p'], repeats = True) 89 | print( 'size of offspring catalog', catChild.size(), 'size of parent cat', catParent.size()) 90 | 91 | # note that dictionary dPar here has to include 'b','D' and 'Mc' 92 | a_R, a_T = clustering.rescaled_t_r( catChild, catParent, {'b':dPar['b'], 'D':dPar['D'], 'Mc':f_Mc}, correct_co_located = True) 93 | RT_file = 'data/%s_RT_Mc_%.1f.mat'%( file_in.split('.')[0], f_Mc) 94 | scipy.io.savemat( RT_file, {'R' : a_R, 'T': a_T}, do_compression = True) 95 | #==================================4============================================================== 96 | # T-R density plots 97 | #================================================================================================= 98 | a_Tbin = np.arange( dPar['Tmin'], dPar['Tmax']+2*dPar['binx'], dPar['binx']) 99 | a_Rbin = np.arange( dPar['Rmin'], dPar['Rmax']+2*dPar['biny'], dPar['biny']) 100 | XX, YY, ZZ = data_utils.density_2D( np.log10( a_T), np.log10( a_R), a_Tbin, a_Rbin, sigma = dPar['sigma']) 101 | 102 | fig1 = plt.figure(1, figsize= (8,10)) 103 | ax = plt.subplot(111) 104 | ax.set_title( 'Nearest Neighbor Pairs in R-T') 105 | #------------------------------------------------------------------------------ 106 | normZZ = ZZ*( dPar['binx']*dPar['biny']*eqCatMc.size()) 107 | plot1 = ax.pcolormesh( XX, YY, normZZ, cmap=dPar['cmap']) 108 | cbar = plt.colorbar(plot1, orientation = 'horizontal', shrink = .5, aspect = 20,) 109 | #ax.plot( np.log10( a_T), np.log10( a_R), 'wo', ms = 1.5, alpha = .2) 110 | # plot eta_0 to divide clustered and background mode 111 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+f_eta_0, '-', lw = 1.5, color = 'w' ) 112 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+f_eta_0,'--', lw = 1.5, color = '.5' ) 113 | #-----------------------labels and legends------------------------------------------------------- 114 | #cbar.set_label( 'Event Pair Density [#ev./dRdT]') 115 | cbar.set_label( 'Number of Event Pairs',labelpad=-40) 116 | ax.set_xlabel( 'Rescaled Time') 117 | 
ax.set_ylabel( 'Rescaled Distance') 118 | ax.set_xlim( dPar['Tmin'], dPar['Tmax']) 119 | ax.set_ylim( dPar['Rmin'], dPar['Rmax']) 120 | 121 | #=================================5============================================== 122 | # save results 123 | #================================================================================ 124 | print( 'plot saved in: ','plots/T_R_%s_Mc_%.1f.png'%( file_in.split('.')[0], f_Mc)) 125 | fig1.savefig( 'plots/T_R_%s_Mc_%.1f.png'%( file_in.split('.')[0], f_Mc)) 126 | plt.show() 127 | 128 | plt.clf() 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /5_plot_lat_t.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 16, 2019 3 | 4 | - plot cluster families with eta <= eta_0 5 | - plot lat and time (dec. year) 6 | 7 | @author: tgoebel - Thomas Goebel University of Memphis 8 | ''' 9 | import matplotlib as mpl 10 | #mpl.use( 'Agg') # uncomment for interactive plotting 11 | 12 | import os 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | #------------------------------my modules-------------------------------------- 17 | import src.data_utils as dataIO 18 | #import src.clustering as clustering 19 | from src.EqCat import EqCat 20 | 21 | eqCat = EqCat() # original catalog 22 | eqCatMc = EqCat() # this catalog will be modified with each Mc iteration 23 | catChild= EqCat() 24 | catParent= EqCat() 25 | 26 | #=================================1============================================== 27 | # dir, file, params 28 | #================================================================================ 29 | data_dir = 'data' 30 | plot_dir = 'plots' 31 | file_in = 'hs_1981_2011_all.mat' 32 | 33 | dPar = { 'a_Mc' : np.array([4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 34 | #separate clustered and background 35 | 'eta_0' : -5.0, # run 2_eta_0.py and 36 | # if file exists: default = load this value from ASCII file 37 | } 38 | 39 | #=================================2============================================== 40 | # load data, select events 41 | #================================================================================ 42 | eqCat.loadMatBin( os.path.join( data_dir, file_in)) 43 | print( 'total no. of events', eqCat.size()) 44 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 45 | #eqCat.selectEvents( tmin, tmax, 'Time') 46 | print( 'no. 
of events after initial selection', eqCat.size()) 47 | 48 | iMc = 0 49 | for f_Mc in dPar['a_Mc']: 50 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(data_dir, file_in, f_Mc) 51 | # load eta_0 value 52 | if os.path.isfile( eta_0_file): 53 | print( 'load eta_0 from file'), 54 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 55 | print( f_eta_0) 56 | else: 57 | print( 'could not find eta_0 file', eta_0_file, 'use value from dPar', dPar['eta_0']) 58 | f_eta_0 = dPar['eta_0'] 59 | # cut below current completeness 60 | eqCatMc.copy( eqCat) 61 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 62 | print( 'current catalog size: ',eqCatMc.size()) 63 | # load nearest neighbor distances 64 | NND_file = '%s_NND_Mc_%.1f.mat'%(os.path.basename( file_in).split('.')[0], f_Mc) 65 | dNND = dataIO.loadmat( os.path.join( data_dir, NND_file)) 66 | print( dNND.keys()) 67 | dNND['aNND'] = np.log10( dNND['aNND']) 68 | #==================================3============================================= 69 | # "declustering" step 70 | #================================================================================ 71 | #catChild, catPar = create_parent_child_cat( projCat, dNND) 72 | catChild.copy( eqCat) 73 | catParent.copy( eqCat) 74 | catChild.selEventsFromID( dNND['aEqID_c'], repeats = True) 75 | catParent.selEventsFromID( dNND['aEqID_p'], repeats = True) 76 | print( 'tot. ev', eqCatMc.size(), 'parents', np.unique( catParent.data['N']).shape[0], 'children', np.unique( catChild.data['N']).shape[0]) 77 | #==================================4============================================= 78 | # spanning tree 79 | #================================================================================ 80 | plt.figure( 1) 81 | ax = plt.subplot(111) 82 | for iEv in range( catParent.size()): 83 | print( 'MS', int( catParent.data['N'][iEv]), catParent.data['Time'][iEv], eqCatMc.data['Time'][iEv]) 84 | 85 | if dNND['aNND'][iEv] < dPar['eta_0']:#triggered cluster 86 | ax.plot( [catParent.data['Time'][iEv]], [catParent.data['Lat'][iEv]], 'ro', ms = 12, alpha = .2) 87 | ax.plot( [catParent.data['Time'][iEv],catChild.data['Time'][iEv]], 88 | [catParent.data['Lat'][iEv], catChild.data['Lat'][iEv]], 'k-', marker = 'o', ms = 4, mew =1, mfc = 'none') 89 | else: # independent events 90 | ax.plot( [catChild.data['Time'][iEv]], [catChild.data['Lat'][iEv]], 'bo', ms = 5, alpha = .6) 91 | 92 | #ax.set_xlim( 2009, 2017) 93 | #=================================3============================================== 94 | # save results 95 | #================================================================================ 96 | 97 | plt.figure(1) 98 | #plt.savefig( '%s/%s_spanningTree_Mc_%.1f.png'%(plot_dir, file_in.split('.')[0], f_Mc)) 99 | ## save main shock catalog 100 | plt.show() 101 | plt.clf() 102 | 103 | 104 | iMc += 1 105 | -------------------------------------------------------------------------------- /6_createClust.py: -------------------------------------------------------------------------------- 1 | # python3.7 2 | ''' 3 | Created on Oct 7th, 2019 4 | 5 | key step in the analysis during which complete families of triggered events are assembled 6 | 7 | 1) select all event pairs with NND <= eta_0 8 | 2) place each event into a new cluster (family) with unique cluster ID 9 | or append to existing cluster if parent or offspring event are found in list 10 | of previously created clusters 11 | 12 | 13 | Input: NND_file = str() 14 | eta_0 = float() 15 | 16 | 17 | Output: 18 | dictionary with all event families 19 | dic['0'] = singles 20 | all other cluster 
are integer-strings followed by the associated eqID number 21 | e.g. 22 | { '0': np.array([[1243245,4253455343]]), 23 | '1': np.array([[5235,43455343,3456,56652,54]]), 24 | '2': ....} 25 | Note that: 26 | 1) output is saved as matlab binary but file cannot 27 | be read with matlab because variable names are integers 28 | 2) cluster '0' are singles 29 | 3) mainshocks can be defined as largest event or 30 | first event in a family 31 | 32 | @author: Thomas Goebel - University of Memphis 33 | ''' 34 | import matplotlib as mpl 35 | #mpl.use( 'Agg') 36 | import matplotlib.pyplot as plt 37 | 38 | import numpy as np 39 | import os, scipy.io 40 | 41 | #------------------------------my modules-------------------------------------- 42 | import src.data_utils as dataIO 43 | import src.clustering as clustering 44 | from src.EqCat import * 45 | 46 | eqCat = EqCat() # original catalog 47 | eqCatMc = EqCat() # this catalog will be modified with each Mc iteration 48 | 49 | #=================================1============================================== 50 | # dir, file, params 51 | #================================================================================ 52 | data_dir = 'data' 53 | plot_dir = 'plots' 54 | file_in = 'hs_1981_2011_all.mat' 55 | 56 | 57 | dPar = { 'a_Mc' : np.array([3.0]), #3.0, 4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 58 | #separate clustered and background 59 | # set to None or False to use value from file,requires results from: 2_eta_0.py 60 | 'eta_0' : None, #-5.0, 61 | } 62 | 63 | #=================================2============================================== 64 | # load data, select events 65 | #================================================================================ 66 | eqCat.loadMatBin( os.path.join( data_dir, file_in)) 67 | print( 'total no. of events', eqCat.size()) 68 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 69 | #eqCat.selectEvents( tmin, tmax, 'Time') 70 | print( 'no. 
of events after initial selection', eqCat.size()) 71 | 72 | iMc = 0 73 | for f_Mc in dPar['a_Mc']: 74 | clust_file = file_in.replace( 'all.mat', 'Mc_%.1f_clusters.mat'%( f_Mc)) 75 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(data_dir, file_in, f_Mc) 76 | if os.path.isfile( eta_0_file): 77 | print( 'load eta_0 from file'), 78 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 79 | print( 'eta_0',f_eta_0) 80 | else: 81 | f_eta_0 = -5 82 | print( 'could not find eta_0 file', eta_0_file, 'use value: ', f_eta_0) 83 | 84 | 85 | # cut below current completeness 86 | eqCatMc.copy( eqCat) 87 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 88 | print( 'current catalog size: ',eqCatMc.size()) 89 | # load nearest neighbor distances 90 | NND_file = '%s_NND_Mc_%.1f.mat' % (file_in.split('.')[0], f_Mc) 91 | dNND = dataIO.loadmat( os.path.join( data_dir, NND_file)) 92 | dNND['aNND'] = np.log10( dNND['aNND']) 93 | 94 | #==================================3============================================= 95 | # assemble clusters 96 | #================================================================================ 97 | print( 'similarity threshold', dPar['eta_0']) 98 | # clustering according to eta_0 similarity criteria 99 | dClust = clustering.compileClust( dNND, f_eta_0, useLargerEvents = False) 100 | #=================================4========================================================================== 101 | # save results 102 | #============================================================================================================ 103 | scipy.io.savemat( os.path.join( data_dir,clust_file), dClust, do_compression=True) 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /7_productivity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 07, 2019 3 | 4 | 1) count number of events after largest mag event (MS) within each family 5 | 2) count singles as events with 0 aftershocks 6 | 7 | @author: tgoebel 8 | ''' 9 | import matplotlib as mpl 10 | mpl.use( 'Agg') 11 | import numpy as np 12 | 13 | import os 14 | import matplotlib.pyplot as plt 15 | 16 | #----------------------my modules-------------------------------------------------------- 17 | import src.data_utils as data_utils 18 | #import src.clustering as clustering 19 | from src.EqCat import * 20 | 21 | eqCat = EqCat() # original catalog 22 | eqCatMc = EqCat() # this catalog will be modified with each Mc iteration 23 | 24 | 25 | #=================================1============================================== 26 | # dir, file, params 27 | #================================================================================ 28 | data_dir = 'data' 29 | plot_dir = 'plots' 30 | file_in = 'hs_1981_2011_all.mat' 31 | clust_file = file_in.replace( 'all.mat', 'clusters.mat') 32 | 33 | #=================================1============================================== 34 | # dir, file, params 35 | #================================================================================ 36 | 37 | dPar = { 38 | 'a_Mc' : np.array([ 3.0, 4.0]), # , 3.0, 4.0]), #3.0,4.0]), 39 | 40 | 'alpha' : 1, #exponent for test plot 41 | #=================plotting============== 42 | 'plotFormat' : 'png', 43 | } 44 | 45 | #=================================2============================================== 46 | # load data, select events 47 | #================================================================================ 48 | 
eqCat.loadMatBin( os.path.join( data_dir, file_in)) 49 | print( 'total no. of events', eqCat.size()) 50 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 51 | #eqCat.selectEvents( tmin, tmax, 'Time') 52 | print( 'no. of events after initial selection', eqCat.size()) 53 | 54 | 55 | iMc = 0 56 | for f_Mc in dPar['a_Mc']: 57 | # load file with IDs of events within family 58 | clust_file = file_in.replace( 'all.mat', 'Mc_%.1f_clusters.mat'%( f_Mc)) 59 | dClust = data_utils.loadmat( os.path.join( data_dir,clust_file), ) 60 | 61 | # cut below current completeness 62 | eqCatMc.copy( eqCat) 63 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 64 | n_aboveMc = eqCatMc.size() 65 | print( 'current catalog size: ',eqCatMc.size()) 66 | 67 | #=================================1========================================================================== 68 | # singles are counted as MS with 0 AS 69 | #============================================================================================================ 70 | print( 'total number of clusters', len( dClust.keys()), 'no. of BG events', dClust['0'].shape[0]) 71 | a_ID_single = dClust['0'] 72 | 73 | # IDs of BG events 74 | a_iSel = np.zeros( eqCatMc.size(), dtype = int) 75 | a_mag_single = np.zeros( len( a_ID_single)) 76 | a_N_AS_single= np.zeros( len( a_ID_single)) 77 | a_N_FS_single= np.zeros( len( a_ID_single)) 78 | for i in range( a_ID_single.shape[0]): 79 | # event ID may be in catalog more than once 80 | sel_ev = eqCatMc.data['N'] == a_ID_single[i] 81 | a_mag_single[i] = eqCatMc.data['Mag'][sel_ev][0] 82 | a_iSel[sel_ev] = 1#catalog.data['N'][catalog.data['N']==aEqID[i]][0] 83 | if sel_ev.sum() != 1: 84 | error_str = 'more than event found', eqCatMc.data['N'][sel_ev] 85 | raise( ValueError( error_str)) 86 | ### remove singles from catalog 87 | eqCatMc.selDicAll( np.logical_not(a_iSel)) 88 | print( 'remaining events', eqCatMc.size(), 'BG events', len( a_mag_single)) 89 | dClust.pop('0') # remove singles 90 | #=================================2========================================================================== 91 | # get MAGs of MS with aftershocks, count aftershocks 92 | #============================================================================================================ 93 | a_N_FS = np.zeros( len( dClust.keys()), dtype = int) 94 | a_N_AS = np.zeros( len( dClust.keys()), dtype = int) 95 | a_MS_mag = np.zeros( len( dClust.keys())) 96 | a_MS_ID = np.zeros( len( dClust.keys()), dtype = int) 97 | iCl = 0 98 | for sCl in dClust.keys(): 99 | aEqID = dClust[sCl]# np.unique( dClust[sCl].flatten()) unique is not needed anymore, createCluster has been fixed 100 | print( 'cl: ', iCl+1,'out of: ', len( dClust.keys()), 'no. of ev. in cl.', len( aEqID), len( np.unique( dClust[sCl]))) 101 | # find MS mag and magnitude of entire family 102 | atmp_MAG = np.zeros( len( aEqID)) 103 | atmp_Time= np.zeros( len( aEqID)) 104 | a_iSel = np.zeros( eqCatMc.size(), dtype = int) 105 | # for each family find: event mag. 
and origin time 106 | for iM in range( len( aEqID)): 107 | sel_ev = eqCatMc.data['N'] == aEqID[iM] 108 | if sel_ev.sum() != 1: 109 | error_str = 'more/less than event found', eqCatMc.data['N'][sel_ev], aEqID[iM] 110 | raise( ValueError, error_str) 111 | atmp_MAG[iM] = eqCatMc.data['Mag'][sel_ev][0] 112 | atmp_Time[iM] = eqCatMc.data['Time'][sel_ev][0] 113 | a_iSel[sel_ev] = 1 114 | # remove events from catalog 115 | #catalog.selDicAll( np.logical_not(a_iSel)) 116 | #----------------------------mainshock-------------------------------------------------- 117 | selMS = atmp_MAG == atmp_MAG.max() 118 | f_tMS = atmp_Time[selMS][0] 119 | i_ID_MS = aEqID[selMS] 120 | 121 | #print( 'tMS', tMS, v_currEqID.shape[0], 'MAG', curr_cat.data['MAG'][selMS][0] 122 | #----------------------------aftershock-------------------------------------------------- 123 | selAS = atmp_Time > f_tMS 124 | selFS = atmp_Time < f_tMS 125 | #print( 'no. of aftershocks', selAS.sum() 126 | # save number of aftershocks for each MS mag 127 | a_MS_mag[iCl] = atmp_MAG[selMS][0]#, dPar['magRound']) 128 | a_N_AS[iCl] = selAS.sum() 129 | a_N_FS[iCl] = selFS.sum() 130 | a_MS_ID[iCl] = int( i_ID_MS[0]) 131 | iCl += 1 132 | 133 | #=================================3========================================================================== 134 | # compare MS+single+FS+AS to original number of events in catalog 135 | #============================================================================================================ 136 | # combine single without AS with mainshocks that do have aftershocks 137 | a_N_FS = np.append( a_N_FS, a_N_FS_single) 138 | a_N_AS = np.append( a_N_AS, a_N_AS_single) 139 | a_MS_mag = np.append( a_MS_mag, a_mag_single) 140 | a_MS_ID = np.append( a_MS_ID, a_ID_single) 141 | print( 'tot ev. in catalog', n_aboveMc,'tot events in families',a_N_FS.sum() + a_N_AS.sum() + a_MS_mag.shape[0]) 142 | #print( 'N BG', a_mag_single.shape[0], 'FS', a_N_FS_single.sum(), 'AS', a_N_AS_single.sum(), 'MS (MS+BG)', a_MS_mag.shape[0] 143 | 144 | #=================================4========================================================================== 145 | # save to ASCII text 146 | #============================================================================================================ 147 | file_out = '%s/%s_Nas_MS_Mc_%.1f.txt'%(data_dir, file_in.split('.')[0], f_Mc)#, dPar['magRound']) 148 | np.savetxt( file_out, np.array([a_MS_mag, a_N_AS, a_N_FS, a_MS_ID]).T, fmt='%10.3f%10i%10i%14i', 149 | header = 'MAG N-AS N-FS MS-ID; note N_AS=0 highlights singles or FS only') 150 | iMc += 1 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /7b_plot_productivity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on August 16, 2016 3 | 4 | - plot No. 
of events for each mainshock 5 | - average number of events per mainshock 6 | - fit N ave and Mms --> alpha exponent 7 | 8 | @author: tgoebel 9 | ''' 10 | import matplotlib as mpl 11 | #mpl.use( 'Agg') 12 | 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | import os 16 | 17 | 18 | #=================================1============================================== 19 | # dir, file, params 20 | #================================================================================ 21 | data_dir = 'data' 22 | plot_dir = 'plots' 23 | file_in = 'hs_1981_2011_all.mat' 24 | 25 | dPar = { 26 | 'magRound' : 1, # for binning 27 | 'a_Mc' : np.array([2.5, 3.0, 4.0]), 28 | 29 | #=================plotting============== 30 | 'alpha' : 1.0, # for plotting demonstration 31 | 'xmin' : 2, 'xmax' : 8, 32 | 'ymin' : 0.1, 'ymax' : 1e4, 33 | 'plotFormat' : 'png', 34 | } 35 | 36 | #================================================================================ 37 | # load file with no. of aftershocks 38 | #================================================================================ 39 | 40 | iMc = 0 41 | for f_Mc in dPar['a_Mc']: 42 | file_prod = '%s/%s_Nas_MS_Mc_%.1f.txt'%(data_dir, file_in.split('.')[0], f_Mc)#, dPar['magRound']) 43 | 44 | m_N_as = np.loadtxt( file_prod).T 45 | print( 'total no. of mainshock', m_N_as[0].shape[0]) 46 | print( 'total no. of AS', m_N_as[1].sum()) 47 | print( 'total no. of FS', m_N_as[2].sum()) 48 | #=================================2========================================================================== 49 | # count ave. no. of aftershocks per MS magnitude 50 | #============================================================================================================ 51 | aMag_round= np.around( m_N_as[0], dPar['magRound']) 52 | aMag_bin = np.array( sorted(np.unique( aMag_round))) 53 | aAveNo_AS = np.ones( len( aMag_bin))*np.nan 54 | aNo_Fam = np.zeros( len( aMag_bin)) # total number of families within mag bin 55 | aNo_AS20 = np.zeros( len( aMag_bin)) 56 | aNo_AS80 = np.zeros( len( aMag_bin)) 57 | 58 | i = 0 59 | for curr_mag in aMag_bin: 60 | selMag = curr_mag == aMag_round 61 | aAveNo_AS[i] = m_N_as[1][selMag].mean() 62 | if selMag.sum() > 0: 63 | aNo_AS20[i] = np.percentile( m_N_as[1][selMag], 20) 64 | aNo_AS80[i] = np.percentile( m_N_as[1][selMag], 80) 65 | aNo_Fam[i] = selMag.sum() 66 | print( curr_mag, 'mean N-AS', round(aAveNo_AS[i],2), aNo_AS20[i],aNo_AS80[i], 'no. 
of fam', aNo_Fam[i]) 67 | 68 | i += 1 69 | 70 | #=================================3========================================================================== 71 | # plot productivity law 72 | #============================================================================================================ 73 | plt.figure(1, figsize=(8,6)) 74 | ax = plt.axes([.14,.12,.78,.83])#pPlot.createFigureSquare(1) 75 | ax.semilogy( m_N_as[0], m_N_as[1], 'o', ms = 6, mew =0, mfc = '.7', alpha = .2 ) 76 | #ax.errorbar( aMag_bin, aAveNo_AS, yerr=[np.zeros(aMag_bin.shape[0]), aNo_AS80-aAveNo_AS], 77 | # fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w') 78 | ax.errorbar( aMag_bin, aAveNo_AS, yerr=[aAveNo_AS-aNo_AS20, aNo_AS80-aAveNo_AS], 79 | fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w') 80 | 81 | #-------------------------exponential - estimate----------------------------------------------------- 82 | mag_fit = aMag_bin[10] # force fit through this point 83 | f_no_AS_pl = aAveNo_AS[aMag_bin == mag_fit] 84 | preFac = np.log10( f_no_AS_pl) - dPar['alpha']*mag_fit 85 | a_N_hat = 10**( dPar['alpha']*aMag_bin + preFac) 86 | ax.semilogy( aMag_bin, a_N_hat, 'w-') 87 | ax.semilogy( aMag_bin, a_N_hat, '-', color = 'r', lw = 2, label = 'exp = %.1f'%( np.round( dPar['alpha'],1))) 88 | 89 | #-------------------------------labels, limits etc.----------------------------------------------- 90 | ax.set_xlim( dPar['xmin'], dPar['xmax']) 91 | ax.set_ylim( dPar['ymin'], dPar['ymax']) 92 | ax.set_xlabel( 'Mainshock Magnitude') 93 | ax.set_ylabel( 'Number of Aftershocks') 94 | ax.legend( loc = 'upper left', frameon = False) 95 | 96 | plt.savefig( '%s/%s_Mc_%.1f_ASprod.%s'%(plot_dir, file_in.split('.')[0], f_Mc, dPar['plotFormat'])) 97 | 98 | plt.show() 99 | plt.clf() 100 | 101 | iMc += 1 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clustering-analysis 2 | Seismicity Clustering Analysis Based on nearest neighbor distances of event pairs 3 | 4 | To separate seismicity into background and clustered events, we use the distribution of nearest-neighbor event pairs and compare observed clustering characteristics with expectations from random poissonian earthquakes. Commonly, space-time distances can be described by a 2D bi-modal distribution. The first mode at small interevent times and distances highlights clustered events (e.g. aftershocks), whereas the second mode at larger distances is comprised of background events. The background mode for the California catalog corresponds to that expectation (see plots). We use the 99th percentile of nearest-neighbor distances from the randomized catalogs to separate background and clustered events, which allows for a clear separation between the two modes in California. 5 | 6 | 7 | Dependencies: 8 | 9 | Python 3.7 10 | Numpy, matplotlib, matplotlib-Basemap, scipy, scipy, datetime, calendar 11 | 12 | Use the following references if you use this code: 13 | - Zaliapin, I., and Ben-Zion, Y., 2013, Earthquake clusters in southern California I: Identification and stability: Journal of Geophysical Research: Solid Earth, v. 118, no. 6, p. 2847–2864, doi: 10.1002/jgrb.50179. 
14 |
15 | - Goebel, T.H.W., Rosson, Z., Brodsky, E.E., and Walter, J.I., 2019, Aftershock deficiency of induced earthquake sequences during rapid mitigation efforts in Oklahoma: Earth and Planetary Science Letters, v. 522, p. 135–143, doi: 10.1016/j.epsl.2019.06.036.
16 |
17 |
18 | # Tutorial:
19 |
20 | 1) Download standard or relocated catalog (e.g. https://service.scedc.caltech.edu/eq-catalogs/date_mag_loc.php
21 | or https://scedc.caltech.edu/research-tools/altcatalogs.html)
22 | 2) Convert catalog to EqCat object with attribute self.data, which is a dictionary with data columns
23 | 'Time' = Decimal Year, 'Lon', 'Lat', 'Mag', 'Depth'. Use '1_create_mat_eqCat_file.py' to do the conversion and
24 | save the EqCat as matlab binary (.mat). Alternatively, earthquake catalog formats can be changed in matlab
25 | and saved as .mat with variable names: 'Time' = Decimal Year, 'Lon', 'Lat', 'Mag', 'Depth'.
26 | An example catalog is provided in the /data directory (hs_1981_2011_all.mat).
27 |
28 | The following steps require estimates of fractal dimension, D, completeness magnitude, Mc, and b-value.
29 | Mc and the b-value can be estimated using the github repository: https://github.com/tgoebel/magnitude-distribution
30 | It is recommended that the sensitivity of the results to changes in these parameters be tested.
31 |
32 | 3) Compute separation between clustered and background events: '2_eta_0.py'
33 |
34 | 4) Compute nearest neighbor distances (NND) and find the parent event for each event, except for the first event in the catalog:
35 | '3_NND.py'
36 |
37 | 5) Assemble event families and save them as clusters. Each cluster has a unique ID which is used as variable name
38 | in a corresponding python dictionary. The clusters contain the unique event IDs of all members. Note that the cluster
39 | with ID and variable name '0' contains singles, i.e. events with parents at nearest-neighbor-distance beyond eta_0:
40 | '6_createClust.py'
41 |
42 | 6) Count the number of events within each cluster (or family) before (foreshocks) and after (aftershocks) the largest
43 | magnitude event in each family. Singles have 0 fore- and aftershocks:
44 | '7_productivity.py'
45 |
46 | 7) Plot the productivity relationship, including the number of aftershocks in each family and the average number of aftershocks
47 | within magnitude-binned mainshocks. Plot an alpha=1 slope for comparison:
48 | '7b_plot_productivity.py'
49 |
50 | All results should be compared to the provided figures using the scripts:
51 | '1b_plot_eqLocs.py', '4_dist_tau.py', '5_plot_lat_t.py'
52 | - also check results in:
53 | Zaliapin, I., and Ben-Zion, Y., 2013, Earthquake clusters in southern California I:
54 | Identification and stability: Journal of Geophysical Research: Solid Earth, v. 118, no. 6, p. 2847–2864, doi: 10.1002/jgrb.50179.
55 | and
56 | - Goebel, T.H.W., Rosson, Z., Brodsky, E.E., and Walter, J.I., 2019, Aftershock deficiency of induced earthquake sequences during rapid mitigation efforts in Oklahoma: Earth and Planetary Science Letters, v. 522, p. 135–143, doi: 10.1016/j.epsl.2019.06.036.
57 |
--------------------------------------------------------------------------------
/clust_SoCal.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "582ef5f5",
6 | "metadata": {},
7 | "source": [
8 | "# Sample script of clustering analysis applied to a relocated seismicity catalog from Southern California see:\n",
9 | "Hauksson, Egill, Wenzheng Yang, and Peter M. 
Shearer. \"Waveform relocated earthquake catalog for southern California (1981 to June 2011).\" Bulletin of the Seismological Society of America 102.5 (2012): 2239-2244.\n",
10 | "\n",
11 | " and\n",
12 | " \n",
13 | "Zaliapin, Ilya, and Yehuda Ben‐Zion. \"Earthquake clusters in southern California I: Identification and stability.\" Journal of Geophysical Research: Solid Earth 118.6 (2013): 2847-2864.\n",
14 | "\n",
15 | " and\n",
16 | " \n",
17 | " Goebel, T.H.W., Rosson, Z., Brodsky, E.E., and Walter, J.I., 2019, Aftershock deficiency of induced earthquake sequences during rapid mitigation efforts in Oklahoma: Earth and Planetary Science Letters, v. 522, p. 135–143, doi: 10.1016/j.epsl.2019.06.036.\n"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "id": "daa8acfa",
23 | "metadata": {},
24 | "source": [
25 | "### The following code performs three primary steps:\n",
26 | " 1. calculate nearest-neighbor distances between all events in the catalog\n",
27 | " 2. separate the seismicity catalog into families and independent background events based on a nearest-neighbor threshold\n",
28 | " 3. Count the number of aftershocks for each family and plot number of aftershocks over mainshock magnitude"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "id": "ebe1ad2d",
34 | "metadata": {},
35 | "source": [
36 | "### 0: seismicity map\n",
37 | "Load the Southern California seismicity catalog and plot it with Basemap. \n",
38 | "(this step can be skipped if the mpl Basemap module is not installed)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "id": "bd213067",
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "#specify data file, time and magnitude range\n",
49 | "dir_in = 'data'\n",
50 | "# this file is generated with: 1_create_mat_eqCat_file.py\n",
51 | "file_in= 'hs_1981_2011_all.mat'\n",
52 | "# completeness magnitude = Mmin, and Mmax (which does not need to be specified)\n",
53 | "Mmin, Mmax = 3, None\n",
54 | "tmin, tmax = 1980, 2012"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "id": "0a214aad",
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "#------------------------------------------------------------------------------\n",
65 | "import matplotlib.pyplot as plt\n",
66 | "import numpy as np\n",
67 | "import os\n",
68 | "\n",
69 | "#------------------------------my modules-------------------------------------- \n",
70 | "# EqCat is a Python object that is used for catalog processing\n",
71 | "from src.EqCat import EqCat\n",
72 | "eqCat = EqCat( )\n",
73 | "#for methods check source code or uncomment the following line \n",
74 | "print( 'EqCat Methods: ', eqCat.methods)\n",
75 | "#=================================2==============================================\n",
76 | "# load data, select events\n",
77 | "#================================================================================\n",
78 | "eqCat.loadMatBin( f\"{dir_in}/{file_in}\")\n",
79 | "print( 'total no. of events', eqCat.size())\n",
80 | "eqCat.selectEvents( Mmin, Mmax, 'Mag')\n",
81 | "eqCat.selectEvents( tmin, tmax, 'Time')\n",
82 | "print( 'no. 
of events after Mag/Time selection', eqCat.size())" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "185c09a3", 88 | "metadata": {}, 89 | "source": [ 90 | "the following cell will only run if you have Basemap" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "f6a3caf5", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "try:\n", 101 | " #os.environ[\"PROJ_LIB\"] = f\"{os.environ['HOME']}/opt/anaconda3/share/proj\"\n", 102 | " from mpl_toolkits.basemap import Basemap\n", 103 | " b_map = True\n", 104 | "except:\n", 105 | " b_map = False\n", 106 | "#=================================3==============================================\n", 107 | "# test plot with Basemap\n", 108 | "#================================================================================\n", 109 | "projection = 'cyl'\n", 110 | "xmin,xmax = eqCat.data['Lon'].min(), eqCat.data['Lon'].max()\n", 111 | "ymin,ymax = eqCat.data['Lat'].min(), eqCat.data['Lat'].max()\n", 112 | "if b_map:\n", 113 | " # setup equi distance basemap.\n", 114 | " m = Basemap( llcrnrlat = ymin,urcrnrlat = ymax,\n", 115 | " llcrnrlon = xmin,urcrnrlon = xmax,\n", 116 | " projection = projection,lat_0=(ymin+ymax)*.5,lon_0=(xmin+xmax)*.5,\n", 117 | " resolution = 'l')\n", 118 | " m.drawstates( linewidth = 1)\n", 119 | " m.drawcoastlines( linewidth= 2)\n", 120 | " a_x, a_y = m( eqCat.data['Lon'], eqCat.data['Lat'])\n", 121 | " m.plot( a_x, a_y, 'ko', ms = 1)\n", 122 | " sel6 = eqCat.data['Mag'] >= 6\n", 123 | " m.plot( a_x[sel6], a_y[sel6], 'ro', ms = 8, mew= 1.5, mfc = 'none')\n", 124 | "\n", 125 | " m.drawmeridians( np.linspace( int(xmin), xmax, 4),labels=[False,False,False,True],\n", 126 | " fontsize = 12, fmt = '%.1f')\n", 127 | " m.drawparallels( np.linspace( int(ymin), ymax, 4),labels=[True,False,False,False],\n", 128 | " fontsize = 12, fmt = '%.2f')\n", 129 | "\n", 130 | " plt.savefig( file_in.replace( 'mat', 'png'))\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "a90e4096", 136 | "metadata": {}, 137 | "source": [ 138 | "### 1: Compute Nearest Neighbor Distances" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "069c432f", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "import src.clustering as clustering\n", 149 | "# set parameters:fractal dimension and b-value\n", 150 | "dPar = { # fractal dimension and b for eq. 
(1) in Zaliapin & Ben-Zion\n", 151 | " 'D' : 1.6, # TODO: - these values should be contrained independently\n", 152 | " 'b' : 1.0, # use: https://github.com/tgoebel/magnitude-distribution for b-value\n", 153 | " 'Mc' : Mmin,\n", 154 | " #=================plotting==============\n", 155 | " # these parameters rarely have to be changes\n", 156 | " 'eta_binsize' : .3,\n", 157 | " 'xmin' : -13, 'xmax' : 0,\n", 158 | " }\n", 159 | "\n", 160 | "\n", 161 | "#================================================================================\n", 162 | "# to cartesian coordinates\n", 163 | "#================================================================================\n", 164 | "# two ways to do the distance comp: 1 project into equal distance azimuthal , comp Cartersian distance in 3D\n", 165 | "# 2 get surface distance from lon, lat (haversine), use pythagoras to include depth\n", 166 | "if b_map:\n", 167 | " eqCat.toCart_coordinates( projection = 'eqdc')\n", 168 | " print( 'convert to cartesian using equi-distant projection')\n", 169 | "#==================================2=============================================\n", 170 | "# compute space-time-magnitude distance, histogram\n", 171 | "#================================================================================\n", 172 | "eqCat.data['Z'] = eqCat.data['Depth']\n", 173 | "print('depth range: ', eqCat.data['Z'].min(), eqCat.data['Z'].max())\n", 174 | "dNND = clustering.NND_eta( eqCat, dPar, \n", 175 | " correct_co_located = True, verbose= True)\n", 176 | "###histogram\n", 177 | "aBins = np.arange( -13, 1, dPar['eta_binsize'], dtype = float)\n", 178 | "aHist, aBins = np.histogram( np.log10( dNND['aNND'][dNND['aNND']>0]), aBins)\n", 179 | "aBins = aBins[0:-1] + dPar['eta_binsize']*.5\n", 180 | "# correct for binsize\n", 181 | "aHist = aHist/dPar['eta_binsize']\n", 182 | "# to pdf (prob. 
density)\n",
 183 |     "aHist /= eqCat.size()\n",
 184 |     "#=================================3==============================================\n",
 185 |     "# save results\n",
 186 |     "#================================================================================\n",
 187 |     "import scipy.io\n",
 188 |     "NND_file = 'data/%s_NND_Mc_%.1f.mat'%( file_in.split('.')[0], dPar['Mc'])\n",
 189 |     "print( 'save file', NND_file)\n",
 190 |     "scipy.io.savemat( NND_file, dNND, do_compression = True)\n",
 191 |     "\n",
 192 |     "#=================================4==============================================\n",
 193 |     "# plot histogram\n",
 194 |     "#================================================================================\n",
 195 |     "# load eta_0 value - only for plotting purposes\n",
 196 |     "eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(dir_in, file_in, dPar['Mc'])\n",
 197 |     "if os.path.isfile( eta_0_file):\n",
 198 |     "    print( 'load eta_0 from file')\n",
 199 |     "    f_eta_0 = np.loadtxt( eta_0_file, dtype = float)\n",
 200 |     "    print( 'eta_0',f_eta_0)\n",
 201 |     "else:\n",
 202 |     "    f_eta_0 = -5\n",
 203 |     "    print( 'could not find eta_0 file', eta_0_file, 'use value: ', f_eta_0)\n",
 204 |     "\n",
 205 |     "fig, ax = plt.subplots()\n",
 206 |     "#ax.plot( vBin, vHist, 'ko')\n",
 207 |     "ax.bar( aBins, aHist, width =.8*dPar['eta_binsize'], align = 'edge', color = '.5', label = 'Mc = %.1f'%( dPar['Mc']))\n",
 208 |     "ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'w-', lw = 2, label = '$N_\\mathrm{tot}$=%i'%( eqCat.size()))\n",
 209 |     "ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'r--', lw = 2, label = '$N_\\mathrm{cl}$=%i'%( dNND['aNND'][dNND['aNND']<10**f_eta_0].shape[0]))\n",
 210 |     "\n",
 211 |     "ax.legend( loc = 'upper left')\n",
 212 |     "ax.set_xlabel( 'NND, log$_{10} \\eta$')\n",
 213 |     "ax.set_ylabel( 'Number of Events')\n",
 214 |     "ax.grid( 'on')\n",
 215 |     "ax.set_xlim( dPar['xmin'], dPar['xmax'])\n"
 216 |    ]
 217 |   },
 218 |   {
 219 |    "cell_type": "markdown",
 220 |    "id": "22b4dce8",
 221 |    "metadata": {},
 222 |    "source": [
 223 |     "The above figure shows a histogram of the nearest-parent space-time-magnitude distance in the catalog. \n",
 224 |     "Note that two distinct modes arise in a typical earthquake catalog: a clustered mode (left) \n",
 225 |     "and a background mode (right). The former represents Omori-type clustering (typically 'nearer'), while the latter represents the background Poisson process. Events in the background mode are those that, observably, are not triggered by a previous earthquake. $\\eta_0$ is the cutoff between these two modes: in practice, all links between earthquakes whose nearest-neighbor distance exceeds this cutoff are removed, which separates the catalog into distinct clusters.\n",
 226 |     "\n",
 227 |     "For a quick analysis, it is possible to simply pick a value that separates these modes. Smaller values lead to smaller clusters; larger values lead to more generous clusters but may include background seismicity. One rough way to estimate $\\eta_0$ from the same distribution is sketched right after this cell."
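# ---- optional sketch (not part of the notebook): estimate eta_0 from the NND values ----
# One quick, automated option is to fit a two-component Gaussian mixture to log10(eta)
# and use the midpoint between the two mode means as the separator between the clustered
# and the background mode. This assumes scikit-learn is installed (it is not otherwise
# required by this repository); dNND is the dictionary returned by clustering.NND_eta above.
import numpy as np
from sklearn.mixture import GaussianMixture
a_log_eta    = np.log10( dNND['aNND'][dNND['aNND'] > 0]).reshape(-1, 1)
gmm          = GaussianMixture( n_components = 2, random_state = 123).fit( a_log_eta)
mu_cl, mu_bg = np.sort( gmm.means_.flatten())   # smaller mean ~ clustered mode
f_eta_0_est  = 0.5*( mu_cl + mu_bg)             # crude separator between the two modes
print( 'estimated eta_0 (log10):', round( float( f_eta_0_est), 2))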
228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "id": "bf12b34a", 233 | "metadata": {}, 234 | "source": [ 235 | "### 2: separate clusters from independent background and compile event families" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "id": "5b874317", 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "dPar['eta_0'] = f_eta_0\n", 246 | "print( 'similarity threshold', dPar['eta_0'])" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "id": "47685619", 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "clust_file = file_in.replace( 'all.mat', 'Mc_%.1f_clusters.mat'%( dPar['Mc']))\n", 257 | " \n", 258 | "dNND['aNND'] = np.log10( dNND['aNND'])\n", 259 | "# clustering according to eta_0 similarity criteria\n", 260 | "dClust = clustering.compileClust( dNND, f_eta_0, useLargerEvents = False)\n", 261 | "#=================================4==========================================================================\n", 262 | "# save results\n", 263 | "#============================================================================================================\n", 264 | "scipy.io.savemat( os.path.join( dir_in,clust_file), dClust, do_compression=True)\n" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "b4ab2cd5", 270 | "metadata": {}, 271 | "source": [ 272 | "### let's create a couple of test plots to check whether everythin is working\n", 273 | "the first plot provides a rough overview of clustered and independent events in a rescale space-time domain\n", 274 | "\n", 275 | "This provides a visualization of the clustering behavior outlined above. This time in normalized space distance ($R_{ij}$) versus normalized time distance ($T_{ij}$). Where:\n", 276 | "\n", 277 | "$$ R_{ij} = r_{ij}^d\\times10^{bM_i/2} $$\n", 278 | "$$ T_{ij} = t_{ij}\\times10^{bM_i/2} $$" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "id": "6a65109e", 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "#=======event-pair density in r-T============================\n", 289 | "catChild= EqCat()\n", 290 | "catParent= EqCat()\n", 291 | "catChild.copy( eqCat)\n", 292 | "catParent.copy( eqCat)\n", 293 | "catChild.selEventsFromID( dNND['aEqID_c'], repeats = True)\n", 294 | "catParent.selEventsFromID( dNND['aEqID_p'], repeats = True)\n", 295 | "print( 'size of offspring catalog', catChild.size(), 'size of parent cat', catParent.size()) \n", 296 | "\n", 297 | "#compute re-scaled interevent times and distances\n", 298 | "a_R, a_T = clustering.rescaled_t_r(catChild, catParent, dPar)\n", 299 | "# plot event pair density \n", 300 | "fig = clustering.plot_R_T( a_T, a_R, f_eta_0)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "id": "d53670ed", 306 | "metadata": {}, 307 | "source": [ 308 | "as you can see there are two different statistical modes \n", 309 | "(sort of like peaks in a histogram except for now you are looking at a 2D histogram)\n", 310 | "Which mode (red area) correponds to the background events and which mode marks the aftershocks?" 
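For reference, the rescaled distance and time as computed in src/clustering.py (rescaled_t_r) carry the magnitude correction with a negative exponent,

$$ T_{ij} = t_{ij}\times10^{-bM_i/2}, \qquad R_{ij} = r_{ij}^{D}\times10^{-bM_i/2}, $$

with $M_i$ the parent magnitude and $D$ the fractal dimension, so that their product recovers the nearest-neighbor distance used above, $\eta_{ij} = T_{ij}\,R_{ij}$.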
311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "id": "2b47fc42", 316 | "metadata": {}, 317 | "source": [ 318 | "Now let's also look at the different families and how individual events are linked" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "7a6f75d3", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "#==================================4=============================================\n", 329 | "# spanning tree\n", 330 | "#================================================================================\n", 331 | "plt.figure( 1)\n", 332 | "ax = plt.subplot(111) \n", 333 | "for iEv in range( catParent.size()):\n", 334 | " print( f\"MS-ID, {int(catParent.data['N'][iEv]):d}, t-Par: {catParent.data['Time'][iEv]:.5f},'t-child', {eqCat.data['Time'][iEv]:.5f}\", end= \"\\r\")\n", 335 | "\n", 336 | " if dNND['aNND'][iEv] < dPar['eta_0']:#triggered cluster\n", 337 | " ax.plot( [catParent.data['Time'][iEv]], [catParent.data['Lat'][iEv]], 'ro', ms = 12, alpha = .2)\n", 338 | " ax.plot( [catParent.data['Time'][iEv],catChild.data['Time'][iEv]],\n", 339 | " [catParent.data['Lat'][iEv], catChild.data['Lat'][iEv]], 'k-', marker = 'o', ms = 4, mew =1, mfc = 'none')\n", 340 | " else: # independent events\n", 341 | " ax.plot( [catChild.data['Time'][iEv]], [catChild.data['Lat'][iEv]], 'bo', ms = 5, alpha = .6)\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "id": "fcae05b9", 347 | "metadata": {}, 348 | "source": [ 349 | "The blue dots in the above plot are independent background events. That means their nearest neighbor\n", 350 | "is beyond the chosen (or calculated) eta_0 value.\n", 351 | "The red and black circles are clustered events linked by thin black lines.\n", 352 | "The darker the red color the more events are linked to that particular parent.\n", 353 | "Black circles are the last generation in a trigger series, i.e., aftershocks that do not produce aftershocks\n", 354 | "themselves.\n", 355 | "Can you dedetect some major triggering events? (Hint: think about major eqs. in 1992, 1999, 2010)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "id": "58990e83", 361 | "metadata": {}, 362 | "source": [ 363 | "### 3: count aftershocks and plot productivity relation" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "id": "4fdbfb97", 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "eqCat = EqCat( )\n", 374 | "#=================================1==============================================\n", 375 | "# load data, select events\n", 376 | "#================================================================================\n", 377 | "eqCat.loadMatBin( f\"{dir_in}/{file_in}\")\n", 378 | "eqCat.selectEvents( Mmin, Mmax, 'Mag')\n", 379 | "eqCat.selectEvents( tmin, tmax, 'Time')\n", 380 | "N_tot = eqCat.size()\n", 381 | "print( 'total no. of events', N_tot)\n", 382 | "#=================================2==========================================================================\n", 383 | "# singles are counted as MS with 0 AS\n", 384 | "#============================================================================================================\n", 385 | "print( 'total number of clusters', len( dClust.keys()), 'no. 
of BG events', dClust['0'].shape[0])\n", 386 | "a_ID_single = dClust['0']\n", 387 | "\n", 388 | "# IDs of BG events\n", 389 | "a_iSel = np.zeros( eqCat.size(), dtype = int)\n", 390 | "a_mag_single = np.zeros( len( a_ID_single))\n", 391 | "a_N_AS_single= np.zeros( len( a_ID_single))\n", 392 | "a_N_FS_single= np.zeros( len( a_ID_single))\n", 393 | "for i in range( a_ID_single.shape[0]):\n", 394 | " # event ID may be in catalog more than once\n", 395 | " sel_ev = eqCat.data['N'] == a_ID_single[i]\n", 396 | " a_mag_single[i] = eqCat.data['Mag'][sel_ev][0]\n", 397 | " a_iSel[sel_ev] = 1#catalog.data['N'][catalog.data['N']==aEqID[i]][0]\n", 398 | " if sel_ev.sum() != 1:\n", 399 | " error_str = 'more than event found', eqCat.data['N'][sel_ev]\n", 400 | " raise( ValueError( error_str))\n", 401 | "### remove singles from catalog\n", 402 | "eqCat.selDicAll( np.logical_not(a_iSel))\n", 403 | "print( 'remaining events', eqCat.size(), 'BG events', len( a_mag_single))\n", 404 | "dClust.pop('0') # remove singles" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "id": "8a0f5ca4", 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "\n", 415 | "#=================================2==========================================================================\n", 416 | "# get MAGs of MS with aftershocks, count aftershocks\n", 417 | "#============================================================================================================\n", 418 | "a_N_FS = np.zeros( len( dClust.keys()), dtype = int)\n", 419 | "a_N_AS = np.zeros( len( dClust.keys()), dtype = int)\n", 420 | "a_MS_mag = np.zeros( len( dClust.keys()))\n", 421 | "a_MS_ID = np.zeros( len( dClust.keys()), dtype = int)\n", 422 | "iCl = 0\n", 423 | "for sCl in dClust.keys():\n", 424 | " aEqID = dClust[sCl]# np.unique( dClust[sCl].flatten()) unique is not needed anymore, createCluster has been fixed\n", 425 | " print( 'cl: ', iCl+1,'out of: ', len( dClust.keys()), 'no. of ev. in cl.', \n", 426 | " len( aEqID), len( np.unique( dClust[sCl])), end=\"\\r\")\n", 427 | " # find MS mag and magnitude of entire family\n", 428 | " atmp_MAG = np.zeros( len( aEqID))\n", 429 | " atmp_Time= np.zeros( len( aEqID))\n", 430 | " a_iSel = np.zeros( eqCat.size(), dtype = int)\n", 431 | " # for each family find: event mag. and origin time\n", 432 | " for iM in range( len( aEqID)):\n", 433 | " sel_ev = eqCat.data['N'] == aEqID[iM]\n", 434 | " if sel_ev.sum() != 1:\n", 435 | " error_str = 'more/less than event found', eqCat.data['N'][sel_ev], aEqID[iM]\n", 436 | " raise( ValueError, error_str)\n", 437 | " atmp_MAG[iM] = eqCat.data['Mag'][sel_ev][0]\n", 438 | " atmp_Time[iM] = eqCat.data['Time'][sel_ev][0]\n", 439 | " a_iSel[sel_ev] = 1\n", 440 | " # remove events from catalog\n", 441 | " #catalog.selDicAll( np.logical_not(a_iSel))\n", 442 | " #----------------------------mainshock-------------------------------------------------- \n", 443 | " selMS = atmp_MAG == atmp_MAG.max()\n", 444 | " f_tMS = atmp_Time[selMS][0]\n", 445 | " i_ID_MS = aEqID[selMS]\n", 446 | "\n", 447 | " #print( 'tMS', tMS, v_currEqID.shape[0], 'MAG', curr_cat.data['MAG'][selMS][0]\n", 448 | " #----------------------------aftershock-------------------------------------------------- \n", 449 | " selAS = atmp_Time > f_tMS\n", 450 | " selFS = atmp_Time < f_tMS\n", 451 | " #print( 'no. 
of aftershocks', selAS.sum()\n", 452 | " # save number of aftershocks for each MS mag\n", 453 | " a_MS_mag[iCl] = atmp_MAG[selMS][0]#, dPar['magRound'])\n", 454 | " a_N_AS[iCl] = selAS.sum()\n", 455 | " a_N_FS[iCl] = selFS.sum()\n", 456 | " a_MS_ID[iCl] = int( i_ID_MS[0])\n", 457 | " iCl += 1\n", 458 | "\n", 459 | "#=================================3==========================================================================\n", 460 | "# compare MS+single+FS+AS to original number of events in catalog\n", 461 | "#============================================================================================================\n", 462 | "# combine single without AS with mainshocks that do have aftershocks\n", 463 | "a_N_FS = np.append( a_N_FS, a_N_FS_single)\n", 464 | "a_N_AS = np.append( a_N_AS, a_N_AS_single)\n", 465 | "a_MS_mag = np.append( a_MS_mag, a_mag_single)\n", 466 | "a_MS_ID = np.append( a_MS_ID, a_ID_single)\n", 467 | "print( 'tot ev. in catalog', N_tot,'tot events in families',a_N_FS.sum() + a_N_AS.sum() + a_MS_mag.shape[0])\n", 468 | "#print( 'N BG', a_mag_single.shape[0], 'FS', a_N_FS_single.sum(), 'AS', a_N_AS_single.sum(), 'MS (MS+BG)', a_MS_mag.shape[0]\n", 469 | "\n", 470 | "#=================================4==========================================================================\n", 471 | "# save to ASCII text\n", 472 | "#============================================================================================================\n", 473 | "file_out = '%s/%s_Nas_MS_Mc_%.1f.txt'%(dir_in, file_in.split('.')[0], dPar['Mc'])#, dPar['magRound'])\n", 474 | "m_N_as = np.array([a_MS_mag, a_N_AS, a_N_FS, a_MS_ID])\n", 475 | "np.savetxt( file_out, m_N_as.T, fmt='%10.3f%10i%10i%14i',\n", 476 | " header = 'MAG N-AS N-FS MS-ID; note N_AS=0 highlights singles or FS only')\n" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "id": "9d2290ea", 482 | "metadata": {}, 483 | "source": [ 484 | "the two numbers above should hopefully match, otherwise you may have to rerun the code from the beginning" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "id": "7a66f796", 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "dPar['magRound'] = 1 # binning\n", 495 | "#=================plotting==============\n", 496 | "dPar['alpha'] = 1.0 # power law exponent\n", 497 | "dPar['xmin'] = 2 \n", 498 | "dPar['xmax'] = 8\n", 499 | "dPar['ymin'] = 0.1 \n", 500 | "dPar['ymax'] = 1e4\n", 501 | "\n", 502 | "#=================================2==========================================================================\n", 503 | "# count ave. no. 
of aftershocks per MS magnitude\n", 504 | "#============================================================================================================\n", 505 | "aMag_round= np.around( m_N_as[0], dPar['magRound'])\n", 506 | "aMag_bin = np.array( sorted(np.unique( aMag_round)))\n", 507 | "aAveNo_AS = np.ones( len( aMag_bin))*np.nan\n", 508 | "aNo_Fam = np.zeros( len( aMag_bin)) # total number of families within mag bin\n", 509 | "aNo_AS20 = np.zeros( len( aMag_bin))\n", 510 | "aNo_AS80 = np.zeros( len( aMag_bin))\n", 511 | "\n", 512 | "i = 0\n", 513 | "for curr_mag in aMag_bin:\n", 514 | " selMag = curr_mag == aMag_round\n", 515 | " aAveNo_AS[i] = m_N_as[1][selMag].mean()\n", 516 | " if selMag.sum() > 0:\n", 517 | " aNo_AS20[i] = np.percentile( m_N_as[1][selMag], 20)\n", 518 | " aNo_AS80[i] = np.percentile( m_N_as[1][selMag], 80)\n", 519 | " aNo_Fam[i] = selMag.sum()\n", 520 | " print( curr_mag, 'mean N-AS', round(aAveNo_AS[i],2), aNo_AS20[i],aNo_AS80[i], 'no. of fam', aNo_Fam[i],end=\"\\r\")\n", 521 | "\n", 522 | " i += 1\n", 523 | "\n", 524 | "#=================================3==========================================================================\n", 525 | "# plot productivity law\n", 526 | "#============================================================================================================\n", 527 | "plt.figure(1, figsize=(8,6))\n", 528 | "ax = plt.axes([.14,.12,.78,.83])#pPlot.createFigureSquare(1)\n", 529 | "ax.semilogy( m_N_as[0], m_N_as[1], 'o', ms = 6, mew =0, mfc = '.7', alpha = .2 )\n", 530 | "#ax.errorbar( aMag_bin, aAveNo_AS, yerr=[np.zeros(aMag_bin.shape[0]), aNo_AS80-aAveNo_AS],\n", 531 | "# fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w')\n", 532 | "ax.errorbar( aMag_bin, aAveNo_AS, yerr=[aAveNo_AS-aNo_AS20, aNo_AS80-aAveNo_AS],\n", 533 | " fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w')\n", 534 | "\n", 535 | "#-------------------------exponential - estimate-----------------------------------------------------\n", 536 | "mag_fit = aMag_bin[10] # force fit through this point\n", 537 | "f_no_AS_pl = aAveNo_AS[aMag_bin == mag_fit]\n", 538 | "preFac = np.log10( f_no_AS_pl) - dPar['alpha']*mag_fit\n", 539 | "a_N_hat = 10**( dPar['alpha']*aMag_bin + preFac)\n", 540 | "ax.semilogy( aMag_bin, a_N_hat, 'w-')\n", 541 | "ax.semilogy( aMag_bin, a_N_hat, '-', color = 'r', lw = 2, label = 'exp = %.1f'%( np.round( dPar['alpha'],1)))\n", 542 | "\n", 543 | "#-------------------------------labels, limits etc.-----------------------------------------------\n", 544 | "ax.set_xlim( dPar['xmin'], dPar['xmax'])\n", 545 | "ax.set_ylim( dPar['ymin'], dPar['ymax'])\n", 546 | "ax.set_xlabel( 'Mainshock Magnitude')\n", 547 | "ax.set_ylabel( 'Number of Aftershocks')\n", 548 | "ax.legend( loc = 'upper left', frameon = False)\n" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "id": "d7d9a928", 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [] 558 | } 559 | ], 560 | "metadata": { 561 | "kernelspec": { 562 | "display_name": "Python 3 (ipykernel)", 563 | "language": "python", 564 | "name": "python3" 565 | }, 566 | "language_info": { 567 | "codemirror_mode": { 568 | "name": "ipython", 569 | "version": 3 570 | }, 571 | "file_extension": ".py", 572 | "mimetype": "text/x-python", 573 | "name": "python", 574 | "nbconvert_exporter": "python", 575 | "pygments_lexer": "ipython3", 576 | "version": "3.9.13" 577 | } 578 | }, 579 | "nbformat": 4, 580 | "nbformat_minor": 
5 581 | } 582 | -------------------------------------------------------------------------------- /data/hs_1981_2011_all.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_NND_Mc_3.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_NND_Mc_3.0.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_NND_Mc_4.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_NND_Mc_4.0.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_RT_Mc_3.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_RT_Mc_3.0.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_RT_Mc_4.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_RT_Mc_4.0.mat -------------------------------------------------------------------------------- /plots/SI_indAS_deficiencyOK_v5.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/SI_indAS_deficiencyOK_v5.0.pdf -------------------------------------------------------------------------------- /plots/T_R_hs_1981_2011_all_Mc_3.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/T_R_hs_1981_2011_all_Mc_3.0.png -------------------------------------------------------------------------------- /plots/T_R_hs_1981_2011_all_Mc_4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/T_R_hs_1981_2011_all_Mc_4.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_Mc_3.0_ASprod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_Mc_3.0_ASprod.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_Mc_4.0_ASprod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_Mc_4.0_ASprod.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_NND_hist_Mc_3.0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_NND_hist_Mc_3.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_NND_hist_Mc_4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_NND_hist_Mc_4.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_spanningTree_Mc_3.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_spanningTree_Mc_3.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_spanningTree_Mc_4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_spanningTree_Mc_4.0.png -------------------------------------------------------------------------------- /src/EqCat.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/python3.7 2 | """seismic catalog analysis class earthquake catalogs 3 | - data is stored in dictionary which can be extended without difficulties 4 | as long as all vectors have the same length 5 | 6 | - basic functionalities are focused on catalog I/O 7 | and initial processing (space, time, magnitude window selection) 8 | 9 | """ 10 | import os 11 | import numpy as np 12 | import scipy.io #to writer and read mat bin 13 | # the next line sets the path to PROJ LIB, should be found automatically for conda install 14 | #-----------------my modules----------------------------------------- 15 | #import ClusteringAnalysis.src.datetime_utils as dateTime 16 | import src.datetime_utils as dateTime 17 | 18 | #-------------------------------------------------------------------- 19 | class EqCat: 20 | """ 21 | 22 | (1) 23 | EqCat.data - type python dictionary 24 | e.g.: 25 | self.data = { 'N' : , #event number 26 | 'Time' : np.array([]), # in decimal years 27 | 'Lon' : np.array([]), #or lon 28 | 'Lat' : np.array([]), #or lat 29 | 'Depth' : np.array([]), #or depth 30 | 'Mag' : np.array([]), 31 | 32 | } 33 | """ 34 | def __init__( self, **kwargs ): 35 | """initiate data dictionary 36 | 37 | """ 38 | self.data = {} 39 | 40 | self.methods = [method_name for method_name in dir(self) 41 | if callable(getattr(self, method_name)) and method_name[0] != '_'] 42 | 43 | """ input use kwargs to go from cartesian to GPS coordinates, 44 | tags can be accessed: sLoc1 - sLoc3 , last one is depth or self.sLoc3 """ 45 | # if 'type' in kwargs.keys() and kwargs['type'] == 'GPS': 46 | # self.sLoc1, self.sLoc2, self.sLoc3 = 'Lon', 'Lat', 'Depth' 47 | # elif 'type' in kwargs.keys() and kwargs['type'] == 'Cart': 48 | # self.sLoc1, self.sLoc2, self.sLoc3 = 'X','Y','Z' 49 | # 50 | self.sLoc1, self.sLoc2, self.sLoc3 = 'Lon', 'Lat', 'Depth' 51 | self.sID = 'N' 52 | 53 | def copy(self, catalog ): 54 | """ deep copy of catalog object""" 55 | import copy 56 | try: 57 | for tag, vector in catalog.data.items(): 58 | self.data[tag] = copy.copy( catalog.data[tag]) 59 | 
except: 60 | for tag, vector in catalog.items(): 61 | self.data[tag] = copy.copy( catalog[tag]) 62 | 63 | #=========================================================================== 64 | # import routines 65 | #=========================================================================== 66 | def loadEqCat(self, file_in, catalogType, verbose=False, **kwargs): 67 | """ check what type of catalog and call correct function that handles import 68 | input: - file - catalog filename 69 | - catalogType = 'hs_reloc', focMech ... etc. 70 | = 'WaldhauserReloc' - Waldhauser's selection of repeaters at Hayward 71 | = 'hypoDD' - ID, lat, long, depth, x, y, z, x-error,y-error,z-error, yr, month, day, hour, minute, second, magnitude 72 | - kwargs['header'] - what is the line number of header info. of columns -> used for dic tags 73 | - kwargs['removeColumn'] - specify columns to be removed from original file prior to loading the file 74 | uses 'awk' 75 | - required since loadtxt assume all table entries to be floats 76 | 77 | TODO: --> biggest time sink is checking the date-time for every earthquake and converting it to dec. year --> vectorizing should help 78 | 79 | return: create eqCat object with self.data = {'Time', 'Lon', 'Lat', 'Depth', 'Mag'}, 80 | which are the standard dictionary tags 81 | """ 82 | #----------------check kwargs--------------------------------- 83 | if 'header' in kwargs.keys() and kwargs['header'] is not None: 84 | header = kwargs['header'] 85 | else: 86 | header = None 87 | if 'removeColumn' in kwargs.keys() and kwargs['removeColumn'] is not None: 88 | import src.data_utils as data_utils 89 | # remove columns and change file_name to copy of original file to keep the original 90 | file_in = data_utils.removeColumn( file_in, kwargs['removeColumn']) 91 | #-----------choose import routine------------------------------ 92 | if catalogType == 'HS_reloc': 93 | if header is None: 94 | header = 1 95 | #TODO: get dic tag from file header 96 | headList = ['YR', 'MO', 'DY', 'HR', 'MN','SC', 'N', 'Lat','Lon','Depth', 'Mag', 'nPick', 'distSta', 'rms', 'd/n', 'rMeth', 'clID', 'nEvInCl', 'nlnk','err_h','err_z','rel_err_H', 'rel_err_Z'] 97 | self.data = {} 98 | # 0-5 (datetime), 6(ID), 7 (lat), 8 (lon), 9 (depth), 10 (mag) 99 | mData = np.loadtxt(f"{file_in}", usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) 100 | print( 'no of columns', mData[0].shape[0]) 101 | print( 'no. of earthquakes', mData[:,0].shape[0]) 102 | for l in range( mData[0].shape[0] ): 103 | self.data[headList[l]] = mData[:,l] 104 | 105 | elif catalogType == 'USGS': 106 | # 'time', 'latitude', 'longitude', 'depth', 'mag', 'magType', 'nst', 'gap', 'dmin', 'rms', 'net', 'id' 107 | # 0 1 2 3 4 5 6 7 8 9 10 11 108 | 109 | ###1###Date-time 110 | mDateTime = np.genfromtxt( file_in, delimiter=(4,1,2,1,2,1,2,1,2,1,4), 111 | skip_header=1, usecols=(0,2,4,6,8,10)).T 112 | headDate = ['YR', 'MO', 'DY', 'HR', 'MN', 'SC'] 113 | for i in range( len(headDate)): 114 | self.data[headDate[i]] = mDateTime[i] 115 | ###2### ID 116 | #mID = np.loadtxt( file_in, delimiter=',', skiprows=1, usecols=(10,11), dtype = str).T 117 | #self.data['ID'] = np.array([ int(mID[1,i].strip( mID[0,i])) for i in range( mID.shape[1])], dtype = int) 118 | self.data['ID'] = np.arange( len( self.data['YR'])) 119 | ###3### location, magnitude, gap etc. 
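            # read epicenter coordinates, depth and magnitude (columns 1-4 of a standard USGS csv export)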
120 | header = ['Lat', 'Lon', 'Depth', 'Mag']#, 'Nst', 'Gap', 'Dmin', 'rms'] 121 | mData = np.loadtxt( file_in, delimiter=',', skiprows=1, 122 | usecols=(1,2,3,4),#,6,7,8,9), 123 | dtype = float).T 124 | for i in range( len(header)): 125 | self.data[header[i]] = mData[i] 126 | 127 | elif catalogType == 'Kilauea': 128 | mData = np.loadtxt( file_in).T 129 | # :TODO convert np.array to python dictionary 130 | 131 | #convert date to decimal year 132 | self.data['Time'] = np.array([]) 133 | for i in range( self.data['Mag'].shape[0] ): 134 | if verbose == True: 135 | print( i+1, 'out of', self.data['Mag'].shape[0]) 136 | YR, MO, DY, HR, MN, SC = dateTime.checkDateTime( [self.data['YR'][i], self.data['MO'][i],self.data['DY'][i], self.data['HR'][i],self.data['MN'][i],self.data['SC'][i]]) 137 | self.data['Time'] = np.append( self.data['Time'], 138 | dateTime.dateTime2decYr( [YR, MO, DY, HR, MN, SC])) 139 | #sort catalog chronologically 140 | self.sortCatalog('Time') 141 | 142 | ##clean up 143 | if 'removeColumn' in kwargs.keys() and kwargs['removeColumn'] is not None: 144 | print( "delete: %s, than hit: y"%( file_in)) 145 | removeFile = input( ' ') 146 | print( removeFile) 147 | if os.path.isfile( file_in) and removeFile == 'y': 148 | os.system( "rm %s"%( file_in)) 149 | 150 | #======================================2========================================== 151 | # basic processing and catalog event selection 152 | #================================================================================= 153 | def size(self): 154 | if 'Time' in self.data.keys(): 155 | return len( self.data['Time']) 156 | else: 157 | return None 158 | 159 | 160 | def selectEvents(self, min, max, tag, **kwargs): 161 | """ 162 | returns events with time, coordinates, rel.Magnitude that corresponds to a certain time frame 163 | -cut catalog includes lower bound (min) but excludes upper bound (max) 164 | input: min, max = window of events 165 | min - can be set to string for columns that contain strings, e.g. type, magType etc. 166 | if min is not a string: 167 | min = None, select only events below max 168 | max = None, select only events above min 169 | tag can be 'Time' or magnitude , location, Mw... depending on dictionary 170 | kwargs: includeBoundaryEvents = True; include events with times equal to min and max otherwise 171 | include only lower boundary (min event) 172 | returnSel = returns IDs of selected events (type np.array([], int)) 173 | 174 | example: selectEvents( 3, 5, 'Mag', includeBoundaryEvents = True) - all events between 3 and 5 including M=3 and M=5 events 175 | selectEvents( 3, None, 'Mag') - everything above M=3 excluding M=3 events 176 | selectEvents( 4, None, 'Mag') and then selectEvents( 'w', None, 'MagType') - all Mws above Mw = 4 177 | 178 | """ 179 | if 'includeBoundaryEvents' in kwargs.keys() and kwargs['includeBoundaryEvents'] == True: 180 | if min == None or max == None: 181 | error_str = 'both boundaries have to be set to include boundary events' 182 | raise( ValueError( error_str)) 183 | else: 184 | sel = np.logical_and( self.data[tag] >= float(min), self.data[tag] <= float(max ) ) 185 | else: 186 | if isinstance( min, str ): 187 | #str columns, e.g magType .. 
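                # exact match on a string-valued column (e.g. magType == 'w'); note this returns a plain list of row indices rather than a boolean mask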
188 | sel = [i for i, x in enumerate( self.data[tag] ) if x == min] 189 | elif isinstance( min, (int, float) ) or min == None: 190 | if max == None: 191 | sel = self.data[tag] >= float(min) 192 | elif min == None: 193 | sel = self.data[tag] < max 194 | else: 195 | sel = np.logical_and( self.data[tag] >= float(min), self.data[tag] < float(max) ) 196 | else: 197 | error_str = 'unknown input min = %s'%(min) 198 | raise( ValueError( error_str)) 199 | #sel = np.arange( self.size(), dtype = int )[sel] 200 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 201 | return sel 202 | else: 203 | self.selDicAll( sel) 204 | 205 | def sortCatalog(self, tag, **kwargs): 206 | """sort catalog according to tag (string) e.g. Time, Mag, .... 207 | kwargs: beginWithBiggest = True , sort beginning with Biggest value 208 | returnSel = return boolean """ 209 | #get boolean vector for sorting 210 | vSortBool = self.data[tag].ravel().argsort() 211 | if 'beginWithBiggest' in kwargs.keys() and kwargs['beginWithBiggest'] == True: 212 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 213 | return vSortBool 214 | else: 215 | self.selDicAll( vSortBool[::-1]) 216 | else: 217 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 218 | return vSortBool 219 | else: 220 | self.selDicAll( vSortBool) 221 | 222 | def selDicAll(self, sel): 223 | """apply boolean vector to entire data 224 | e.g. for sorting or cutting ... """ 225 | for tag, vector in self.data.items(): #loop through all entries (tag = vector name, vector = entries) 226 | # for NND analysis first event is missing (orphan), so sel.shape = vector.shape - 1 227 | #if sel.shape[0] != vector.shape[0]: 228 | # print( tag, 'does not have the right dimension: %i %i'%(vector.shape[0], sel.shape[0]) 229 | #else: 230 | self.data[tag] = self.data[tag][sel] 231 | 232 | def selEventsFromID(self, a_ID, **kwargs): 233 | """ select events specified by list of IDs (self.data['N']) 234 | -----------------input 235 | 236 | kwargs: repeats = True , if eqIDs are repeated keep them in catalog and maintain the same order 237 | default = False, every earthquake is only ones in catalog, for several events with same ID keep only the first event 238 | 239 | ----------------return: 240 | eq catalog that corresponds to vEqID """ 241 | Nev= len( a_ID) 242 | repeats = False 243 | if 'repeats' in kwargs.keys() and kwargs['repeats'] == True: 244 | a_sel = np.ones( Nev, dtype = int) 245 | v_i = np.arange( self.size(), dtype = int) 246 | i = 0 247 | for currID in a_ID: # put one at location of ID match 248 | sel_curr_ev = self.data['N']==int(currID) 249 | if sel_curr_ev.sum() > 0: 250 | a_sel[i] = int( v_i[sel_curr_ev][0]) 251 | i += 1 252 | else: 253 | a_sel = np.in1d( self.data['N'], a_ID, assume_unique=True) 254 | self.selDicAll( a_sel) 255 | 256 | #======================================3========================================== 257 | # .mat binary load save 258 | #================================================================================= 259 | def check_keys(self, ): 260 | ''' 261 | checks if entries in dictionary are mat-objects. 
If yes 262 | to dict is called to change them to nested dictionaries 263 | ''' 264 | for key in self.data: 265 | if isinstance(self.data[key], scipy.io.matlab.mio5_params.mat_struct): 266 | self.data[key] = self.todict( self.data[key]) 267 | 268 | def todict(self, matobj): 269 | ''' 270 | A recursive function which constructs from matobjects nested dictionaries 271 | ''' 272 | dData = {} 273 | for strg in matobj._fieldnames: 274 | elem = matobj.__dict__[strg] 275 | if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct): 276 | dData[strg] = self.todict(elem) 277 | else: 278 | dData[strg] = elem 279 | return dData 280 | 281 | def saveMatBin(self, file): 282 | """save dic to bin file""" 283 | #scipy.io.savemat(file, self.data, appendmat=False, format = '4', oned_as = 'row' , do_compression = True) 284 | scipy.io.savemat(file, self.data, appendmat=True, format = '5',do_compression = True ) 285 | 286 | 287 | def loadMatBin(self, filename): 288 | ''' 289 | this function should be called instead of direct scipy.io.loadmat 290 | as it helps with additional non-variable tags in python dictionaries from .mat files 291 | 292 | 293 | --> can handle 'nested' variables in matlab where variable contain several structures 294 | ''' 295 | 296 | self.data = scipy.io.loadmat(filename,struct_as_record=False, squeeze_me=True) 297 | self.check_keys( ) 298 | l_tags = list( self.data.keys()) 299 | for tag in l_tags: 300 | if tag[0] == '_': 301 | #print( 'remove', tag, self.data[tag] 302 | self.data.pop( tag, None) 303 | #else: 304 | # print( tag, self.data[tag].shape[0] 305 | 306 | #======================================4========================================== 307 | # projections, rotations etc. 308 | #================================================================================= 309 | def toCart_coordinates(self, **kwargs): 310 | """ 311 | :input 312 | **kwargs['projection'] = 'aeqd' - (default) azimuthal equidistant 313 | 'eqdc' - equi distant conical projection 314 | 'cyl' - cynlidrical equidistant - not working 315 | 'returnProjection' : True - return basemap object 316 | use equidistant projection to convert lon, lat to X, Y coordinates 317 | :output catalog attributes: - self.data['X'], self.data['Y'], self.data['Depth'] in km 318 | return True or basemap object, m 319 | 320 | """ 321 | os.environ["PROJ_LIB"] = f"{os.environ['HOME']}/opt/anaconda3/share/proj"# adjust, comment out as needed 322 | from mpl_toolkits.basemap import Basemap 323 | projection = 'aeqd' 324 | if 'projection' in kwargs.keys() and kwargs['projection'] is not None: 325 | projection = kwargs['projection'] 326 | from mpl_toolkits.basemap import Basemap 327 | xmin,xmax = self.data['Lon'].min(), self.data['Lon'].max() 328 | ymin,ymax = self.data['Lat'].min(), self.data['Lat'].max() 329 | 330 | # setup equi distance basemap. 
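        # NOTE (untested sketch, not used by this method): Basemap is deprecated; an
        # equivalent lon/lat -> km conversion could be done with pyproj, roughly:
        #     from pyproj import Proj
        #     p = Proj( proj = 'aeqd', lat_0 = (ymin+ymax)*.5, lon_0 = (xmin+xmax)*.5, ellps = 'WGS84')
        #     self.data['X'], self.data['Y'] = p( self.data['Lon'], self.data['Lat'])  # in meters
        # The Basemap-based conversion below remains the default.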
331 | m = Basemap( llcrnrlat = ymin,urcrnrlat = ymax, 332 | llcrnrlon = xmin,urcrnrlon = xmax, 333 | projection = projection,lat_0=(ymin+ymax)*.5,lon_0=(xmin+xmax)*.5, 334 | resolution = 'l') 335 | 336 | self.data['X'], self.data['Y'] = m( self.data['Lon'], self.data['Lat']) 337 | if projection == 'cyl': 338 | pass 339 | else: 340 | self.data['X'] *= 1e-3 341 | self.data['Y'] *= 1e-3 342 | if 'returnProjection' in kwargs.keys() and kwargs['returnProjection'] == True: 343 | return m 344 | else: 345 | return True 346 | 347 | #======================================5========================================== 348 | # shuffling, random catalog 349 | #================================================================================= 350 | def randomize_cat(self): 351 | """ 352 | - create a randomized catalog with same average rate, no. of events and 353 | spatial extent as the initial catalog 354 | 355 | :return: - random Poissonian catalog, uniform spatial distribution 356 | """ 357 | ## randomize event times -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | #!python2.7 2 | __all__ = ["data_utils", "datetime_utils"] -------------------------------------------------------------------------------- /src/clustering.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # python3.7 3 | ''' 4 | Created on April 10th, 2019 5 | 6 | - function required for clustering analysis based on nearest-neighbor distances 7 | 8 | - NND_eta - eq. 1 for NND in Zaliaping & Ben-Zion 2013 9 | 10 | @author: tgoebel 11 | ''' 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import warnings 15 | #=============================================================================== 16 | # my modules 17 | #=============================================================================== 18 | import src.data_utils as data_utils 19 | 20 | #=============================================================================== 21 | # 22 | #=============================================================================== 23 | def NND_eta( eqCat, dConst, verbose = False, **kwargs): 24 | """ 25 | - NND_eta - eq. 1 for NND in Zaliapin & Ben-Zion 2013 26 | search for 'parent event' i.e. 
earthquake that occurred closest in space-time-magnitude domain 27 | but prior to the current event 28 | here: [jC] - are the off spring events and we try to find the closest parent, occurring earlier in time 29 | [sel_tau_par] - are the potential parent events that occurred before [jC], we select the closest in time 30 | 31 | Parameters 32 | ---------- 33 | catalog - catalog.data['Time'], 'Lon', 'Lat' (or 'X', 'Y',) 'Depth', 'MAG' 34 | - time, cartesian coordinates (X,Y, Depth), magnitude 35 | dConst - {'Mc':float, 'b':float, 'D':float} # dictionary with statistical seismicity parameters 36 | - completeness , b-value, fractal dimension 37 | kwargs - rmax (default: = 500) - maximum space window (for faster computation) 38 | - tmax (default: = 20) - maximum time window (for faster computation) 39 | - correct_co_located = True, add gaussian uncertainty to avoid product going to zero for co-located earthquakes 40 | - haversine = True - use haversine distance at surface instead of 3D cartesian distance 41 | - M0 - reference magnitude, default: M0 = 0 42 | Returns 43 | ------- 44 | - { 'aNND' : aNND, - nearest neighbor space-time magnitude distance 45 | 'aEqID_p' : np.array - ID of the parent event 46 | 'aEqID_c' : np.array - ID of the child event 47 | 'Time' : np.array - origin time of offspring 48 | } 49 | 50 | see: Clustering Analysis of Seismicity and Aftershock Identification, Zaliapin, I. (2008) 51 | 52 | """ 53 | #-------------------------------set args and kwargs----------------------------------------------- 54 | rmax = 500 # in km 55 | tmax = 20 # in years 56 | M0 = 0 # reference mag 57 | if 'M0' in kwargs.keys() and kwargs['M0'] is not None: 58 | M0 = kwargs['M0'] 59 | if 'rmax' in kwargs.keys() and kwargs['rmax'] is not None: 60 | rmax = kwargs['rmax'] 61 | if 'tmax' in kwargs.keys() and kwargs['tmax'] is not None: 62 | tmax = kwargs['tmax'] 63 | #-----------------------------add small uncertainty to X in case events are colocated-------------------------- 64 | if 'correct_co_located' in kwargs.keys() and kwargs['correct_co_located'] == True: 65 | vUncer = np.random.randn( eqCat.size())*1e-10 66 | eqCat.data['Lon'] += vUncer 67 | #------------------------------------------------------------------------------ 68 | aNND = np.zeros( eqCat.size()) 69 | vID_p = np.zeros( eqCat.size()) 70 | vID_c = np.zeros( eqCat.size()) 71 | a_M_MS_ref= (eqCat.data['Mag'] - M0)# mainshock mag with respect to reference 72 | 73 | for jC in range( eqCat.size()): 74 | if verbose == True: 75 | print( f"event {jC+1:d} of {eqCat.size():d}", end= "\r") 76 | # interevent times: take events that happend before t_i 77 | # child - parent > 0 78 | tau = eqCat.data['Time'][jC] - eqCat.data['Time'] 79 | sel_tau_par = tau > 0 80 | if sel_tau_par.sum() > 0: 81 | 82 | vcurr_ID = np.arange( eqCat.size(), dtype = int)[sel_tau_par] 83 | # if cartesian coordinates are available 84 | if 'X' in eqCat.data.keys() and 'Y' in eqCat.data.keys(): 85 | vR = np.sqrt( (eqCat.data['X'][jC] - eqCat.data['X'][vcurr_ID])**2 + (eqCat.data['Y'][jC] - eqCat.data['Y'][vcurr_ID])**2 ) 86 | else: 87 | # haversine distance 88 | vR = haversine( eqCat.data['Lon'][jC], eqCat.data['Lat'][jC],eqCat.data['Lon'][vcurr_ID], eqCat.data['Lat'][vcurr_ID] ) 89 | sel_r_par = vR < rmax 90 | if sel_r_par.sum() > 0: 91 | vcurr_ID = vcurr_ID[sel_r_par] 92 | curr_Eta = tau[vcurr_ID]* (vR[sel_r_par]**dConst['D']) *( 10**(-dConst['b']*a_M_MS_ref[vcurr_ID])) 93 | sel_min = curr_Eta == curr_Eta.min() 94 | aNND[jC] = curr_Eta[sel_min][0] 95 | vID_p[jC] = 
eqCat.data['N'][vcurr_ID][sel_min][0] 96 | vID_c[jC] = eqCat.data['N'][jC] 97 | #print( 'parent', eqCat.data['N'][vcurr_ID][sel_min][0], 'offspring', eqCat.data['N'][jC] 98 | #print( 'parent', eqCat.data['Time'][vcurr_ID][sel_min][0], 'offspring', eqCat.data['Time'][jC] 99 | 100 | if sel_min.sum() > 1: 101 | print( aNND[jC], curr_Eta[sel_min], eqCat.data['N'][vcurr_ID][sel_min]) 102 | print( eqCat.data['Lon'][vcurr_ID][sel_min], eqCat.data['Lat'][vcurr_ID][sel_min]) 103 | sel2 = aNND > 0 104 | if np.logical_not(sel2).sum() > 0: 105 | print( f"{np.logical_not(sel2).sum()} %i events with NND=0 ") 106 | #raise ValueError, error_str 107 | # remove events with aNND < 0; i.e. event at the beginning with no preceding parent 108 | return { 'aNND' : aNND[sel2], 'aEqID_p' : vID_p[sel2], 'aEqID_c' : vID_c[sel2], 'Time' : eqCat.data['Time'][sel2]} 109 | #return { 'aNND' : aNND, 'aEqID_p' : vID_p, 'aEqID_c' : vID_c, 'Time' : eqCat.data['Time'][1::]} 110 | 111 | 112 | def rFromTau( dt, b, D, eta_0, M_MS ): 113 | """ 114 | - compute maximum distance R for events in cluster 115 | based on interevent time, eta_0 and D (fractal dimension) 116 | :INPUT 117 | dt - array or float 118 | interevent times (dt relative to MS or first event in family) 119 | b - Gutenberg-Richter b-value 120 | D - fractal dimension, usually D~1.6 121 | eta_0 - empiricallly determined separation line between clustered and background 122 | mode 123 | M_MS - mainshock magnitude (here we assume only one triggering generation) 124 | :return: 125 | """ 126 | return ( -eta_0/dt * 10**( b*M_MS))**(1/D)*1e-3 127 | 128 | def rescaled_t_r(catChild, catPar, dConst, **kwargs): 129 | """ 130 | - compute rescaled time and distance 131 | 132 | Parameters 133 | ---------- 134 | catChild, catPar - objects of type SeisCatDic containing parent and child events 135 | dConst = 'b', 'D' - b-value, fractal dimension 136 | kwargs = distance_3D = True default : False i.e. 2D Euclidean distance 137 | 138 | Returns 139 | ------- 140 | - a_R, a_tau 141 | 142 | 143 | see: Clustering Analysis of Seismicity and Aftershock Identification, Zaliapin, I. 
(2008) 144 | 145 | """ 146 | #-------------------------------set args and kwargs----------------------------------------------- 147 | M0 = 0 148 | if 'M0' in kwargs.keys() and kwargs['M0'] is not None: 149 | M0 = kwargs['M0'] 150 | #-----------------------------add small uncertainty to X in case events are colocated-------------------------- 151 | if 'correct_co_located' in kwargs.keys() and kwargs['correct_co_located'] == True: 152 | vUncer = np.random.randn( catChild.size())*1e-10 153 | catChild.data['Lon'] += vUncer 154 | #------------------------------------------------------------------------------ 155 | #vMagCorr = 10**(-0.5*dConst['b']*(catPar.data['MAG']-M0) ) 156 | vMagCorr = 10**(-0.5*dConst['b']*(catPar.data['Mag']-M0) ) 157 | # if cartesian coordinates are available 158 | if 'X' in catChild.data.keys() and 'X' in catPar.data.keys(): 159 | a_R = np.sqrt((catChild.data['X'] - catPar.data['X']) ** 2 + (catChild.data['Y'] - catPar.data['Y']) ** 2) ** \ 160 | dConst['D'] * vMagCorr 161 | 162 | else: 163 | a_R = haversine(catChild.data['Lon'], catChild.data['Lat'], 164 | catPar.data['Lon'], catPar.data['Lat'])**dConst['D']*vMagCorr 165 | 166 | a_dt = catChild.data['Time']-catPar.data['Time']#interevent times 167 | a_tau = (a_dt)*vMagCorr 168 | sel2 = a_tau < 0 169 | if sel2.sum() > 0: 170 | #print( catChild.data['N'][sel2]) 171 | #print( catPar.data['N'][sel2]) 172 | error_str = '%i parents occurred after offspring, check order of origin time in catChild, catPar'%(sel2.sum()) 173 | raise( ValueError( error_str)) 174 | return a_R, a_tau 175 | 176 | 177 | def compileClust( dNND, simThreshold, verbose = True, **kwargs): 178 | """ 179 | assuming parent and off-spring is connected via unique measurement (e.g. nearest-neighbor distance) 180 | - create clusters of event pairs based on some similarity criteria 181 | e.g. 
a) based on cross-correlation coefficients between pairs 182 | b) based on space-time-magnitude distance 183 | - main input are pairs of connected events separated in parent and offspring 184 | (one parent can have many children, but child has only one parent) 185 | 1) find initial singles beyond threshold 186 | 2) find pairs below threshold and assemble clusters 187 | - take all event pairs with values below (eta_0) or above (CCC), 188 | --> pairs beyond the threshold do not have to be considered 189 | if offspring meets similarity criteria: 190 | - go through each pair and find cluster for child event by searching if the 191 | corresponding ID is already in any of the previous clusters 192 | - attach to existing cluster or create new cluster 193 | 3) - check if several offspring are connected to same parent and if 194 | different clusters have to be combined in case of ID repetition 195 | --> this is implemented as a while loop 196 | 4) - remove potential multiple IDs from clusters 197 | 198 | :Input - simThreshold = similarity parameter 199 | - vID_parent - event IDs 200 | - vID_child 201 | - vSimValues - all similarity values 202 | kwargs['useLargerEvents'] = False, 203 | 204 | :Return dClust - python dictionary that contains all clusters labeled numerically 205 | from '0' - not clustered 206 | '1' - '[nCLmax]' - clustered events 207 | each dictionary column contains IDs of children [first row] and parents [second row] 208 | """ 209 | # dNND = { 'aEqID_c' : vID_child, 210 | # 'aEqID_p' : vID_parent, 211 | # 'aNND' : vSim} 212 | # remove identical parents and off-spring if eq is in catalog several times 213 | sel = abs(dNND['aEqID_c']-dNND['aEqID_p']) > 0 214 | dNND= data_utils.selDicAll(dNND, sel) 215 | 216 | # check that dNND is sorted by time 217 | if 'Time' in dNND.keys(): 218 | i_sort = np.argsort( dNND['Time']) 219 | dNND = data_utils.selDicAll(dNND, i_sort) 220 | else: 221 | error_str = "'Time' key missing, add offspring origin time to dNND" 222 | raise ValueError( error_str) 223 | #==================================1============================================= 224 | # initial selection of events beyond threshold (single event) 225 | #================================================================================ 226 | ### events without trigger 227 | if 'useLargerEvents' in kwargs.keys() and kwargs['useLargerEvents'] == True: 228 | print( 'assuming threshold (%s) is a MINIMUM, select similarity values ABOVE this threshold'%( simThreshold)) 229 | sel_single = dNND['aNND'] <= simThreshold 230 | # remove independent events 231 | dNND_trig = data_utils.selectDataRange( dNND, simThreshold, None, 'aNND') 232 | else: 233 | print( 'assuming threshold (%s) is a MAXIMUM, select similarity values BELOW this threshold'%( simThreshold)) 234 | sel_single = dNND['aNND'] >= simThreshold 235 | # remove independent events 236 | dNND_trig = data_utils.selDicAll( dNND, np.logical_not( sel_single)) 237 | # preliminary single selection with eta > eta_0, may contain cluster events 238 | vID_single = dNND['aEqID_c'][sel_single] # could be singles or parents but not offspring 239 | sel_first = np.in1d( dNND['aEqID_p'][0], vID_single) 240 | if dNND['aNND'][0] > simThreshold and sel_first.sum() == 0: 241 | vID_single = np.append( dNND['aEqID_p'][0], vID_single) 242 | 243 | if verbose == True: 244 | print( f"---------compileClust - initial numbers:------") 245 | print( f"No. singles: {vID_single.shape[0]}"), 246 | print( f"No. 
triggered: {dNND_trig['aEqID_c'].shape[0]}, {dNND_trig['aEqID_p'].shape[0]}," 247 | f"No. tot. {dNND_trig['aEqID_p'].shape[0]} {sel_single.sum()+dNND_trig['aEqID_c'].shape[0]}") 248 | #==================================2============================================= 249 | # find clustered events 250 | #================================================================================ 251 | # initiate vectors and dic during first run 252 | curr_child_ID = dNND_trig['aEqID_c'][0] 253 | curr_par_ID = dNND_trig['aEqID_p'][0] 254 | v_pastEqIDs = np.array( [curr_child_ID, curr_par_ID] ) 255 | v_pastClIDs = np.array( [1, 1] ) 256 | # dClust['0'] = singles 257 | dClust = { '1' : np.array( [[curr_child_ID], 258 | [curr_par_ID ] ])} 259 | # for each child find the corresponding parent ID 260 | # if child or parent ID are already part of a cluster append to this cluster 261 | nCl = 2 262 | for iEv in range(1, dNND_trig['aEqID_p'].shape[0]): 263 | #print( 'nPair', iEv+1, 'out of', len( dNND_trig['aEqID_p']), 'iCl', nCl 264 | curr_child_ID = dNND_trig['aEqID_c'][iEv] 265 | curr_par_ID = dNND_trig['aEqID_p'][iEv] 266 | # check if parent or child are part of previous cluster 267 | sel_child = curr_child_ID == v_pastEqIDs 268 | sel_par = curr_par_ID == v_pastEqIDs 269 | 270 | if sel_par.sum() > 0 or sel_child.sum() > 0: 271 | # find which cluster event pair belongs to 272 | if sel_par.sum() and sel_child.sum(): # both already part of a cluster 273 | curr_cl_ID1 = v_pastClIDs[sel_par][0] 274 | curr_cl_ID2 = v_pastClIDs[sel_child][0] 275 | # merge clusters and add IDs 276 | dClust[str(curr_cl_ID1)] = np.hstack( (dClust[str(curr_cl_ID1)], 277 | np.array([[curr_child_ID], [curr_par_ID ] ]) 278 | )) 279 | dClust[str(curr_cl_ID1)] = np.hstack( (dClust[str(curr_cl_ID1)], dClust[str(curr_cl_ID2)])) 280 | # add new events but previous cluster ID 281 | v_pastEqIDs = np.append( v_pastEqIDs, np.array([curr_child_ID, curr_par_ID] ) ) 282 | v_pastClIDs = np.append( v_pastClIDs, np.array([ curr_cl_ID1, curr_cl_ID1] ) ) 283 | # remove second cluster ID from dClust 284 | dClust.pop( str(curr_cl_ID2)) 285 | # remove from past eq IDs and past cl IDs 286 | sel = curr_cl_ID2 != v_pastClIDs 287 | v_pastEqIDs = v_pastEqIDs[sel] 288 | v_pastClIDs = v_pastClIDs[sel] 289 | else: # only one is part of a cluster 290 | if sel_par.sum() > 0: # parent already part of a cluster 291 | curr_cl_ID = v_pastClIDs[sel_par][0] 292 | else:# child already part of a cluster 293 | curr_cl_ID = v_pastClIDs[sel_child][0] 294 | dClust[str(curr_cl_ID)] = np.hstack( (dClust[str(curr_cl_ID)], 295 | np.array([[curr_child_ID], [curr_par_ID ] ]) 296 | )) 297 | v_pastEqIDs = np.append( v_pastEqIDs, np.array([curr_child_ID, curr_par_ID] ) ) 298 | v_pastClIDs = np.append( v_pastClIDs, np.array([ curr_cl_ID, curr_cl_ID ] ) ) 299 | else: # start a new cluster 300 | dClust[str(nCl)] = np.array( [[curr_child_ID], 301 | [curr_par_ID ] ]) 302 | v_pastEqIDs = np.append( v_pastEqIDs, np.array([curr_child_ID, curr_par_ID] ) ) 303 | v_pastClIDs = np.append( v_pastClIDs, np.array([ nCl, nCl ] ) ) 304 | nCl += 1 305 | # check if children have same parent 306 | nTotChild = 0 307 | #=================================3========================================================================== 308 | # remove events from singles if in cluster, remove multiple IDs 309 | #============================================================================================================ 310 | # create vector of triggered eqIDs and count triggered events 311 | vID_Trig_all = np.array([]) 312 
| vclID_allEv = np.array([], dtype = int) 313 | for tag in sorted( dClust.keys()): 314 | #print( 'iCl', tag, 'nEv in cluster', np.unique( dClust[tag].flatten()).shape[0] 315 | #print( dClust[tag][0] 316 | aID_flat_uni = np.unique( dClust[tag].flatten()) 317 | #nTotTrig += aID_flat_uni.shape[0] 318 | vID_Trig_all = np.append( vID_Trig_all, aID_flat_uni ) 319 | vclID_allEv = np.append( vclID_allEv, np.ones( aID_flat_uni.shape[0], dtype = int)*int(tag)) 320 | # remove multiple ID entries --> possible since pairs are always appeneded 321 | dClust[tag] = aID_flat_uni 322 | nTotChild += dClust[tag].shape[0]-1 323 | #====================================4======================================================================== 324 | # check for events in more than one cluster, merge clusters 325 | #============================================================================================================ 326 | # sel_same = np.in1d( vID_Trig_all, np.array([ 3049419, 9020431, 9172305, 9173365, 15332137])) 327 | # print( "events in trig_all before double remove: ", sel_same.sum(), vID_Trig_all[sel_same]) 328 | aIDs, aCounts = np.unique( vID_Trig_all, return_counts=True) 329 | selDouble = aCounts > 1 330 | if verbose == True: 331 | print( f"N event IDs in more than one cluster: {selDouble.sum()}") 332 | i_run = 1 333 | while selDouble.sum() > 0: 334 | if verbose == True: 335 | print( '%i. run to remove doubles'%(i_run)) 336 | for ID in np.unique( aIDs[selDouble]): 337 | selCl = ID == vID_Trig_all 338 | aClID = np.unique( vclID_allEv[selCl]) 339 | for iCl in range( len( aClID)-1): 340 | if verbose == True: 341 | print( 'iCl with same events', str( aClID[0]), str( aClID[iCl+1]), 'evID: ', int(ID)) 342 | #A# merge clusters that have same events 343 | dClust[str(aClID[0])] = np.unique( np.hstack( (dClust[str(int( aClID[0]))], dClust[str( int(aClID[iCl+1]))]))) 344 | #B# remove cluster IDs from dictionary 345 | dClust.pop( str( int( aClID[iCl+1]))) 346 | #C# remove double event and corresponding clID from: 347 | # vID_Trig_all 348 | sel_rem = ID != vID_Trig_all 349 | vID_Trig_all = vID_Trig_all[sel_rem] 350 | # and vclID_allEv 351 | vclID_allEv = vclID_allEv[sel_rem] 352 | # leave one event with new clID, i.e. clId of first cluster that contains ID 353 | vclID_allEv = np.append( vclID_allEv, aClID[0]) 354 | vID_Trig_all = np.append( vID_Trig_all, ID) 355 | aIDs, aCounts = np.unique( vID_Trig_all, return_counts=True) 356 | selDouble = aCounts > 1 357 | i_run += 1 358 | # find events within initial single selection (eta > eta_0) 359 | # which are actually part of clustered events 360 | sel_single = np.ones( vID_single.shape[0], dtype = int) > 0 361 | iS = 0 362 | for ID_single in vID_single: 363 | sel = ID_single == vID_Trig_all 364 | if sel.sum() > 0: # remove this event from singles 365 | sel_single[iS] = False 366 | iS += 1 367 | if verbose == True: 368 | print("initial singles now parents - remove from dClust['0']: ",np.array([~sel_single]).sum()) 369 | 370 | vID_single = vID_single[sel_single] 371 | if verbose == True: 372 | print( "---------------final result--------------------------") 373 | print( f" Ntot in cluster: {len( vID_Trig_all)}, N-parent(=N-clust): {len(dClust.keys())}," 374 | f"No. singles: {vID_single.shape[0]}, Ntot. offspring (includes doubles): {nTotChild}") 375 | print( "trig. 
fraction: ", round((len( vID_Trig_all)-len(dClust.keys()))/dNND['aNND'].shape[0],2), "frac.MS: ", round( len(dClust.keys())/dNND['aNND'].shape[0],2), "single: ", round((vID_single.shape[0]/dNND['aNND'].shape[0]),2)) 376 | print( 'Ntot in cat.', dNND['aNND'].shape[0]+1, 'N-trig + N-ind', len( vID_Trig_all)+vID_single.shape[0]) 377 | 378 | dClust[str(0)] = vID_single 379 | return dClust 380 | 381 | def addClID2cat( seisCat, dClust, test_plot = False, **kwargs): 382 | """ 383 | - add new column (i.e. dictionary tag='famID') for seisCat 384 | that specifies which cluster each event belongs to 385 | - !note: if offspring generations should be recorded, run: 386 | clustering.offspring_gen() first 387 | and use the output dictionary as input for this fct. 388 | 389 | :param dClust: python dictionary 390 | each dic. element specified by key is a vector of evIDs 391 | or three row matrix with evID, iGen and average leaf depth 392 | 393 | 394 | :param seisCat: 395 | :return: seisCat (with new tags: 396 | 'famID' - record family links between events 397 | optional: 398 | (note that 'clID' is commonly used for waveform-based relocations) 399 | 'iGen' - record offspring generation within family) 400 | 'LD' - average leaf depth for each cluster 401 | 402 | """ 403 | # sort original catalog to get ID of first event 404 | seisCat.sortCatalog( 'Time') 405 | 406 | # first row is clusterID and second row event ID from catalog 407 | nRows = 2 408 | b_add_iGen = False 409 | if len( dClust['0'].shape) > 1: 410 | b_add_iGen = True 411 | # additional rows for trig. generation and average leaf depth 412 | nRows = 4 413 | mClust = np.zeros([nRows, seisCat.size()]) 414 | nGen = 0 415 | nFam = 0 416 | nEv = 0 417 | i = 0 418 | for sCl in dClust.keys(): 419 | iCl = int(sCl) 420 | # print( f"------iCl: {iCl}, nEv: {nEv}--------, evID={dClust[sCl]}") 421 | #earthquake event IDs 422 | if b_add_iGen == False: 423 | nEv = dClust[sCl].shape[0] 424 | mClust[1, i:i + nEv] = dClust[sCl] 425 | else: 426 | nEv = dClust[sCl].shape[1] 427 | mClust[1, i:i + nEv] = dClust[sCl][0] 428 | # family IDS 429 | mClust[0,i:i+nEv] = np.ones(nEv)*iCl 430 | nFam += len( dClust[sCl]) 431 | if b_add_iGen == True: 432 | nGen += len( dClust[sCl][1]) 433 | # trig generation 434 | mClust[2,i:i+nEv] = dClust[sCl][1] 435 | # ave. leaf depth 436 | mClust[3, i:i + nEv] = dClust[sCl][2] 437 | i += nEv 438 | #---------include first event in catalog as single------------- 439 | selFirst = seisCat.data['N'][0] == mClust[1] 440 | if selFirst.sum() == 0: 441 | ID_first = int( seisCat.data['N'][0]) #[~selUni][0]) 442 | print( 'first ev. in catalog -ID:', ID_first, int( seisCat.data['N'][0]), 'last ev. in mClust', mClust[1,-1], 'should=0') 443 | mClust[1] = np.hstack( (ID_first, mClust[1,0:-1]))# not needed if catalog is sorted by Time 444 | #sel_same = np.in1d( mClust[1], seisCat.data['N']) 445 | # check that every event ID is represented only once 446 | __, aID, aN_uni = np.unique( mClust[1], return_counts = True, return_index=True) 447 | sel = aN_uni > 1 448 | if sel.sum() > 0: 449 | error_str = f"ev.
ID represented more than once: {mClust[1][aID[sel]]}, 'N-repeats: ', {aN_uni[sel]}" 450 | raise ValueError( error_str) 451 | #--sort both cluster ID matrix and cat with respect to IDs 452 | sortSel = mClust[1].argsort() 453 | mClust = mClust.T[sortSel].T 454 | seisCat.sortCatalog('N') #--otherwise clIDs get assigned to wrong event 455 | 456 | if test_plot == True: 457 | plt.figure() 458 | plt.subplot( 211) 459 | plt.plot( mClust[1], mClust[1]-seisCat.data['N'], 'ko') 460 | plt.xlabel( 'Event ID in Clust') 461 | plt.ylabel( 'Diff. Events IDs (0)') 462 | plt.subplot( 212) 463 | plt.plot(mClust[1], mClust[0], 'ko') 464 | plt.xlabel('Event ID in Clust') 465 | plt.ylabel('Cluster ID') 466 | #plt.plot( plt.gca().get_xlim(), plt.gca().get_xlim(), 'r--') 467 | plt.show() 468 | 469 | seisCat.data['famID'] = np.int32( mClust[0]) 470 | if b_add_iGen == True: 471 | seisCat.data['iGen'] = np.int16(mClust[2]) 472 | seisCat.sortCatalog( 'Time') 473 | return seisCat 474 | 475 | def offspring_gen( dClust, dNND, f_eta_0, **kwargs): 476 | """ 477 | - trace back triggering chain chronologically and assign trig generation 478 | - start with parent generation, then add end leafs 479 | a) identify all parents within cluster 480 | b) sort by time 481 | c) assign the same iGen to offspring of the same parent (hierarchical) 482 | compute average leaf depth: 483 | = 1/n sum( d_i) = ave. depth across end leafs 484 | __________________________________ 485 | input: seisCat = object SeismicityCatalog 486 | used to get origin times of offspring events 487 | dNND = 488 | 'aEqID_c' - unique event IDs of offspring 489 | 'aEqID_p ' - events IDs of parents, these are paired to a_ID_child so order matters 490 | parents can have many offspring, so repeats are possible here 491 | 'Time' - offspring origin time from catalog, in case IDs are not chronological 492 | 493 | dClust - '[famID]' = np.array([ offSpringIDs]) 494 | ---------------------------------- 495 | return: 496 | dGen - python dictionary 497 | 'famID' : np.array([3, N]) 498 | # dGen[famID][0] = evIDs 499 | # dGen[famID][1] = trig generation 500 | # dGen[famID][2] = ave. leaf depth 501 | - average lead depth (same number for entire cluster) 502 | """ 503 | #=========================1======================================== 504 | # count generations of offspring events 505 | #================================================================== 506 | dGen = {} 507 | l_famID = list( dClust.keys()) 508 | # singles are all 0 generation 509 | dGen['0'] = np.zeros( (3, len( dClust['0']))) 510 | # set ev IDs in new dic 511 | dGen['0'][0] = dClust['0'] 512 | 513 | # ave LD = 1 514 | dGen['0'][2] = np.ones( len( dClust['0'])) 515 | 516 | # ignore singles below 517 | l_famID.remove( '0') 518 | for famID in l_famID: 519 | ###find ori. 
time for each child 520 | sel_chi_t = np.in1d( dNND['aEqID_c'], dClust[famID]) 521 | # filter for parent - child NND < eta_0 522 | sel_chi_t2 = dNND['aNND'][sel_chi_t] < f_eta_0 523 | curr_iPar = dNND['aEqID_p'][sel_chi_t][sel_chi_t2] 524 | curr_iChi = dNND['aEqID_c'][sel_chi_t][sel_chi_t2] 525 | curr_tChi = dNND['Time'][np.in1d( dNND['aEqID_c'],curr_iChi)] 526 | 527 | ##sort cluster IDs with respect to offspring time 528 | sel_sort = np.argsort( curr_tChi) 529 | first_ID = curr_iChi[sel_sort][0] 530 | 531 | # get unique parents and sort by time 532 | uni_curr_iPar = np.unique(curr_iPar) 533 | uni_par_times = curr_tChi[np.in1d(curr_iChi, uni_curr_iPar)] 534 | uni_curr_iPar = curr_iChi[np.in1d(curr_iChi, uni_curr_iPar)] 535 | sort_uni_par = np.argsort( uni_par_times) 536 | uni_curr_iPar = uni_curr_iPar[sort_uni_par] 537 | # check if parent of first pair needs to be added 538 | if np.isin( curr_iPar[0], uni_curr_iPar).sum() == 0: 539 | uni_curr_iPar = np.hstack(( curr_iPar[0], uni_curr_iPar)) 540 | # add end leafs (offspring that are not parents) 541 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 542 | uni_curr_iPar = np.hstack((uni_curr_iPar, curr_iChi[sel_endLeaf])) 543 | #----------initiate new vectors------------------------ 544 | uni_iGen_pastPar = np.zeros( len(curr_tChi)+1) 545 | uni_id_pastPar = np.zeros( len(curr_tChi)+1) 546 | ## assign chronological triggering generation 547 | curr_iGen = np.zeros( len(curr_tChi)+1) 548 | iGen = 0 549 | for iPar in range( len(uni_curr_iPar)): 550 | # check if current parent is offspring of other parent 551 | pastPar = curr_iPar[uni_curr_iPar[iPar] == curr_iChi] 552 | if len( pastPar) > 0: 553 | sel_pastPar = pastPar == uni_id_pastPar 554 | else: 555 | sel_pastPar = np.array([False]) 556 | if sel_pastPar.sum() > 0: 557 | # add 1 to previous parent triggering generation 558 | curr_iGen[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 559 | uni_iGen_pastPar[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 560 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 561 | else: 562 | curr_iGen[iPar] = iGen 563 | uni_iGen_pastPar[iPar] = iGen 564 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 565 | iGen += 1 # assign new trig generation 566 | # save evID, trigger generation in dictionary 567 | dGen[famID] = np.zeros( (3, len(uni_id_pastPar))) 568 | 569 | dGen[famID][0] = uni_id_pastPar 570 | dGen[famID][1] = curr_iGen 571 | # =========================3======================================== 572 | # compute ave. 
leaf depth 573 | # ================================================================== 574 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 575 | dGen[famID][2] = np.ones( len(dClust[famID]))*curr_iGen[1::][sel_endLeaf].mean() 576 | return dGen 577 | 578 | def offspring_gen_test( dClust, dNND, f_eta_0, **kwargs): 579 | #=========================1======================================== 580 | # add origin times from seisCat to dNND 581 | #================================================================== 582 | # sort dNND and seisCat by offspring ID!!- seisCat.data['Time'] is added to dNND 583 | # sortSel = np.argsort( dNND['aEqID_c']) 584 | # for tag in list(dNND.keys()): 585 | # dNND[tag] = dNND[tag][sortSel] 586 | # seisCat.sortCatalog('Time') 587 | # firstEvID = seisCat.data['N'][0] 588 | # seisCat.sortCatalog('N') 589 | # # add offspring origin time to dNND 590 | # sel = firstEvID == seisCat.data['N'] 591 | # dNND['at_c'] = seisCat.data['Time'][~sel] 592 | # check that dNND is sorted by time 593 | # if 'Time' in dNND.keys(): 594 | # i_sort = np.argsort( dNND['Time']) 595 | # dNND = data_utils.selDicAll(dNND, i_sort) 596 | # else: 597 | # error_str = "'Time' key missing, add offspring origin time to dNND" 598 | # raise ValueError( error_str) 599 | #=========================2======================================== 600 | # count generations of offspring events 601 | #================================================================== 602 | dGen = {} 603 | l_famID = list( dClust.keys()) 604 | # singles are all 0 generation 605 | dGen['0'] = np.zeros( (3, len( dClust['0']))) 606 | # set ev IDs in new dic 607 | dGen['0'][0] = dClust['0'] 608 | # ave LD = 1 609 | dGen['0'][2] = np.ones( len( dClust['0'])) 610 | # ignore singles below 611 | l_famID.remove( '0') 612 | for famID in l_famID: 613 | ###find ori. 
time for each child 614 | sel_chi_t = np.in1d( dNND['aEqID_c'], dClust[famID]) 615 | # filter for parent - child NND < eta_0 616 | sel_chi_t2 = dNND['aNND'][sel_chi_t] < f_eta_0 617 | curr_iPar = dNND['aEqID_p'][sel_chi_t][sel_chi_t2] 618 | curr_iChi = dNND['aEqID_c'][sel_chi_t][sel_chi_t2] 619 | curr_tChi = dNND['Time'][np.in1d( dNND['aEqID_c'],curr_iChi)] 620 | # curr_tChi = np.zeros( len( curr_iPar)) 621 | # for iP in range( len( curr_iChi)): 622 | # curr_tChi[iP] = dNND['at_c'][dNND['aEqID_c']==curr_iChi[iP]] 623 | 624 | ##sort cluster IDs with respect to offspring time 625 | sel_sort = np.argsort( curr_tChi) 626 | 627 | curr_tChi = curr_tChi[sel_sort] 628 | curr_iChi = curr_iChi[sel_sort] 629 | curr_iPar = curr_iPar[sel_sort] 630 | 631 | # parent IDs have one less element than complete cluster (first event has no parent) 632 | #sel_sort = np.hstack((0, sel_sort+1)) 633 | # make sure dClust[famID] = dNND['aEqID_c']+ 634 | firstID = dClust[famID][0] 635 | 636 | uni_curr_iPar = np.unique( curr_iPar) 637 | # sort unique parents by time 638 | print( uni_curr_iPar) 639 | uni_par_times = curr_tChi[np.in1d(curr_iChi, uni_curr_iPar)] 640 | uni_curr_iPar = curr_iChi[np.in1d(curr_iChi, uni_curr_iPar)] 641 | sort_uni_par = np.argsort( uni_par_times) 642 | uni_curr_iPar = uni_curr_iPar[sort_uni_par] 643 | # check if parent of first pair needs to be added 644 | if np.isin( curr_iPar[0], uni_curr_iPar).sum() == 0: 645 | uni_curr_iPar = np.hstack(( curr_iPar[0], uni_curr_iPar)) 646 | # add end leafs (offspring that are not parents) 647 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 648 | uni_curr_iPar = np.hstack((uni_curr_iPar, curr_iChi[sel_endLeaf])) 649 | #----------initiate new vectors------------------------ 650 | uni_iGen_pastPar = np.zeros( len(curr_tChi)+1) 651 | uni_id_pastPar = np.zeros( len(curr_tChi)+1) 652 | ## assign chronological triggering generation 653 | curr_iGen = np.zeros( len(curr_tChi)+1) 654 | iGen = 0 655 | for iPar in range( len(uni_curr_iPar)): 656 | # # assign trig gen starting from oldest parent 657 | # sel_hier_par = curr_iPar == uni_curr_iPar[iPar] 658 | # # print("current parent: ", uni_curr_iPar[iPar], "offspring: ", curr_iChi[sel_hier_par]) 659 | # # print( "past parents", uni_id_pastPar) 660 | # # print( "past parent iGen", uni_iGen_pastPar) 661 | # check if current parent is offspring of other parent 662 | pastPar = curr_iPar[uni_curr_iPar[iPar] == curr_iChi] 663 | #print( uni_curr_iPar[iPar], pastPar, uni_id_pastPar) 664 | if len( pastPar) > 0: 665 | sel_pastPar = pastPar == uni_id_pastPar 666 | else: 667 | sel_pastPar = np.array([False]) 668 | if sel_pastPar.sum() > 0: 669 | print(uni_curr_iPar[iPar], "past parent: ", pastPar, "trig gen: ", uni_iGen_pastPar[sel_pastPar][0]+1) 670 | # add 1 to previous parent triggering generation 671 | curr_iGen[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 672 | uni_iGen_pastPar[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 673 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 674 | else: 675 | print( "trig gen: ", iGen) 676 | curr_iGen[iPar] = iGen 677 | uni_iGen_pastPar[iPar] = iGen 678 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 679 | iGen += 1 # assign new trig generation 680 | 681 | print( uni_id_pastPar) 682 | print( curr_iGen) 683 | # save evID, trigger generation in dictionary 684 | dGen[famID] = np.zeros( (3, len(uni_id_pastPar))) 685 | dGen[famID][0] = uni_id_pastPar 686 | dGen[famID][1] = curr_iGen 687 | # =========================3======================================== 688 | # compute ave. 
leaf depth 689 | # ================================================================== 690 | # end leafs = events without offspring (curr_iPar is already < eta_0) 691 | #print( len( curr_iGen), len( curr_iChi), len( uni_curr_iPar)) 692 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 693 | print( 'end leaf off. ID', curr_iChi[sel_endLeaf]) 694 | print( ' leaf depths ', curr_iGen[1::][sel_endLeaf]) 695 | print( 'mean leaf depth: ', curr_iGen[1::][sel_endLeaf].mean()) 696 | dGen[famID][2] = np.ones( len(dClust[famID]))*curr_iGen[1::][sel_endLeaf].mean() 697 | # recall data structure: 698 | # dGen[famID][0] = evIDs 699 | # dGen[famID][1] = trig generation 700 | # dGen[famID][2] = ave. leaf depth 701 | return dGen 702 | #================================================================================= 703 | # create random catalogs 704 | #================================================================================= 705 | # create uniform times 706 | def rand_rate_uni( N, tmin, tmax, **kwargs): 707 | """ draw N random origin times from a uniform distribution between tmin and tmax 708 | 709 | kwargs: - currently unused 710 | 711 | return: vector of N origin times between tmin and tmax """ 712 | return np.random.uniform( tmin, tmax, size = N) 713 | 714 | 715 | # ------------------------------------------------------------------------------------------ 716 | def haversine(lon1, lat1, lon2, lat2, **kwargs): 717 | """ 718 | haversine formula implementation 719 | https://en.wikipedia.org/wiki/Great-circle_distance 720 | great circle distance between two points 721 | :input lon1, lat1 722 | lon2, lat2 723 | 724 | i_radius - Earth radius in km (set within the function) 725 | :output distance - great circle distance in kilometers 726 | """ 727 | i_radius = 6371 728 | # convert to radians 729 | lon1 = lon1 * np.pi / 180 730 | lon2 = lon2 * np.pi / 180 731 | lat1 = lat1 * np.pi / 180 732 | lat2 = lat2 * np.pi / 180 733 | # haversine formula 734 | dlon = lon2 - lon1 735 | dlat = lat2 - lat1 736 | a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2 737 | c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)) 738 | distance = i_radius * c 739 | return distance 740 | 741 | # ==================================4============================================================== 742 | # T-R density plots 743 | # ================================================================================================= 744 | def plot_R_T( a_T, a_R, f_eta_0, **kwargs): 745 | """ 746 | - plot rescaled distance over rescaled time 747 | Parameters: 748 | dPar = {'binx': .1, 'biny': .1, # used for density and gaussian smoothing 749 | 'sigma': None, # if None: default = n**(-1./(d+4)), 750 | 'Tmin': -8, 'Tmax': 0, 751 | 'Rmin': -5, 'Rmax': 3, 752 | 'cmap': plt.cm.RdYlGn_r} 753 | Use kwargs['dPar'] = python dictionary 754 | 'binx', 'biny', etc.
to overwrite 755 | defaults for specific or all parameters 756 | :param kwargs: 757 | :return: fig - figure handle - use fig.axes to get list of corresponding axes 758 | """ 759 | dPar = {'binx': .1, 'biny': .1, # used for density and gaussian smoothing 760 | 'sigma': None, # if None: default = n**(-1./(d+4)), 761 | 'Tmin': -8, 'Tmax': 0, 762 | 'Rmin': -5, 'Rmax': 3, 763 | 'cmap': plt.cm.RdYlGn_r} 764 | if 'dPar' in kwargs.keys() and kwargs['dPar'] is not None: 765 | for tag in kwargs['dPar'].keys(): 766 | print( f"overwrite plot_R_T param: {tag}={kwargs['dPar'][tag]}") 767 | dPar[tag] = kwargs['dPar'][tag] 768 | a_Tbin = np.arange(dPar['Tmin'], dPar['Tmax'] + 2 * dPar['binx'], dPar['binx']) 769 | a_Rbin = np.arange(dPar['Rmin'], dPar['Rmax'] + 2 * dPar['biny'], dPar['biny']) 770 | sel = a_T > 0 771 | XX, YY, ZZ = data_utils.density_2D(np.log10(a_T[sel]), np.log10(a_R[sel]), a_Tbin, a_Rbin, sigma=dPar['sigma']) 772 | 773 | fig = plt.figure( figsize=(7, 9)) 774 | ax = plt.subplot(111) 775 | ax.set_title('Nearest Neighbor Pairs in R-T') 776 | # ------------------------------------------------------------------------------ 777 | normZZ = ZZ * (dPar['binx'] * dPar['biny'] * len(a_R)) 778 | plot1 = ax.pcolormesh(XX, YY, normZZ, cmap=dPar['cmap']) 779 | cbar = plt.colorbar(plot1, orientation='horizontal', shrink=.5, aspect=20, ) 780 | # ax.plot( np.log10( a_T), np.log10( a_R), 'wo', ms = 1.5, alpha = .2) 781 | # plot eta_0 to divide clustered and background mode 782 | ax.plot([dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']]) + f_eta_0, '-', lw=1.5, color='w') 783 | ax.plot([dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']]) + f_eta_0, '--', lw=1.5, color='.5') 784 | # -----------------------labels and legends------------------------------------------------------- 785 | # cbar.set_label( 'Event Pair Density [#ev./dRdT]') 786 | cbar.set_label('Number of Event Pairs', labelpad=-60) 787 | ax.set_xlabel('Rescaled Time') 788 | ax.set_ylabel('Rescaled Distance') 789 | ax.set_xlim(dPar['Tmin'], dPar['Tmax']) 790 | ax.set_ylim(dPar['Rmin'], dPar['Rmax']) 791 | # fig.axes 792 | return fig -------------------------------------------------------------------------------- /src/data_utils.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/python2.7 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | helper functions for easier file handling (mainly ASCII) 6 | and data I/O, density estimates, 2D Gaussian smoothing etc. 
7 | 8 | 9 | @author tgoebel - UC Santa Cruz 10 | """ 11 | import os 12 | import numpy as np 13 | import scipy.io 14 | #================================================================================ 15 | # data I/O 16 | #================================================================================ 17 | def removeColumn( file_in, lCol): 18 | """ 19 | remove all columns specified in lCol 20 | 1) create duplicate file called 'dummy_file.txt' in cwd 21 | 2) remove column using awk 22 | 3) return file_name of duplicate 23 | """ 24 | # example syntax to remove three columns 25 | #os.system( "awk '{\$24=""; \$25=""; \$26=""; print}' in_file.txt > out_file.txt") 26 | lStr = [] 27 | for col in lCol: 28 | lStr.append( "$%s=\"\"; "%( col)) 29 | tmp_file = 'dummy_file.txt' 30 | command_str = "awk '{ %s print}' %s > %s"%( ''.join( lStr), file_in, tmp_file) 31 | os.system( command_str) 32 | return tmp_file 33 | 34 | def loadmat(filename, verbose = False): 35 | ''' 36 | this function should be called instead of directly calling scipy.io.loadmat 37 | which is used within the method 38 | (1) - filters dictionary tags 39 | (2) - properly recovers python dictionaries 40 | from mat files. checks dic tags which are still mat-objects 41 | (3) - correct arrays of the form: np.array([[ 1, 2, 3]]) to np.array([ 1, 2, 3]), squeeze_me=True 42 | (4) - can handle 'nested' variables in matlab where variables contain several structures 43 | 44 | ''' 45 | data = scipy.io.loadmat(filename, struct_as_record=True, squeeze_me=True) 46 | data = _check_keys(data) 47 | for tag in list( data.keys()): 48 | if tag[0] == '_': 49 | if verbose == True: 50 | print( 'remove', tag, data[tag]) 51 | data.pop( tag) 52 | return data 53 | 54 | def _check_keys( dData): 55 | ''' 56 | checks if entries in dictionary are mat-objects. If yes, 57 | _todict is called to change them to nested dictionaries 58 | ''' 59 | for key in dData: 60 | if isinstance(dData[key], scipy.io.matlab.mio5_params.mat_struct): 61 | dData[key] = _todict(dData[key]) 62 | return dData 63 | 64 | def _todict( matobj): 65 | ''' 66 | A recursive function which constructs nested dictionaries from matobjects 67 | ''' 68 | dData = {} 69 | for strg in matobj._fieldnames: 70 | elem = matobj.__dict__[strg] 71 | if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct): 72 | dData[strg] = _todict(elem) 73 | else: 74 | dData[strg] = elem 75 | return dData 76 | #================================================================================ 77 | # density estimates and smoothing 78 | #================================================================================ 79 | def density_2D( x, y, x_bin, y_bin, **kwargs): 80 | """ 81 | 2D, smoothed event density for point cloud with coordinates x,y 82 | uses method: scipy.stats.kde.gaussian_kde 83 | see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html 84 | :input x,y - dataset 85 | x_bin, y_bin - binned x and y vectors 86 | 87 | 88 | kwargs['sigma'] - specify gaussian smoothing kernel ('bw_method' in scipy.stats.kde) 89 | default: = n**( -1./(d+2.5)), adapted Scott rule for slightly tighter bandwidth 90 | - 'scott' 91 | sigma = n**( -1./(d+4)), d- number of dimensions, n - number of data points 92 | - 'silverman' 93 | sigma = (n * (d + 2) / 4.)**(-1.
/ (d + 4)) 94 | - float( ) = set Gaussian Bandwidth directly 95 | 96 | 97 | return XX, YY, ZZ - 2D binned x and y coordinates and density for each cell 98 | """ 99 | from scipy.stats import kde 100 | n,d = x.shape[0],2 101 | sigma = n**( -1./(d+2.5)) 102 | if 'sigma' in kwargs.keys() and kwargs['sigma'] is not None: 103 | sigma = kwargs['sigma'] 104 | # Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents 105 | fct_Gauss2D = kde.gaussian_kde( np.array([x,y]), bw_method = sigma) 106 | # meshgrid of x and y coordinates 107 | XX,YY = np.meshgrid( x_bin, y_bin) 108 | ZZ = fct_Gauss2D( np.vstack([XX.flatten(), YY.flatten()])).reshape( XX.shape) 109 | dx, dy = x_bin[1] - x_bin[0], y_bin[1] - y_bin[0] 110 | # check if integral is ~ one, better: use midpoint method 111 | print( 'check if integral ~1', round(ZZ.sum()*( dx*dy),3)) #ZZ[ZZ>0].mean()*(XX.max()-XX.min())*(YY.max()-YY.min())) 112 | return XX-.5*dx, YY-.5*dy, ZZ 113 | 114 | #================================================================================ 115 | # dictionary processing 116 | #================================================================================ 117 | def copyDic( dic): 118 | """ create a copy of dic""" 119 | import copy 120 | dCopy = {} 121 | for tag in dic.keys(): 122 | dCopy[tag] = copy.copy( dic[tag]) 123 | return dCopy 124 | 125 | def selectDataRange(dicOri, min, max, tag, **kwargs): 126 | """ 127 | select data within given range, set min = None or max = None for only lower or upper bound 128 | """ 129 | dic = copyDic(dicOri) 130 | if 'includeBoundaryEvents' in kwargs.keys() and kwargs['includeBoundaryEvents'] == True: 131 | if min == None or max == None: 132 | error_str = 'both boundaries have to be set to include boundary events' 133 | raise( ValueError( error_str)) 134 | else: 135 | sel = np.logical_and( dic[tag] >= float(min), dic[tag] <= float(max ) ) 136 | if max == None: 137 | sel = dic[tag] > float(min) 138 | elif min == None: 139 | sel = dic[tag] < max 140 | else: 141 | sel = np.logical_and( dic[tag] > float(min), dic[tag] < float(max) ) 142 | sel = np.arange( dic[tag].shape[0], dtype = int )[sel] 143 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 144 | return sel 145 | else: 146 | return selDicAll(dic, sel, **kwargs) 147 | 148 | 149 | def selDicAll(dic, curr_sel, **kwargs): 150 | """apply boolean vector to entire data 151 | e.g. for sorting or cutting ... """ 152 | newDic = {} 153 | for tag, vector in dic.items(): 154 | newDic[tag] = dic[tag][curr_sel] 155 | return newDic 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /src/datetime_utils.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/python2.7 2 | # -*- coding: utf-8 -*- 3 | """ 4 | convert year month day hour min sec to decimal year and vice
versa 5 | 6 | @author tgoebel - UC Santa Cruz 7 | """ 8 | from __future__ import division 9 | import numpy as np 10 | 11 | import time, datetime, calendar 12 | from datetime import datetime as dt 13 | 14 | 15 | def mo_to_sec( value): 16 | return value*(aveDyYr()/12)*24*3600 17 | 18 | def sec_to_mo( value): 19 | return value/((aveDyMo())*24*3600) 20 | 21 | def dy_to_sec( value): 22 | return value*24*3600 23 | 24 | def sec_to_dy( value): 25 | return value/(24*3600) 26 | 27 | 28 | def aveDyYr(): 29 | """ how many days in a year""" 30 | return 365 + 1/4 - 1/100 + 1/400 31 | 32 | def aveDyMo(): 33 | """ how many days in a month """ 34 | return aveDyYr()/12 35 | 36 | def checkDateTime( dateTime): 37 | """ check that hour != 24, MN != 60, SC != 60 """ 38 | YR, MO, DY, HR, MN, SC = int(dateTime[0]), int(dateTime[1]), int(dateTime[2]), int(dateTime[3]),int(dateTime[4]), float(dateTime[5]) 39 | if isinstance( YR, (float, int)): 40 | if SC < 0: 41 | SC = 0 42 | elif SC - 60 >= 0: 43 | MN += int((SC/60)) 44 | SC -= 60*int( (SC/60)) 45 | if MN < 0: 46 | MN = 0 47 | elif MN - 60 >= 0: 48 | HR += int((MN/60)) 49 | MN -= 60.*int( (MN/60.)) 50 | if HR < 0: 51 | HR = 0 52 | elif HR - 24 >= 0: 53 | HR = 23 54 | MN = 59 55 | SC = 59.999 56 | elif isinstance( YR, (np.ndarray)): 57 | #set all values below zero to zero 58 | sel = SC < 0 59 | SC[sel] = 0 60 | sel = MN < 0 61 | MN[sel] = 0 62 | sel = HR < 0 63 | HR[sel] = 0 64 | #set 60 to zero and 24 to 23.59.59.99 65 | sel = abs(SC - 60) < 1e-6 66 | SC[sel] = 0 67 | MN[sel] = MN[sel] + 1 68 | sel = 60 - MN < 1e-6 69 | MN[sel] = 0 70 | HR[sel] = HR[sel] + 1 71 | sel = 24 - HR < 1e-6 72 | HR[sel] = 23 73 | MN[sel] = 59 74 | SC[sel] = 59.99 75 | return YR, MO, DY, HR, MN, SC 76 | 77 | 78 | #------------------------------------------------------------------------------ 79 | # date-time conversions 80 | #------------------------------------------------------------------------------ 81 | def dateTime2decYr( datetime_in, **kwargs ): 82 | """ 83 | input: datetime_in = array containing time columns year - second 84 | out = date in decimal year 85 | """ 86 | try: 87 | o_dt = datetime.datetime( int( datetime_in[0] ), int( datetime_in[1] ), int( datetime_in[2] ), int( datetime_in[3] ), int( datetime_in[4] ), int( round( datetime_in[5])-1e-3)) 88 | except: 89 | error_msg = "datetime array not valid - %s; check if date and time is correct, e.g. no SC > 60.." % datetime_in 90 | raise ValueError( error_msg) 91 | time_sc = o_dt.hour*3600 + o_dt.minute*60 + o_dt.second 92 | # get no. of day within current year between 0 and 364 and add time in seconds 93 | dayOfYear_seconds = ( o_dt.timetuple().tm_yday - 1 ) * 86400.0 + time_sc 94 | if calendar.isleap( o_dt.year): 95 | year_fraction = dayOfYear_seconds / ( 86400.0 * 366 ) 96 | else: 97 | year_fraction = dayOfYear_seconds / ( 86400.0 * 365 ) 98 | # dec year = current year + day_time (in dec year) 99 | return o_dt.year + year_fraction 100 | 101 | def decYr2datetime( decimalYear ): 102 | """ 103 | convert decimal year to year/month/day...
104 | """ 105 | year = np.floor( decimalYear) 106 | rest = decimalYear-year 107 | 108 | if year%4 == 0: # leap year 109 | ndays = 366 110 | feb = 29 111 | else: 112 | ndays = 365 113 | feb = 28 114 | decDay = rest * ndays 115 | 116 | if decDay >= 0 and decDay <= 31: 117 | month = 1 118 | day = np.ceil( decDay ) 119 | rest = (decDay) -np.floor( decDay ) 120 | elif decDay >= 0 and decDay <= 31+feb: 121 | month = 2 122 | day = np.ceil( decDay- 31 ) 123 | rest = 1 -(day - (decDay - 31 )) 124 | elif decDay >= 31+feb and decDay <= 2*31+feb: 125 | month = 3 126 | day = np.ceil( decDay- (31+feb )) 127 | rest = 1 -(day - (decDay -(31+feb ))) 128 | elif decDay >= 2*31+feb and decDay <= 3*31+feb-1: 129 | month = 4 130 | day = np.ceil( decDay- (2*31+feb)) 131 | rest = 1 -(day - (decDay -(2*31+feb))) 132 | elif decDay >= 3*31+feb-1 and decDay <= 4*31+feb-1: 133 | month = 5 134 | day = np.ceil( decDay -(3*31+feb-1) ) 135 | rest = 1 -(day - (decDay -(3*31+feb-1))) 136 | elif decDay >= 4*31+feb-1 and decDay <= 5*31+feb-2: 137 | month = 6 138 | day = np.ceil( decDay-(4*31+feb-1)) 139 | rest = 1 -(day - (decDay -(4*31+feb-1))) 140 | elif decDay >= 5*31+feb-2 and decDay <= 6*31+feb-2: 141 | month = 7 142 | day = np.ceil( decDay-(5*31+feb-2) ) 143 | rest = 1 -(day - (decDay -(5*31+feb-2))) 144 | elif decDay >= 6*31+feb-2 and decDay <= 7*31+feb-2: 145 | month = 8 146 | day = np.ceil( decDay -(6*31+feb-2)) 147 | rest = 1 -(day - (decDay -(6*31+feb-2))) 148 | elif decDay >= 7*31+feb-2 and decDay <= 8*31+feb-3: 149 | month = 9 150 | day = np.ceil( decDay -(7*31+feb-2) ) 151 | rest = 1 -(day - (decDay -(7*31+feb-2))) 152 | elif decDay >= 8*31+feb-3 and decDay <= 9*31+feb-3: 153 | month = 10 154 | day = np.ceil( decDay -(8*31+feb-3)) 155 | rest = 1 -(day - (decDay -(8*31+feb-3))) 156 | elif decDay >= 9*31+feb-3 and decDay <= 10*31+feb-4: 157 | month = 11 158 | day = np.ceil( decDay -(9*31+feb-3)) 159 | rest = 1 -(day - (decDay -(9*31+feb-3))) 160 | elif decDay >= 10*31+feb-4 and decDay <= 11*31+feb-4: 161 | month = 12 162 | day = np.ceil( decDay -(10*31+feb-4)) 163 | rest = 1 -(day - (decDay -(10*31+feb-4))) 164 | else: 165 | print( 'wrong input decimal year') 166 | hour = np.floor( rest * 24 ) 167 | rest = 24*rest-hour 168 | minute = np.floor( rest * 60 ) 169 | rest = 60*rest-minute 170 | second = rest * 60 171 | if day == 0: # for int decimal years 172 | day = 1 173 | try: 174 | return [int(year[0]), int(month), int(day[0]), int(hour[0]), int(minute[0]), second[0]] 175 | except: 176 | return [int(year), int(month), int(day), int(hour), int(minute), second] 177 | -------------------------------------------------------------------------------- /test_scripts/1_2D_density_plots.py: -------------------------------------------------------------------------------- 1 | # python2.7 2 | """ 3 | - functions to plot binned and smoothed desnities of 2D data points 4 | 5 | --> plot as 2D probability density with Sum^x_y = 1, Integral = 1 6 | -For that purpose: divide by current sum (~number of events) and dx, dy 7 | which are the bins in x and y 8 | """ 9 | 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | np.random.seed(12345) 15 | #================================================================================ 16 | # fct. 
definitions 17 | #================================================================================ 18 | def density_2D( x, y, x_bin, y_bin, **kwargs): 19 | """ 20 | 2D, smoothed event density for point cloud with coordinates x,y 21 | uses method: scipy.stats.kde.gaussian_kde 22 | :input x,y - dataset 23 | x_bin, y_bin - binned x and y vectors 24 | 25 | 26 | kwargs['sigma'] - specify gaussian smoothing kernel ('bw_method' in scipy.stats.kde) 27 | default: = 'scott' 28 | sigma = n**( -1./(d+4)), d- number of dimensions, n - number of data points 29 | - 'silverman' 30 | sigma = (n * (d + 2) / 4.)**(-1. / (d + 4)) 31 | - float( ) = set Gaussian Bandwidth directly 32 | 33 | 34 | return XX, YY, ZZ - 2D binned x and y coordinates and density for each cell 35 | """ 36 | from scipy.stats import kde 37 | sigma = 'scott' 38 | if 'sigma' in kwargs.keys() and kwargs['sigma'] is not None: 39 | sigma = kwargs['sigma'] 40 | # Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents 41 | fct_Gauss2D = kde.gaussian_kde( np.array([x,y]), bw_method = sigma) 42 | # meshgrid of x and y coordinates 43 | XX,YY = np.meshgrid( x_bin, y_bin) 44 | ZZ = fct_Gauss2D( np.vstack([XX.flatten(), YY.flatten()])).reshape( XX.shape) 45 | dx, dy = x_bin[1] - x_bin[0], y_bin[1] - y_bin[0] 46 | # check if integral is ~ one, better: use midpoint method 47 | print( 'check if integral ~1', ZZ.sum()*( dx*dy)) #ZZ[ZZ>0].mean()*(XX.max()-XX.min())*(YY.max()-YY.min())) 48 | return XX-.5*dx, YY-.5*dy, ZZ 49 | #return XX, YY, ZZ 50 | #================================================================================ 51 | # parameters 52 | #================================================================================ 53 | Nev = 10 54 | nbins = 30 55 | xmin, xmax = -2.5, 2.5 56 | ymin, ymax = -5, 5 57 | 58 | sigma = .1 # Gaussian smoothing kernel 59 | 60 | # binsize in x and y 61 | dx, dy= float(xmax-xmin)/nbins, float(ymax-ymin)/nbins 62 | 63 | #================================================================================ 64 | # create random data, and binned data 65 | #================================================================================ 66 | # Create data: Nev random points 67 | data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], Nev) 68 | x, y = data.T 69 | a_xbin = np.arange( xmin-dx, xmax+2*dx, dx) 70 | a_ybin = np.arange( ymin-dy, ymax+2*dy, dy) 71 | 72 | #================================================================================ 73 | # compute Gaussian density 74 | #================================================================================ 75 | XX,YY,ZZ = density_2D( x, y, a_xbin, a_ybin, sigma = sigma) 76 | #================================================================================ 77 | # plots 78 | #================================================================================ 79 | # Create a figure with 6 plot areas 80 | fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5)) 81 | 82 | # Everything starts with a Scatterplot 83 | axes[0].set_title('Scatterplot') 84 | axes[0].plot(x, y, 'ko') 85 | # As you can see there is a lot of overplotting here!
86 | 87 | # Thus we can cut the plotting window in several hexbins 88 | 89 | axes[1].set_title('Hexbin') 90 | axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r) 91 | axes[1].plot( x, y, 'ko', ms= 2) 92 | axes[1].set_xlim( axes[0].get_xlim()) 93 | axes[1].set_ylim( axes[0].get_ylim()) 94 | 95 | # 2D Histogram 96 | axes[2].set_title('2D Histogram') 97 | counts, xedges, yedges, __ = axes[2].hist2d( x, y, bins=nbins, cmap=plt.cm.BuGn_r, normed = True) 98 | axes[2].plot( x, y, 'ko', ms= 2) 99 | axes[2].set_xlim( axes[0].get_xlim()) 100 | axes[2].set_ylim( axes[0].get_ylim()) 101 | 102 | dx,dy = (xedges[1]-xedges[0]), (yedges[1]-yedges[0]) 103 | #print(xedges, yedges, counts) 104 | print( 'check if integral ~1', counts.sum()*( dx*dy), counts.mean()*(xedges[-1]-xedges[0])*(yedges[-1]-yedges[0])) 105 | 106 | 107 | 108 | # plot a density 109 | axes[3].set_title('Gaussian Smoothing') 110 | axes[3].pcolormesh( XX, YY, ZZ, cmap=plt.cm.BuGn_r) 111 | axes[3].plot( x, y, 'ko', ms= 2) 112 | axes[3].set_xlim( axes[0].get_xlim()) 113 | axes[3].set_ylim( axes[0].get_ylim()) 114 | 115 | 116 | 117 | # add shading 118 | axes[4].set_title('2D Density with shading') 119 | axes[4].pcolormesh( XX,YY,ZZ, shading='gouraud', cmap=plt.cm.BuGn_r) 120 | axes[4].set_xlim( axes[0].get_xlim()) 121 | axes[4].set_ylim( axes[0].get_ylim()) 122 | 123 | 124 | # contour 125 | axes[5].set_title('Contour') 126 | axes[5].pcolormesh( XX, YY, ZZ, shading='gouraud', cmap=plt.cm.BuGn_r) 127 | axes[5].contour( XX, YY, ZZ ) 128 | axes[5].set_xlim( axes[0].get_xlim()) 129 | axes[5].set_ylim( axes[0].get_ylim()) 130 | plt.show() 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | --------------------------------------------------------------------------------
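A minimal usage sketch (hypothetical, not part of the repository tree above) for the standalone helpers defined in src/clustering.py and src/datetime_utils.py. It assumes the repository root is the working directory so that the src package is importable, and that the usual numpy/scipy/matplotlib dependencies are installed; the file name, coordinates, date, and sample size below are illustrative assumptions, not values taken from the project.

# usage_sketch.py (hypothetical file name, run from the repository root)
from src.clustering import haversine, rand_rate_uni
from src.datetime_utils import dateTime2decYr, decYr2datetime

# great-circle distance in km between two illustrative points in southern California
d_km = haversine(-118.24, 34.05, -117.5, 35.7)
print('epicentral distance [km]:', round(d_km, 1))

# convert a calendar date (YR, MO, DY, HR, MN, SC) to decimal year and back
t_dec = dateTime2decYr([2010, 4, 4, 22, 40, 42])
print('decimal year:', round(t_dec, 4), '-> back-converted:', decYr2datetime(t_dec))

# 1000 uniformly distributed random origin times, e.g. for a randomized catalog
a_t_rand = rand_rate_uni(1000, 1981, 2011)
print('random origin times between', a_t_rand.min(), 'and', a_t_rand.max())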