├── 1_create_mat_eqCat_file.py ├── 1b_plot_eqLocs.py ├── 2_eta_0.py ├── 3_NND.py ├── 4_dist_tau.py ├── 5_plot_lat_t.py ├── 6_createClust.py ├── 7_productivity.py ├── 7b_plot_productivity.py ├── README.md ├── clust_SoCal.ipynb ├── data ├── hs_1981_2011_all.mat ├── hs_1981_2011_all_NND_Mc_3.0.mat ├── hs_1981_2011_all_NND_Mc_4.0.mat ├── hs_1981_2011_all_RT_Mc_3.0.mat └── hs_1981_2011_all_RT_Mc_4.0.mat ├── plots ├── SI_indAS_deficiencyOK_v5.0.pdf ├── T_R_hs_1981_2011_all_Mc_3.0.png ├── T_R_hs_1981_2011_all_Mc_4.0.png ├── hs_1981_2011_all_Mc_3.0_ASprod.png ├── hs_1981_2011_all_Mc_4.0_ASprod.png ├── hs_1981_2011_all_NND_hist_Mc_3.0.png ├── hs_1981_2011_all_NND_hist_Mc_4.0.png ├── hs_1981_2011_all_spanningTree_Mc_3.0.png └── hs_1981_2011_all_spanningTree_Mc_4.0.png ├── src ├── EqCat.py ├── __init__.py ├── clustering.py ├── data_utils.py └── datetime_utils.py └── test_scripts └── 1_2D_density_plots.py /1_create_mat_eqCat_file.py: -------------------------------------------------------------------------------- 1 | #!python3.7 2 | ''' 3 | Created on March 28th, 2019 4 | 5 | - load Hauksson, Shearer 2011 eq catalog from scec data center, alterntive catalogs 6 | - save as .mat binary for fast data I/O 7 | - note that the original catalog is not provided and has to be downloaded from the web: 8 | https://scedc.caltech.edu/eq-catalogs/altcatalogs.html 9 | 10 | @author: tgoebel 11 | ''' 12 | #------------------------------------------------------------------------------ 13 | import os 14 | 15 | #------------------------------my modules-------------------------------------- 16 | from src.EqCat import EqCat 17 | 18 | eqCat = EqCat( ) 19 | 20 | #=================================1============================================== 21 | # dir, file, params 22 | #================================================================================ 23 | # change to local dir where eq. catalogs are saved 24 | # the original catalog can be found here: https://scedc.caltech.edu/research-tools/altcatalogs.html 25 | dir_in = 'data' 26 | file_in= 'hs_1981_2011_all.txt' 27 | 28 | #=================================2============================================== 29 | # load data 30 | #================================================================================ 31 | import numpy as np 32 | # 0-5 (datetime), 6(ID), 7 (lat), 8 (lon), 9 (depth), 10 (mag) 33 | mData = np.loadtxt( f"{dir_in}/{file_in}", usecols=(0,1,2,3,4,5,6,7,8,9, 10)).T 34 | print( mData.shape) 35 | 36 | eqCat.loadEqCat( f"{dir_in}/{file_in}", 'HS_reloc') 37 | 38 | print( 'total no. 
of events: ', eqCat.size())
39 | print( sorted( eqCat.data.keys()))
40 | #=================================3==============================================
41 | # test plot and save to .mat binary
42 | #================================================================================
43 | eqCat.saveMatBin( file_in.replace( 'txt', 'mat'))
44 | newEqCat = EqCat( )
45 | newEqCat.loadMatBin( file_in.replace( 'txt', 'mat'))
46 | print( newEqCat.size())
47 | print( sorted( newEqCat.data.keys()))
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/1b_plot_eqLocs.py:
--------------------------------------------------------------------------------
1 | #!python3.7
2 | '''
3 | Created on March 28th, 2019
4 |
5 | - event selection
6 | - plot earthquake catalog
7 |
8 | TODO: - implement geo-referenced plotting with Basemap
9 |
10 |
11 | @author: tgoebel
12 | '''
13 | #------------------------------------------------------------------------------
14 | import matplotlib.pyplot as plt
15 | import numpy as np
16 | import os
17 |
18 | os.environ["PROJ_LIB"] = f"{os.environ['HOME']}/opt/anaconda3/share/proj"
19 | from mpl_toolkits.basemap import Basemap
20 | #------------------------------my modules--------------------------------------
21 |
22 | from src.EqCat import EqCat
23 |
24 | eqCat = EqCat( )
25 |
26 | #print( dir( dataUtils))
27 | #=================================1==============================================
28 | # dir, file, params
29 | #================================================================================
30 | dir_in = 'data'
31 | file_in= 'hs_1981_2011_all.mat'
32 | #xmin, xmax = -122, -114
33 | #ymin, ymax = 34, 38
34 | Mmin, Mmax = 3, None
35 | tmin, tmax = 1990, 2012
36 |
37 |
38 | #=================================2==============================================
39 | # load data, select events
40 | #================================================================================
41 | os.chdir( dir_in)
42 | eqCat.loadMatBin( file_in)
43 | print( eqCat.methods)
44 |
45 | print( 'total no. of events', eqCat.size())
46 | eqCat.selectEvents( Mmin, Mmax, 'Mag')
47 | eqCat.selectEvents( tmin, tmax, 'Time')
48 | print( 'no. of events after initial selection', eqCat.size())
49 | #=================================3==============================================
50 | # test plot
51 | #================================================================================
52 | projection = 'cyl'
53 | xmin,xmax = eqCat.data['Lon'].min(), eqCat.data['Lon'].max()
54 | ymin,ymax = eqCat.data['Lat'].min(), eqCat.data['Lat'].max()
55 |
56 | # setup equi-distant basemap.
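# If Basemap is not installed, the map below can be skipped (clust_SoCal.ipynb does this
# via a try/except fallback), or a rough, unprojected overview can be drawn directly from
# the catalog columns, e.g.:
#   plt.plot( eqCat.data['Lon'], eqCat.data['Lat'], 'ko', ms = 1)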
57 | m = Basemap( llcrnrlat = ymin,urcrnrlat = ymax,
58 | llcrnrlon = xmin,urcrnrlon = xmax,
59 | projection = projection,lat_0=(ymin+ymax)*.5,lon_0=(xmin+xmax)*.5,
60 | resolution = 'l')
61 | m.drawstates( linewidth = 1)
62 | m.drawcoastlines( linewidth= 2)
63 | a_x, a_y = m( eqCat.data['Lon'], eqCat.data['Lat'])
64 | m.plot( a_x, a_y, 'ko', ms = 1)
65 | sel7 = eqCat.data['Mag'] >=7
66 | m.plot( a_x[sel7], a_y[sel7], 'ro', ms = 8, mew= 1.5, mfc = 'none')
67 |
68 |
69 | m.drawmeridians( np.linspace( int(xmin), xmax, 4),labels=[False,False,False,True],
70 | fontsize = 12, fmt = '%.1f')
71 | m.drawparallels( np.linspace( int(ymin), ymax, 4),labels=[True,False,False,False],
72 | fontsize = 12, fmt = '%.2f')
73 |
74 | plt.savefig( file_in.replace( 'mat', 'png'))
75 | plt.show()
76 |
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/2_eta_0.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on October 9, 2019
3 |
4 | 1) shuffle event magnitudes and times
5 | 2) compute NND values for all event pairs
6 | 3) use 1st percentile as estimate for eta_0
7 | -> note that eta_0 is required to separate clustered
8 | and background events in the following analysis steps
9 | 4) eta_0 is saved in the /data directory
10 | file_out = [file_in]_Mc_[mc]_eta_0.txt
11 | @author: tgoebel
12 | '''
13 | #------------------------------------------------------------------------------
14 | import matplotlib as mpl
15 | #mpl.use( 'Agg') # turn off interactive plot
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | import scipy.io
19 | import os
20 |
21 | #------------------------------my modules--------------------------------------
22 | import src.clustering as clustering
23 | import src.data_utils as data_utils
24 | from src.EqCat import *
25 |
26 | eqCat = EqCat( ) # original cat
27 | ranCat = EqCat() # randomized, Poissonian catalog
28 | eqCatMc = EqCat() # catalog above completeness
29 | np.random.seed( 123456)
30 | #=================================1==============================================
31 | # dir, file, params
32 | #================================================================================
33 | dir_in = 'data'
34 | file_in= 'hs_1981_2011_all.mat'
35 |
36 | #file_b = '%s_b_Mc_D.txt'%(fileIn.split('.')[0])
37 | dPar = { 'aMc' : np.array([3.0, 4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]),
38 | # fractal dimension and b for eq. (1)
39 | 'D' : 1.6, # TODO: - these values should be constrained based on the data
40 | 'b' : 1.0, # use: https://github.com/tgoebel/magnitude-distribution for b-value
41 |
42 | # number of bootstraps for randomized catalogs
43 | 'nBoot' : 100,
44 | #=================plotting==============
45 | 'eta_binsize' : .3,
46 |
47 | 'cmin' : 1,
48 | 'xmin' : -13, 'xmax' : 0,
49 | ## R-T plot
50 | 'binx' : .1, 'biny' : .1,# used for density and gaussian smoothing
51 | 'sigma' : None, #if None: default = n**(-1./(d+4)), or set Gaussian bandwidth
52 | 'Tmin' : -8, 'Tmax' : 0,
53 | 'Rmin' : -5, 'Rmax' : 3,
54 | 'cmap' : plt.cm.RdYlGn_r,
55 | 'showPlot' : False,
56 | }
57 |
58 | #================================================================================
59 | # load data, event selection
60 | #================================================================================
61 | eqCat.loadMatBin( os.path.join( dir_in, file_in))
62 | print( 'total no. 
of events', eqCat.size()) 63 | eqCat.selectEvents( dPar['aMc'][0], None, 'Mag') 64 | #eqCat.selectEvents( tmin, tmax, 'Time') 65 | print( 'no. of events after initial selection', eqCat.size()) 66 | # project to equi-distant coordiante system for cartesian distances 67 | eqCat.toCart_coordinates( projection = 'eqdc')#'eqdc') 68 | for f_Mc in dPar['aMc']: 69 | print( '-------------- current Mc:', f_Mc, '---------------------') 70 | # select magnitude range 71 | eqCatMc.copy( eqCat) 72 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 73 | print( 'catalog size after MAG selection', eqCat.size()) 74 | # this dictionary is used in module: clustering 75 | dConst = {'Mc' : f_Mc, 76 | 'b' : dPar['b'], 77 | 'D' : dPar['D']} 78 | 79 | #=============================2=================================================== 80 | # randomize catalog 81 | #================================================================================= 82 | a_Eta_0 = np.zeros( dPar['nBoot']) 83 | for i_Bs in range( dPar['nBoot']): 84 | 85 | ranCat.copy( eqCatMc) 86 | ranCat.data['X'] = np.random.uniform( eqCatMc.data['X'].min(), eqCatMc.data['X'].max(), size = eqCatMc.size()) 87 | ranCat.data['Y'] = np.random.uniform( eqCatMc.data['Y'].min(), eqCatMc.data['Y'].max(), size = eqCatMc.size()) 88 | ranCat.data['Time'] = clustering.rand_rate_uni( eqCatMc.size(), eqCatMc.data['Time'].min(), eqCatMc.data['Time'].max()) 89 | ranCat.sortCatalog( 'Time') 90 | #==================================3============================================= 91 | # compute space-time-magnitude distance, histogram 92 | #================================================================================ 93 | dNND = clustering.NND_eta( ranCat, dConst, M0 = 0, correct_co_located = True, 94 | verbose = False) 95 | a_Eta_0[i_Bs] = round( np.percentile( np.log10(dNND['aNND']), 1), 5) 96 | print( 'nBoot', i_Bs+1,'out of', dPar['nBoot'], 'eta 0 - 1st', np.percentile( np.log10(dNND['aNND']), 1)) 97 | if dPar['showPlot'] == True: # plots to check if everything is working 98 | #=================================4============================================== 99 | # plot NND histogram 100 | #================================================================================ 101 | plt.figure( 1, figsize = (10,5)) 102 | ax = plt.axes( [.12, .12, .83, .83]) 103 | ax.hist( np.log10( dNND['aNND']), np.arange( dPar['xmin'], dPar['xmax'], dPar['eta_binsize']), 104 | color = '.5', label = 'Mc = %.1f'%( f_Mc), align = 'mid', rwidth=.9) 105 | ax.plot( [-5, -5], ax.get_ylim(), 'w-', lw = 2, ) 106 | ax.plot( [-5, -5], ax.get_ylim(), 'k--', lw = 2, ) 107 | ax.plot( [a_Eta_0[i_Bs], a_Eta_0[i_Bs]], ax.get_ylim(), 'w-', lw = 2, label = '$N_\mathrm{tot}$=%i'%( ranCat.size())) 108 | ax.plot( [a_Eta_0[i_Bs], a_Eta_0[i_Bs]], ax.get_ylim(), 'r--', lw = 2, label = '$N_\mathrm{cl}$=%i'%( dNND['aNND'][dNND['aNND']<1e-5].shape[0])) 109 | 110 | ax.legend( loc = 'upper left') 111 | ax.set_xlabel( 'NND, log$_{10} \eta$') 112 | ax.set_ylabel( 'Number of Events') 113 | ax.grid( 'on') 114 | ax.set_xlim( dPar['xmin'], dPar['xmax']) 115 | 116 | 117 | #==================================4============================================================== 118 | # T-R density plot 119 | #================================================================================================= 120 | catChild = EqCat() 121 | catParent= EqCat() 122 | catChild.copy( ranCat) 123 | catParent.copy( ranCat) 124 | 125 | catChild.selEventsFromID( dNND['aEqID_c'], repeats = True) 126 | catParent.selEventsFromID( dNND['aEqID_p'], repeats = 
True)
127 | print( catChild.size(), catParent.size(), eqCatMc.size())
128 | a_R, a_T = clustering.rescaled_t_r( catChild, catParent, dConst, correct_co_located = True)
129 |
130 | a_Tbin = np.arange( dPar['Tmin'], dPar['Tmax']+2*dPar['binx'], dPar['binx'])
131 | a_Rbin = np.arange( dPar['Rmin'], dPar['Rmax']+2*dPar['biny'], dPar['biny'])
132 | a_log_T = np.log10( a_T)
133 | a_log_R = np.log10( a_R)
134 | XX, YY, ZZ = data_utils.density_2D( a_log_T, a_log_R, a_Tbin, a_Rbin, sigma = dPar['sigma'])
135 |
136 | plt.figure(2, figsize= (8,10))
137 | ax = plt.subplot(111)
138 | ax.set_title( 'Nearest Neighbor Pairs in R-T')
139 | #------------------------------------------------------------------------------
140 | normZZ = ZZ*( dPar['binx']*dPar['biny']*eqCatMc.size())
141 | plot1 = ax.pcolormesh( XX, YY, normZZ, cmap=dPar['cmap'])
142 | cbar = plt.colorbar(plot1, orientation = 'horizontal', shrink = .5, aspect = 20,)
143 | #ax.plot( np.log10( a_T), np.log10( a_R), 'wo', ms = 1.5, alpha = .2)
144 | # plot eta_0 to divide clustered and background mode
145 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+a_Eta_0[i_Bs], '-', lw = 1.5, color = 'w' )
146 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+a_Eta_0[i_Bs],'--', lw = 1.5, color = '.5' )
147 | #-----------------------labels and legends-------------------------------------------------------
148 | #cbar.set_label( 'Event Pair Density [#ev./dRdT]')
149 | cbar.set_label( 'Number of Event Pairs',labelpad=-40)
150 | ax.set_xlabel( 'Rescaled Time')
151 | ax.set_ylabel( 'Rescaled Distance')
152 | ax.set_xlim( dPar['Tmin'], dPar['Tmax'])
153 | ax.set_ylim( dPar['Rmin'], dPar['Rmax'])
154 |
155 | plt.show()
156 | #=================================3==============================================
157 | # save results
158 | #================================================================================
159 | f_eta_0 = a_Eta_0.mean()
160 | print( 'mean eta_0', a_Eta_0.mean())
161 | file_out = '%s/%s_Mc_%.1f_eta_0.txt'%(dir_in, file_in, f_Mc)
162 | np.savetxt( file_out, np.array([f_eta_0]), fmt = '%10.3f', header='eta_0')
163 | print( 'save results', file_out)
164 | scipy.io.savemat(file_out.replace('txt','mat'),
165 | {'eta_0': f_eta_0, 'eta_BS' : a_Eta_0,}, do_compression=True)
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/3_NND.py:
--------------------------------------------------------------------------------
1 | #!python3.7
2 | '''
3 | Created on April 10th, 2019
4 |
5 | - compute nearest-neighbor distance (NND) between all event pairs (see equ. 
1 in Zaliapin & Ben-Zion, 2013) 6 | - test -plot histogram of NNDs: Figure 4c in Zaliapin & Ben-Zion, 2013 7 | 8 | output: 'data/%s_NND_Mc_%.1f.mat'%(dPar['catName'], dPar['Mc']) 9 | which is a python dictionary with: 10 | { 'aNND' : aNND, - nearest neighbor space-time magnitude distance 11 | 'aEqID_p' : np.array - ID of the parent event 12 | 'aEqID_c' : np.array - ID of the child event 13 | } 14 | 15 | TODO: 16 | - constrain Mc, b and D independently through statistical analysis of the actual data 17 | 18 | @author: tgoebel 19 | ''' 20 | import time 21 | #------------------------------------------------------------------------------ 22 | import matplotlib as mpl 23 | #mpl.use( 'Agg') # turn off interactive plot 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | import scipy.io 27 | import os 28 | 29 | #------------------------------my modules-------------------------------------- 30 | import src.clustering as clustering 31 | from src.EqCat import EqCat 32 | 33 | eqCat = EqCat( ) 34 | 35 | #=================================1============================================== 36 | # dir, file, params 37 | #================================================================================ 38 | dir_in = 'data' 39 | file_in= 'hs_1981_2011_all.mat' 40 | 41 | #file_b = '%s_b_Mc_D.txt'%(fileIn.split('.')[0]) 42 | dPar = { 'aMc' : np.array([3.0]), #3.0, 4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 43 | # fractal dimension and b for eq. (1) 44 | 'D' : 1.6, # TODO: - these values should be contrained independently 45 | 'b' : 1.0, # use: https://github.com/tgoebel/magnitude-distribution for b-value 46 | #=================plotting============== 47 | 'eta_binsize' : .3, 48 | 'xmin' : -13, 'xmax' : 0, 49 | } 50 | 51 | #=================================2============================================== 52 | # load data, select events 53 | #================================================================================ 54 | eqCat.loadMatBin( os.path.join( dir_in, file_in)) 55 | print( 'total no. of events', eqCat.size()) 56 | eqCat.selectEvents( dPar['aMc'][0], None, 'Mag') 57 | #eqCat.selectEvents( tmin, tmax, 'Time') 58 | print( 'no. 
of events after initial selection', eqCat.size()) 59 | #=================================1============================================== 60 | # to cartesian coordinates 61 | #================================================================================ 62 | # two ways to do the distance comp: 1 project into equal distance azimuthal , comp Cartersian distance in 3D 63 | # 2 get surface distance from lon, lat (haversine), use pythagoras to include depth 64 | eqCat.toCart_coordinates( projection = 'eqdc') 65 | 66 | for f_Mc in dPar['aMc']: 67 | print( '-------------- current Mc:', f_Mc, '---------------------') 68 | # select magnitude range 69 | eqCat.selectEvents( f_Mc, None, 'Mag') 70 | print( 'catalog size after MAG selection', eqCat.size()) 71 | # this dictionary is used in module: clustering 72 | dConst = {'Mc' : f_Mc, 73 | 'b' : dPar['b'], 74 | 'D' : dPar['D']} 75 | #==================================2============================================= 76 | # compute space-time-magnitude distance, histogram 77 | #================================================================================ 78 | eqCat.data['Z'] = eqCat.data['Depth'] 79 | print('depth range: ', eqCat.data['Z'].min(), eqCat.data['Z'].max()) 80 | dCluster = clustering.NND_eta( eqCat, dConst, distance_3D = False, 81 | correct_co_located = True, verbose= True) 82 | ###histogram 83 | aBins = np.arange( -13, 1, dPar['eta_binsize'], dtype = float) 84 | aHist, aBins = np.histogram( np.log10( dCluster['aNND'][dCluster['aNND']>0]), aBins) 85 | aBins = aBins[0:-1] + dPar['eta_binsize']*.5 86 | # correct for binsize 87 | aHist = aHist/dPar['eta_binsize'] 88 | # to pdf (prob. density) 89 | aHist /= eqCat.size() 90 | #=================================3============================================== 91 | # save results 92 | #================================================================================ 93 | import scipy.io 94 | NND_file = 'data/%s_NND_Mc_%.1f.mat'%( file_in.split('.')[0], f_Mc) 95 | print( 'save file', NND_file) 96 | scipy.io.savemat( NND_file, dCluster, do_compression = True) 97 | 98 | #=================================4============================================== 99 | # plot histogram 100 | #================================================================================ 101 | # load eta_0 value - only for plotting purposes 102 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(dir_in, file_in, f_Mc) 103 | if os.path.isfile( eta_0_file): 104 | print( 'load eta_0 from file'), 105 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 106 | print( 'eta_0',f_eta_0) 107 | else: 108 | f_eta_0 = -5 109 | print( 'could not find eta_0 file', eta_0_file, 'use value: ', f_eta_0) 110 | 111 | fig, ax = plt.subplots() 112 | #ax.plot( vBin, vHist, 'ko') 113 | ax.bar( aBins, aHist, width =.8*dPar['eta_binsize'], align = 'edge', color = '.5', label = 'Mc = %.1f'%( f_Mc)) 114 | ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'w-', lw = 2, label = '$N_\mathrm{tot}$=%i'%( eqCat.size())) 115 | ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'r--', lw = 2, label = '$N_\mathrm{cl}$=%i'%( dCluster['aNND'][dCluster['aNND']<1e-5].shape[0])) 116 | 117 | ax.legend( loc = 'upper left') 118 | ax.set_xlabel( 'NND, log$_{10} \eta$') 119 | ax.set_ylabel( 'Number of Events') 120 | ax.grid( 'on') 121 | ax.set_xlim( dPar['xmin'], dPar['xmax']) 122 | plt.show() 123 | 124 | plotFile = 'plots/%s_NND_hist_Mc_%.1f.png'%( file_in.split('.')[0], f_Mc) 125 | print( 'save plot', plotFile) 126 | #plt.savefig( plotFile) 127 | plt.clf() 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 
| 136 | 137 | -------------------------------------------------------------------------------- /4_dist_tau.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on August 16, 2016 3 | 4 | - compute inter-event times and distance and normalize by magnitude 5 | - create colormap of event-pair density (i.e. NND events) for 6 | corresponding rescaled event times and distances 7 | 8 | @author: tgoebel 9 | ''' 10 | import matplotlib as mpl 11 | mpl.use( 'Agg') # uncomment for interactive plotting 12 | 13 | import os 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | import scipy.io 17 | #------------------------------my modules-------------------------------------- 18 | import src.data_utils as data_utils 19 | import src.clustering as clustering 20 | from src.EqCat import * 21 | 22 | eqCat = EqCat() # original catalog 23 | eqCatMc = EqCat() # this catalog wil be modfied with each Mc iteration 24 | catChild= EqCat() 25 | catParent= EqCat() 26 | np.random.seed( 123456) 27 | #=================================1============================================== 28 | # dir, file, params 29 | #================================================================================ 30 | data_dir = 'data' 31 | file_in = 'hs_1981_2011_all.mat' 32 | 33 | #file_b = '%s_b_Mc_D.txt'%(fileIn.split('.')[0]) 34 | dPar = { 'a_Mc' : np.array([4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 35 | # fractal dimension and b for eq. (1) 36 | 'D' : 1.6, # TODO: - these values should be constrained independently 37 | 'b' : 1.0, 38 | 39 | #===================smoothing parameters============= 40 | 'binx' : .1, 'biny' : .1,# used for density and gaussian smoothing 41 | 'sigma' : None, #if None: default = n**(-1./(d+4)), 42 | #=================plotting============== 43 | 'eta_0' : -5.0, # run: 2_eta_0.py and 44 | # if eta-0-file exists: default = load this value from ASCII file 45 | #'xmin' : -13, 'xmax' : 0, 46 | 'Tmin' : -8, 'Tmax' : 0, 47 | 'Rmin' : -5, 'Rmax' : 3, 48 | 'cmap' : plt.cm.RdYlGn_r, } 49 | 50 | #=================================2============================================== 51 | # load data, select events 52 | #================================================================================ 53 | eqCat.loadMatBin( os.path.join( data_dir, file_in)) 54 | 55 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 56 | #eqCat.selectEvents( tmin, tmax, 'Time') 57 | print( 'no. 
of events after initial selection', eqCat.size()) 58 | 59 | # two ways to do the distance comp: 1 project into equal distance azimuthal , comp Cartersian distance in 3D 60 | # 2 get surface distance from lon, lat (haversine), use pythagoras to include depth 61 | eqCat.toCart_coordinates( projection = 'eqdc') 62 | 63 | for i in range( dPar['a_Mc'].shape[0]): 64 | f_Mc = dPar['a_Mc'][i] 65 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(data_dir, file_in, f_Mc) 66 | # load eta_0 value 67 | if os.path.isfile( eta_0_file): 68 | print( 'load eta_0 from file'), 69 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 70 | else: 71 | print( 'could not find eta_0 file', eta_0_file, 'use value from dPar', dPar['eta_0']) 72 | f_eta_0 = dPar['eta_0'] 73 | # cut below current completeness 74 | eqCatMc.copy( eqCat) 75 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 76 | print( 'current catalog size: ',eqCatMc.size()) 77 | 78 | # load nearest neighbor distances 79 | NND_file = 'data/%s_NND_Mc_%.1f.mat'%( file_in.split('.')[0], f_Mc) 80 | dNND = data_utils.loadmat(NND_file) #, struct_as_record=True) 81 | 82 | #==================================3============================================= 83 | # compute re-scaled interevent times and distances 84 | #================================================================================ 85 | catChild.copy( eqCatMc) 86 | catParent.copy( eqCatMc) 87 | catChild.selEventsFromID( dNND['aEqID_c'], repeats = True) 88 | catParent.selEventsFromID( dNND['aEqID_p'], repeats = True) 89 | print( 'size of offspring catalog', catChild.size(), 'size of parent cat', catParent.size()) 90 | 91 | # note that dictionary dPar here has to include 'b','D' and 'Mc' 92 | a_R, a_T = clustering.rescaled_t_r( catChild, catParent, {'b':dPar['b'], 'D':dPar['D'], 'Mc':f_Mc}, correct_co_located = True) 93 | RT_file = 'data/%s_RT_Mc_%.1f.mat'%( file_in.split('.')[0], f_Mc) 94 | scipy.io.savemat( RT_file, {'R' : a_R, 'T': a_T}, do_compression = True) 95 | #==================================4============================================================== 96 | # T-R density plots 97 | #================================================================================================= 98 | a_Tbin = np.arange( dPar['Tmin'], dPar['Tmax']+2*dPar['binx'], dPar['binx']) 99 | a_Rbin = np.arange( dPar['Rmin'], dPar['Rmax']+2*dPar['biny'], dPar['biny']) 100 | XX, YY, ZZ = data_utils.density_2D( np.log10( a_T), np.log10( a_R), a_Tbin, a_Rbin, sigma = dPar['sigma']) 101 | 102 | fig1 = plt.figure(1, figsize= (8,10)) 103 | ax = plt.subplot(111) 104 | ax.set_title( 'Nearest Neighbor Pairs in R-T') 105 | #------------------------------------------------------------------------------ 106 | normZZ = ZZ*( dPar['binx']*dPar['biny']*eqCatMc.size()) 107 | plot1 = ax.pcolormesh( XX, YY, normZZ, cmap=dPar['cmap']) 108 | cbar = plt.colorbar(plot1, orientation = 'horizontal', shrink = .5, aspect = 20,) 109 | #ax.plot( np.log10( a_T), np.log10( a_R), 'wo', ms = 1.5, alpha = .2) 110 | # plot eta_0 to divide clustered and background mode 111 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+f_eta_0, '-', lw = 1.5, color = 'w' ) 112 | ax.plot( [dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']])+f_eta_0,'--', lw = 1.5, color = '.5' ) 113 | #-----------------------labels and legends------------------------------------------------------- 114 | #cbar.set_label( 'Event Pair Density [#ev./dRdT]') 115 | cbar.set_label( 'Number of Event Pairs',labelpad=-40) 116 | ax.set_xlabel( 'Rescaled Time') 117 | 
ax.set_ylabel( 'Rescaled Distance') 118 | ax.set_xlim( dPar['Tmin'], dPar['Tmax']) 119 | ax.set_ylim( dPar['Rmin'], dPar['Rmax']) 120 | 121 | #=================================5============================================== 122 | # save results 123 | #================================================================================ 124 | print( 'plot saved in: ','plots/T_R_%s_Mc_%.1f.png'%( file_in.split('.')[0], f_Mc)) 125 | fig1.savefig( 'plots/T_R_%s_Mc_%.1f.png'%( file_in.split('.')[0], f_Mc)) 126 | plt.show() 127 | 128 | plt.clf() 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /5_plot_lat_t.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 16, 2019 3 | 4 | - plot cluster families with eta <= eta_0 5 | - plot lat and time (dec. year) 6 | 7 | @author: tgoebel - Thomas Goebel University of Memphis 8 | ''' 9 | import matplotlib as mpl 10 | #mpl.use( 'Agg') # uncomment for interactive plotting 11 | 12 | import os 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | #------------------------------my modules-------------------------------------- 17 | import src.data_utils as dataIO 18 | #import src.clustering as clustering 19 | from src.EqCat import EqCat 20 | 21 | eqCat = EqCat() # original catalog 22 | eqCatMc = EqCat() # this catalog will be modified with each Mc iteration 23 | catChild= EqCat() 24 | catParent= EqCat() 25 | 26 | #=================================1============================================== 27 | # dir, file, params 28 | #================================================================================ 29 | data_dir = 'data' 30 | plot_dir = 'plots' 31 | file_in = 'hs_1981_2011_all.mat' 32 | 33 | dPar = { 'a_Mc' : np.array([4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 34 | #separate clustered and background 35 | 'eta_0' : -5.0, # run 2_eta_0.py and 36 | # if file exists: default = load this value from ASCII file 37 | } 38 | 39 | #=================================2============================================== 40 | # load data, select events 41 | #================================================================================ 42 | eqCat.loadMatBin( os.path.join( data_dir, file_in)) 43 | print( 'total no. of events', eqCat.size()) 44 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 45 | #eqCat.selectEvents( tmin, tmax, 'Time') 46 | print( 'no. 
of events after initial selection', eqCat.size()) 47 | 48 | iMc = 0 49 | for f_Mc in dPar['a_Mc']: 50 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(data_dir, file_in, f_Mc) 51 | # load eta_0 value 52 | if os.path.isfile( eta_0_file): 53 | print( 'load eta_0 from file'), 54 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 55 | print( f_eta_0) 56 | else: 57 | print( 'could not find eta_0 file', eta_0_file, 'use value from dPar', dPar['eta_0']) 58 | f_eta_0 = dPar['eta_0'] 59 | # cut below current completeness 60 | eqCatMc.copy( eqCat) 61 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 62 | print( 'current catalog size: ',eqCatMc.size()) 63 | # load nearest neighbor distances 64 | NND_file = '%s_NND_Mc_%.1f.mat'%(os.path.basename( file_in).split('.')[0], f_Mc) 65 | dNND = dataIO.loadmat( os.path.join( data_dir, NND_file)) 66 | print( dNND.keys()) 67 | dNND['aNND'] = np.log10( dNND['aNND']) 68 | #==================================3============================================= 69 | # "declustering" step 70 | #================================================================================ 71 | #catChild, catPar = create_parent_child_cat( projCat, dNND) 72 | catChild.copy( eqCat) 73 | catParent.copy( eqCat) 74 | catChild.selEventsFromID( dNND['aEqID_c'], repeats = True) 75 | catParent.selEventsFromID( dNND['aEqID_p'], repeats = True) 76 | print( 'tot. ev', eqCatMc.size(), 'parents', np.unique( catParent.data['N']).shape[0], 'children', np.unique( catChild.data['N']).shape[0]) 77 | #==================================4============================================= 78 | # spanning tree 79 | #================================================================================ 80 | plt.figure( 1) 81 | ax = plt.subplot(111) 82 | for iEv in range( catParent.size()): 83 | print( 'MS', int( catParent.data['N'][iEv]), catParent.data['Time'][iEv], eqCatMc.data['Time'][iEv]) 84 | 85 | if dNND['aNND'][iEv] < dPar['eta_0']:#triggered cluster 86 | ax.plot( [catParent.data['Time'][iEv]], [catParent.data['Lat'][iEv]], 'ro', ms = 12, alpha = .2) 87 | ax.plot( [catParent.data['Time'][iEv],catChild.data['Time'][iEv]], 88 | [catParent.data['Lat'][iEv], catChild.data['Lat'][iEv]], 'k-', marker = 'o', ms = 4, mew =1, mfc = 'none') 89 | else: # independent events 90 | ax.plot( [catChild.data['Time'][iEv]], [catChild.data['Lat'][iEv]], 'bo', ms = 5, alpha = .6) 91 | 92 | #ax.set_xlim( 2009, 2017) 93 | #=================================3============================================== 94 | # save results 95 | #================================================================================ 96 | 97 | plt.figure(1) 98 | #plt.savefig( '%s/%s_spanningTree_Mc_%.1f.png'%(plot_dir, file_in.split('.')[0], f_Mc)) 99 | ## save main shock catalog 100 | plt.show() 101 | plt.clf() 102 | 103 | 104 | iMc += 1 105 | -------------------------------------------------------------------------------- /6_createClust.py: -------------------------------------------------------------------------------- 1 | # python3.7 2 | ''' 3 | Created on Oct 7th, 2019 4 | 5 | key step in the analysis during which complete families of triggered events are assembled 6 | 7 | 1) select all event pairs with NND <= eta_0 8 | 2) place each event into a new cluster (family) with unique cluster ID 9 | or append to existing cluster if parent or offspring event are found in list 10 | of previously created clusters 11 | 12 | 13 | Input: NND_file = str() 14 | eta_0 = float() 15 | 16 | 17 | Output: 18 | dictionary with all event families 19 | dic['0'] = singles 20 | all other cluster 
are integer-strings followed by the associated eqID number 21 | e.g. 22 | { '0': np.array([[1243245,4253455343]]), 23 | '1': np.array([[5235,43455343,3456,56652,54]]), 24 | '2': ....} 25 | Note that: 26 | 1) output is saved as matlab binary but file cannot 27 | be read with matlab because variable names are integers 28 | 2) cluster '0' are singles 29 | 3) mainshocks can be defined as largest event or 30 | first event in a family 31 | 32 | @author: Thomas Goebel - University of Memphis 33 | ''' 34 | import matplotlib as mpl 35 | #mpl.use( 'Agg') 36 | import matplotlib.pyplot as plt 37 | 38 | import numpy as np 39 | import os, scipy.io 40 | 41 | #------------------------------my modules-------------------------------------- 42 | import src.data_utils as dataIO 43 | import src.clustering as clustering 44 | from src.EqCat import * 45 | 46 | eqCat = EqCat() # original catalog 47 | eqCatMc = EqCat() # this catalog will be modified with each Mc iteration 48 | 49 | #=================================1============================================== 50 | # dir, file, params 51 | #================================================================================ 52 | data_dir = 'data' 53 | plot_dir = 'plots' 54 | file_in = 'hs_1981_2011_all.mat' 55 | 56 | 57 | dPar = { 'a_Mc' : np.array([3.0]), #3.0, 4.0]), #np.array( [2.0, 2.5, 3.0, 3.5]), 58 | #separate clustered and background 59 | # set to None or False to use value from file,requires results from: 2_eta_0.py 60 | 'eta_0' : None, #-5.0, 61 | } 62 | 63 | #=================================2============================================== 64 | # load data, select events 65 | #================================================================================ 66 | eqCat.loadMatBin( os.path.join( data_dir, file_in)) 67 | print( 'total no. of events', eqCat.size()) 68 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 69 | #eqCat.selectEvents( tmin, tmax, 'Time') 70 | print( 'no. 
of events after initial selection', eqCat.size()) 71 | 72 | iMc = 0 73 | for f_Mc in dPar['a_Mc']: 74 | clust_file = file_in.replace( 'all.mat', 'Mc_%.1f_clusters.mat'%( f_Mc)) 75 | eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(data_dir, file_in, f_Mc) 76 | if os.path.isfile( eta_0_file): 77 | print( 'load eta_0 from file'), 78 | f_eta_0 = np.loadtxt( eta_0_file, dtype = float) 79 | print( 'eta_0',f_eta_0) 80 | else: 81 | f_eta_0 = -5 82 | print( 'could not find eta_0 file', eta_0_file, 'use value: ', f_eta_0) 83 | 84 | 85 | # cut below current completeness 86 | eqCatMc.copy( eqCat) 87 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 88 | print( 'current catalog size: ',eqCatMc.size()) 89 | # load nearest neighbor distances 90 | NND_file = '%s_NND_Mc_%.1f.mat' % (file_in.split('.')[0], f_Mc) 91 | dNND = dataIO.loadmat( os.path.join( data_dir, NND_file)) 92 | dNND['aNND'] = np.log10( dNND['aNND']) 93 | 94 | #==================================3============================================= 95 | # assemble clusters 96 | #================================================================================ 97 | print( 'similarity threshold', dPar['eta_0']) 98 | # clustering according to eta_0 similarity criteria 99 | dClust = clustering.compileClust( dNND, f_eta_0, useLargerEvents = False) 100 | #=================================4========================================================================== 101 | # save results 102 | #============================================================================================================ 103 | scipy.io.savemat( os.path.join( data_dir,clust_file), dClust, do_compression=True) 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /7_productivity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 07, 2019 3 | 4 | 1) count number of events after largest mag event (MS) within each family 5 | 2) count singles as events with 0 aftershocks 6 | 7 | @author: tgoebel 8 | ''' 9 | import matplotlib as mpl 10 | mpl.use( 'Agg') 11 | import numpy as np 12 | 13 | import os 14 | import matplotlib.pyplot as plt 15 | 16 | #----------------------my modules-------------------------------------------------------- 17 | import src.data_utils as data_utils 18 | #import src.clustering as clustering 19 | from src.EqCat import * 20 | 21 | eqCat = EqCat() # original catalog 22 | eqCatMc = EqCat() # this catalog will be modified with each Mc iteration 23 | 24 | 25 | #=================================1============================================== 26 | # dir, file, params 27 | #================================================================================ 28 | data_dir = 'data' 29 | plot_dir = 'plots' 30 | file_in = 'hs_1981_2011_all.mat' 31 | clust_file = file_in.replace( 'all.mat', 'clusters.mat') 32 | 33 | #=================================1============================================== 34 | # dir, file, params 35 | #================================================================================ 36 | 37 | dPar = { 38 | 'a_Mc' : np.array([ 3.0, 4.0]), # , 3.0, 4.0]), #3.0,4.0]), 39 | 40 | 'alpha' : 1, #exponent for test plot 41 | #=================plotting============== 42 | 'plotFormat' : 'png', 43 | } 44 | 45 | #=================================2============================================== 46 | # load data, select events 47 | #================================================================================ 48 | 
eqCat.loadMatBin( os.path.join( data_dir, file_in)) 49 | print( 'total no. of events', eqCat.size()) 50 | eqCat.selectEvents( dPar['a_Mc'][0], None, 'Mag') 51 | #eqCat.selectEvents( tmin, tmax, 'Time') 52 | print( 'no. of events after initial selection', eqCat.size()) 53 | 54 | 55 | iMc = 0 56 | for f_Mc in dPar['a_Mc']: 57 | # load file with IDs of events within family 58 | clust_file = file_in.replace( 'all.mat', 'Mc_%.1f_clusters.mat'%( f_Mc)) 59 | dClust = data_utils.loadmat( os.path.join( data_dir,clust_file), ) 60 | 61 | # cut below current completeness 62 | eqCatMc.copy( eqCat) 63 | eqCatMc.selectEvents( f_Mc, None, 'Mag') 64 | n_aboveMc = eqCatMc.size() 65 | print( 'current catalog size: ',eqCatMc.size()) 66 | 67 | #=================================1========================================================================== 68 | # singles are counted as MS with 0 AS 69 | #============================================================================================================ 70 | print( 'total number of clusters', len( dClust.keys()), 'no. of BG events', dClust['0'].shape[0]) 71 | a_ID_single = dClust['0'] 72 | 73 | # IDs of BG events 74 | a_iSel = np.zeros( eqCatMc.size(), dtype = int) 75 | a_mag_single = np.zeros( len( a_ID_single)) 76 | a_N_AS_single= np.zeros( len( a_ID_single)) 77 | a_N_FS_single= np.zeros( len( a_ID_single)) 78 | for i in range( a_ID_single.shape[0]): 79 | # event ID may be in catalog more than once 80 | sel_ev = eqCatMc.data['N'] == a_ID_single[i] 81 | a_mag_single[i] = eqCatMc.data['Mag'][sel_ev][0] 82 | a_iSel[sel_ev] = 1#catalog.data['N'][catalog.data['N']==aEqID[i]][0] 83 | if sel_ev.sum() != 1: 84 | error_str = 'more than event found', eqCatMc.data['N'][sel_ev] 85 | raise( ValueError( error_str)) 86 | ### remove singles from catalog 87 | eqCatMc.selDicAll( np.logical_not(a_iSel)) 88 | print( 'remaining events', eqCatMc.size(), 'BG events', len( a_mag_single)) 89 | dClust.pop('0') # remove singles 90 | #=================================2========================================================================== 91 | # get MAGs of MS with aftershocks, count aftershocks 92 | #============================================================================================================ 93 | a_N_FS = np.zeros( len( dClust.keys()), dtype = int) 94 | a_N_AS = np.zeros( len( dClust.keys()), dtype = int) 95 | a_MS_mag = np.zeros( len( dClust.keys())) 96 | a_MS_ID = np.zeros( len( dClust.keys()), dtype = int) 97 | iCl = 0 98 | for sCl in dClust.keys(): 99 | aEqID = dClust[sCl]# np.unique( dClust[sCl].flatten()) unique is not needed anymore, createCluster has been fixed 100 | print( 'cl: ', iCl+1,'out of: ', len( dClust.keys()), 'no. of ev. in cl.', len( aEqID), len( np.unique( dClust[sCl]))) 101 | # find MS mag and magnitude of entire family 102 | atmp_MAG = np.zeros( len( aEqID)) 103 | atmp_Time= np.zeros( len( aEqID)) 104 | a_iSel = np.zeros( eqCatMc.size(), dtype = int) 105 | # for each family find: event mag. 
and origin time 106 | for iM in range( len( aEqID)): 107 | sel_ev = eqCatMc.data['N'] == aEqID[iM] 108 | if sel_ev.sum() != 1: 109 | error_str = 'more/less than event found', eqCatMc.data['N'][sel_ev], aEqID[iM] 110 | raise( ValueError, error_str) 111 | atmp_MAG[iM] = eqCatMc.data['Mag'][sel_ev][0] 112 | atmp_Time[iM] = eqCatMc.data['Time'][sel_ev][0] 113 | a_iSel[sel_ev] = 1 114 | # remove events from catalog 115 | #catalog.selDicAll( np.logical_not(a_iSel)) 116 | #----------------------------mainshock-------------------------------------------------- 117 | selMS = atmp_MAG == atmp_MAG.max() 118 | f_tMS = atmp_Time[selMS][0] 119 | i_ID_MS = aEqID[selMS] 120 | 121 | #print( 'tMS', tMS, v_currEqID.shape[0], 'MAG', curr_cat.data['MAG'][selMS][0] 122 | #----------------------------aftershock-------------------------------------------------- 123 | selAS = atmp_Time > f_tMS 124 | selFS = atmp_Time < f_tMS 125 | #print( 'no. of aftershocks', selAS.sum() 126 | # save number of aftershocks for each MS mag 127 | a_MS_mag[iCl] = atmp_MAG[selMS][0]#, dPar['magRound']) 128 | a_N_AS[iCl] = selAS.sum() 129 | a_N_FS[iCl] = selFS.sum() 130 | a_MS_ID[iCl] = int( i_ID_MS[0]) 131 | iCl += 1 132 | 133 | #=================================3========================================================================== 134 | # compare MS+single+FS+AS to original number of events in catalog 135 | #============================================================================================================ 136 | # combine single without AS with mainshocks that do have aftershocks 137 | a_N_FS = np.append( a_N_FS, a_N_FS_single) 138 | a_N_AS = np.append( a_N_AS, a_N_AS_single) 139 | a_MS_mag = np.append( a_MS_mag, a_mag_single) 140 | a_MS_ID = np.append( a_MS_ID, a_ID_single) 141 | print( 'tot ev. in catalog', n_aboveMc,'tot events in families',a_N_FS.sum() + a_N_AS.sum() + a_MS_mag.shape[0]) 142 | #print( 'N BG', a_mag_single.shape[0], 'FS', a_N_FS_single.sum(), 'AS', a_N_AS_single.sum(), 'MS (MS+BG)', a_MS_mag.shape[0] 143 | 144 | #=================================4========================================================================== 145 | # save to ASCII text 146 | #============================================================================================================ 147 | file_out = '%s/%s_Nas_MS_Mc_%.1f.txt'%(data_dir, file_in.split('.')[0], f_Mc)#, dPar['magRound']) 148 | np.savetxt( file_out, np.array([a_MS_mag, a_N_AS, a_N_FS, a_MS_ID]).T, fmt='%10.3f%10i%10i%14i', 149 | header = 'MAG N-AS N-FS MS-ID; note N_AS=0 highlights singles or FS only') 150 | iMc += 1 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /7b_plot_productivity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on August 16, 2016 3 | 4 | - plot No. 
of events for each mainshock 5 | - average number of events per mainshock 6 | - fit N ave and Mms --> alpha exponent 7 | 8 | @author: tgoebel 9 | ''' 10 | import matplotlib as mpl 11 | #mpl.use( 'Agg') 12 | 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | import os 16 | 17 | 18 | #=================================1============================================== 19 | # dir, file, params 20 | #================================================================================ 21 | data_dir = 'data' 22 | plot_dir = 'plots' 23 | file_in = 'hs_1981_2011_all.mat' 24 | 25 | dPar = { 26 | 'magRound' : 1, # for binning 27 | 'a_Mc' : np.array([2.5, 3.0, 4.0]), 28 | 29 | #=================plotting============== 30 | 'alpha' : 1.0, # for plotting demonstration 31 | 'xmin' : 2, 'xmax' : 8, 32 | 'ymin' : 0.1, 'ymax' : 1e4, 33 | 'plotFormat' : 'png', 34 | } 35 | 36 | #================================================================================ 37 | # load file with no. of aftershocks 38 | #================================================================================ 39 | 40 | iMc = 0 41 | for f_Mc in dPar['a_Mc']: 42 | file_prod = '%s/%s_Nas_MS_Mc_%.1f.txt'%(data_dir, file_in.split('.')[0], f_Mc)#, dPar['magRound']) 43 | 44 | m_N_as = np.loadtxt( file_prod).T 45 | print( 'total no. of mainshock', m_N_as[0].shape[0]) 46 | print( 'total no. of AS', m_N_as[1].sum()) 47 | print( 'total no. of FS', m_N_as[2].sum()) 48 | #=================================2========================================================================== 49 | # count ave. no. of aftershocks per MS magnitude 50 | #============================================================================================================ 51 | aMag_round= np.around( m_N_as[0], dPar['magRound']) 52 | aMag_bin = np.array( sorted(np.unique( aMag_round))) 53 | aAveNo_AS = np.ones( len( aMag_bin))*np.nan 54 | aNo_Fam = np.zeros( len( aMag_bin)) # total number of families within mag bin 55 | aNo_AS20 = np.zeros( len( aMag_bin)) 56 | aNo_AS80 = np.zeros( len( aMag_bin)) 57 | 58 | i = 0 59 | for curr_mag in aMag_bin: 60 | selMag = curr_mag == aMag_round 61 | aAveNo_AS[i] = m_N_as[1][selMag].mean() 62 | if selMag.sum() > 0: 63 | aNo_AS20[i] = np.percentile( m_N_as[1][selMag], 20) 64 | aNo_AS80[i] = np.percentile( m_N_as[1][selMag], 80) 65 | aNo_Fam[i] = selMag.sum() 66 | print( curr_mag, 'mean N-AS', round(aAveNo_AS[i],2), aNo_AS20[i],aNo_AS80[i], 'no. 
of fam', aNo_Fam[i]) 67 | 68 | i += 1 69 | 70 | #=================================3========================================================================== 71 | # plot productivity law 72 | #============================================================================================================ 73 | plt.figure(1, figsize=(8,6)) 74 | ax = plt.axes([.14,.12,.78,.83])#pPlot.createFigureSquare(1) 75 | ax.semilogy( m_N_as[0], m_N_as[1], 'o', ms = 6, mew =0, mfc = '.7', alpha = .2 ) 76 | #ax.errorbar( aMag_bin, aAveNo_AS, yerr=[np.zeros(aMag_bin.shape[0]), aNo_AS80-aAveNo_AS], 77 | # fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w') 78 | ax.errorbar( aMag_bin, aAveNo_AS, yerr=[aAveNo_AS-aNo_AS20, aNo_AS80-aAveNo_AS], 79 | fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w') 80 | 81 | #-------------------------exponential - estimate----------------------------------------------------- 82 | mag_fit = aMag_bin[10] # force fit through this point 83 | f_no_AS_pl = aAveNo_AS[aMag_bin == mag_fit] 84 | preFac = np.log10( f_no_AS_pl) - dPar['alpha']*mag_fit 85 | a_N_hat = 10**( dPar['alpha']*aMag_bin + preFac) 86 | ax.semilogy( aMag_bin, a_N_hat, 'w-') 87 | ax.semilogy( aMag_bin, a_N_hat, '-', color = 'r', lw = 2, label = 'exp = %.1f'%( np.round( dPar['alpha'],1))) 88 | 89 | #-------------------------------labels, limits etc.----------------------------------------------- 90 | ax.set_xlim( dPar['xmin'], dPar['xmax']) 91 | ax.set_ylim( dPar['ymin'], dPar['ymax']) 92 | ax.set_xlabel( 'Mainshock Magnitude') 93 | ax.set_ylabel( 'Number of Aftershocks') 94 | ax.legend( loc = 'upper left', frameon = False) 95 | 96 | plt.savefig( '%s/%s_Mc_%.1f_ASprod.%s'%(plot_dir, file_in.split('.')[0], f_Mc, dPar['plotFormat'])) 97 | 98 | plt.show() 99 | plt.clf() 100 | 101 | iMc += 1 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clustering-analysis 2 | Seismicity Clustering Analysis Based on nearest neighbor distances of event pairs 3 | 4 | To separate seismicity into background and clustered events, we use the distribution of nearest-neighbor event pairs and compare observed clustering characteristics with expectations from random poissonian earthquakes. Commonly, space-time distances can be described by a 2D bi-modal distribution. The first mode at small interevent times and distances highlights clustered events (e.g. aftershocks), whereas the second mode at larger distances is comprised of background events. The background mode for the California catalog corresponds to that expectation (see plots). We use the 99th percentile of nearest-neighbor distances from the randomized catalogs to separate background and clustered events, which allows for a clear separation between the two modes in California. 5 | 6 | 7 | Dependencies: 8 | 9 | Python 3.7 10 | Numpy, matplotlib, matplotlib-Basemap, scipy, scipy, datetime, calendar 11 | 12 | Use the following references if you use this code: 13 | - Zaliapin, I., and Ben-Zion, Y., 2013, Earthquake clusters in southern California I: Identification and stability: Journal of Geophysical Research: Solid Earth, v. 118, no. 6, p. 2847–2864, doi: 10.1002/jgrb.50179. 
14 |
15 | - Goebel, T.H.W., Rosson, Z., Brodsky, E.E., and Walter, J.I., 2019, Aftershock deficiency of induced earthquake sequences during rapid mitigation efforts in Oklahoma: Earth and Planetary Science Letters, v. 522, p. 135–143, doi: 10.1016/j.epsl.2019.06.036.
16 |
17 |
18 | # Tutorial:
19 |
20 | 1) Download standard or relocated catalog (e.g. https://service.scedc.caltech.edu/eq-catalogs/date_mag_loc.php
21 | or https://scedc.caltech.edu/research-tools/altcatalogs.html)
22 | 2) Convert catalog to EqCat object with attribute self.data, which is a dictionary with data columns
23 | 'Time' = Decimal Year, 'Lon', 'Lat', 'Mag', 'Depth'. Use '1_create_mat_eqCat_file.py' to do the conversion and
24 | save the EqCat as matlab binary (.mat). Alternatively, earthquake catalog formats can be changed in matlab
25 | and saved as .mat with variable names: 'Time' = Decimal Year, 'Lon', 'Lat', 'Mag', 'Depth'.
26 | An example catalog is provided in the /data directory (hs_1981_2011_all.mat).
27 |
28 | The following steps require estimates of fractal dimension, D, completeness magnitude, Mc, and b-value.
29 | Mc and the b-value can be estimated using the github repository: https://github.com/tgoebel/magnitude-distribution
30 | It is recommended that the sensitivity of the results to changes in these parameters be tested.
31 |
32 | 3) Compute separation between clustered and background events: '2_eta_0.py'
33 |
34 | 4) Compute nearest neighbor distances (NND) and find the parent event for each event, except for the first event in the catalog:
35 | '3_NND.py'
36 |
37 | 5) Assemble event families and save them as clusters. Each cluster has a unique ID which is used as variable name
38 | in a corresponding python dictionary. The clusters contain the unique event IDs of all members. Note that the cluster
39 | with ID and variable name '0' contains singles, i.e. events with parents at nearest-neighbor-distance beyond eta_0:
40 | '6_createClust.py'
41 |
42 | 6) Count the number of events within each cluster (or family) before (foreshocks) and after (aftershocks) the largest
43 | magnitude event in each family. Singles have 0 fore- and aftershocks:
44 | '7_productivity.py'
45 |
46 | 7) Plot the productivity relationship, including the number of aftershocks in each family and the average number of aftershocks
47 | within magnitude-binned mainshocks. Plot an alpha=1 slope for comparison:
48 | '7b_plot_productivity.py'
49 |
50 | All results should be compared to the provided figures using the scripts:
51 | '1b_plot_eqLocs.py', '4_dist_tau.py', '5_plot_lat_t.py'
52 | - also check results in:
53 | Zaliapin, I., and Ben-Zion, Y., 2013, Earthquake clusters in southern California I:
54 | Identification and stability: Journal of Geophysical Research: Solid Earth, v. 118, no. 6, p. 2847–2864, doi: 10.1002/jgrb.50179.
55 | and
56 | - Goebel, T.H.W., Rosson, Z., Brodsky, E.E., and Walter, J.I., 2019, Aftershock deficiency of induced earthquake sequences during rapid mitigation efforts in Oklahoma: Earth and Planetary Science Letters, v. 522, p. 135–143, doi: 10.1016/j.epsl.2019.06.036.
57 |
--------------------------------------------------------------------------------
/clust_SoCal.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "582ef5f5",
6 | "metadata": {},
7 | "source": [
8 | "# Sample script of clustering analysis applied to a relocated seismicity catalog from Southern California see:\n",
9 | "Hauksson, Egill, Wenzheng Yang, and Peter M. 
Shearer. \"Waveform relocated earthquake catalog for southern California (1981 to June 2011).\" Bulletin of the Seismological Society of America 102.5 (2012): 2239-2244.\n",
10 | "\n",
11 | " and\n",
12 | " \n",
13 | "Zaliapin, Ilya, and Yehuda Ben‐Zion. \"Earthquake clusters in southern California I: Identification and stability.\" Journal of Geophysical Research: Solid Earth 118.6 (2013): 2847-2864.\n",
14 | "\n",
15 | " and\n",
16 | " \n",
17 | " Goebel, T.H.W., Rosson, Z., Brodsky, E.E., and Walter, J.I., 2019, Aftershock deficiency of induced earthquake sequences during rapid mitigation efforts in Oklahoma: Earth and Planetary Science Letters, v. 522, p. 135–143, doi: 10.1016/j.epsl.2019.06.036.\n"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "id": "daa8acfa",
23 | "metadata": {},
24 | "source": [
25 | "### The following code performs three primary steps:\n",
26 | " 1. calculate nearest-neighbor distances between all events in the catalog\n",
27 | " 2. separate the seismicity catalog into families and independent background events based on a nearest-neighbor threshold\n",
28 | " 3. Count the number of aftershocks for each family and plot number of aftershocks over mainshock magnitude"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "id": "ebe1ad2d",
34 | "metadata": {},
35 | "source": [
36 | "### 0: seismicity map\n",
37 | "Load the Southern California seismicity catalog and plot it with Basemap. \n",
38 | "(this step can be skipped if the mpl Basemap module is not installed)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "id": "bd213067",
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "#specify data file, time and magnitude range\n",
49 | "dir_in = 'data'\n",
50 | "# this file is generated with: 1_create_mat_eqCat_file.py\n",
51 | "file_in= 'hs_1981_2011_all.mat'\n",
52 | "# completeness magnitude = Mmin, and Mmax (which does not need to be specified)\n",
53 | "Mmin, Mmax = 3, None\n",
54 | "tmin, tmax = 1980, 2012"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "id": "0a214aad",
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "#------------------------------------------------------------------------------\n",
65 | "import matplotlib.pyplot as plt\n",
66 | "import numpy as np\n",
67 | "import os\n",
68 | "\n",
69 | "#------------------------------my modules-------------------------------------- \n",
70 | "# EqCat is a Python object that is used for catalog processing\n",
71 | "from src.EqCat import EqCat\n",
72 | "eqCat = EqCat( )\n",
73 | "#for methods check source code or uncomment the following line \n",
74 | "print( 'EqCat Methods: ', eqCat.methods)\n",
75 | "#=================================2==============================================\n",
76 | "# load data, select events\n",
77 | "#================================================================================\n",
78 | "eqCat.loadMatBin( f\"{dir_in}/{file_in}\")\n",
79 | "print( 'total no. of events', eqCat.size())\n",
80 | "eqCat.selectEvents( Mmin, Mmax, 'Mag')\n",
81 | "eqCat.selectEvents( tmin, tmax, 'Time')\n",
82 | "print( 'no. 
of events after Mag/Time selection', eqCat.size())" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "185c09a3", 88 | "metadata": {}, 89 | "source": [ 90 | "the following cell will only run if you have Basemap" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "f6a3caf5", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "try:\n", 101 | " #os.environ[\"PROJ_LIB\"] = f\"{os.environ['HOME']}/opt/anaconda3/share/proj\"\n", 102 | " from mpl_toolkits.basemap import Basemap\n", 103 | " b_map = True\n", 104 | "except:\n", 105 | " b_map = False\n", 106 | "#=================================3==============================================\n", 107 | "# test plot with Basemap\n", 108 | "#================================================================================\n", 109 | "projection = 'cyl'\n", 110 | "xmin,xmax = eqCat.data['Lon'].min(), eqCat.data['Lon'].max()\n", 111 | "ymin,ymax = eqCat.data['Lat'].min(), eqCat.data['Lat'].max()\n", 112 | "if b_map:\n", 113 | " # setup equi distance basemap.\n", 114 | " m = Basemap( llcrnrlat = ymin,urcrnrlat = ymax,\n", 115 | " llcrnrlon = xmin,urcrnrlon = xmax,\n", 116 | " projection = projection,lat_0=(ymin+ymax)*.5,lon_0=(xmin+xmax)*.5,\n", 117 | " resolution = 'l')\n", 118 | " m.drawstates( linewidth = 1)\n", 119 | " m.drawcoastlines( linewidth= 2)\n", 120 | " a_x, a_y = m( eqCat.data['Lon'], eqCat.data['Lat'])\n", 121 | " m.plot( a_x, a_y, 'ko', ms = 1)\n", 122 | " sel6 = eqCat.data['Mag'] >= 6\n", 123 | " m.plot( a_x[sel6], a_y[sel6], 'ro', ms = 8, mew= 1.5, mfc = 'none')\n", 124 | "\n", 125 | " m.drawmeridians( np.linspace( int(xmin), xmax, 4),labels=[False,False,False,True],\n", 126 | " fontsize = 12, fmt = '%.1f')\n", 127 | " m.drawparallels( np.linspace( int(ymin), ymax, 4),labels=[True,False,False,False],\n", 128 | " fontsize = 12, fmt = '%.2f')\n", 129 | "\n", 130 | " plt.savefig( file_in.replace( 'mat', 'png'))\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "a90e4096", 136 | "metadata": {}, 137 | "source": [ 138 | "### 1: Compute Nearest Neighbor Distances" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "069c432f", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "import src.clustering as clustering\n", 149 | "# set parameters:fractal dimension and b-value\n", 150 | "dPar = { # fractal dimension and b for eq. 
(1) in Zaliapin & Ben-Zion\n", 151 | " 'D' : 1.6, # TODO: - these values should be contrained independently\n", 152 | " 'b' : 1.0, # use: https://github.com/tgoebel/magnitude-distribution for b-value\n", 153 | " 'Mc' : Mmin,\n", 154 | " #=================plotting==============\n", 155 | " # these parameters rarely have to be changes\n", 156 | " 'eta_binsize' : .3,\n", 157 | " 'xmin' : -13, 'xmax' : 0,\n", 158 | " }\n", 159 | "\n", 160 | "\n", 161 | "#================================================================================\n", 162 | "# to cartesian coordinates\n", 163 | "#================================================================================\n", 164 | "# two ways to do the distance comp: 1 project into equal distance azimuthal , comp Cartersian distance in 3D\n", 165 | "# 2 get surface distance from lon, lat (haversine), use pythagoras to include depth\n", 166 | "if b_map:\n", 167 | " eqCat.toCart_coordinates( projection = 'eqdc')\n", 168 | " print( 'convert to cartesian using equi-distant projection')\n", 169 | "#==================================2=============================================\n", 170 | "# compute space-time-magnitude distance, histogram\n", 171 | "#================================================================================\n", 172 | "eqCat.data['Z'] = eqCat.data['Depth']\n", 173 | "print('depth range: ', eqCat.data['Z'].min(), eqCat.data['Z'].max())\n", 174 | "dNND = clustering.NND_eta( eqCat, dPar, \n", 175 | " correct_co_located = True, verbose= True)\n", 176 | "###histogram\n", 177 | "aBins = np.arange( -13, 1, dPar['eta_binsize'], dtype = float)\n", 178 | "aHist, aBins = np.histogram( np.log10( dNND['aNND'][dNND['aNND']>0]), aBins)\n", 179 | "aBins = aBins[0:-1] + dPar['eta_binsize']*.5\n", 180 | "# correct for binsize\n", 181 | "aHist = aHist/dPar['eta_binsize']\n", 182 | "# to pdf (prob. 
density)\n",
 183 |     "aHist /= eqCat.size()\n",
 184 |     "#=================================3==============================================\n",
 185 |     "# save results\n",
 186 |     "#================================================================================\n",
 187 |     "import scipy.io\n",
 188 |     "NND_file = 'data/%s_NND_Mc_%.1f.mat'%( file_in.split('.')[0], dPar['Mc'])\n",
 189 |     "print( 'save file', NND_file)\n",
 190 |     "scipy.io.savemat( NND_file, dNND, do_compression = True)\n",
 191 |     "\n",
 192 |     "#=================================4==============================================\n",
 193 |     "# plot histogram\n",
 194 |     "#================================================================================\n",
 195 |     "# load eta_0 value - only for plotting purposes\n",
 196 |     "eta_0_file = '%s/%s_Mc_%.1f_eta_0.txt'%(dir_in, file_in, dPar['Mc'])\n",
 197 |     "if os.path.isfile( eta_0_file):\n",
 198 |     "    print( 'load eta_0 from file')\n",
 199 |     "    f_eta_0 = np.loadtxt( eta_0_file, dtype = float)\n",
 200 |     "    print( 'eta_0',f_eta_0)\n",
 201 |     "else:\n",
 202 |     "    f_eta_0 = -5\n",
 203 |     "    print( 'could not find eta_0 file', eta_0_file, 'use value: ', f_eta_0)\n",
 204 |     "\n",
 205 |     "fig, ax = plt.subplots()\n",
 206 |     "#ax.plot( vBin, vHist, 'ko')\n",
 207 |     "ax.bar( aBins, aHist, width =.8*dPar['eta_binsize'], align = 'edge', color = '.5', label = 'Mc = %.1f'%( dPar['Mc']))\n",
 208 |     "ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'w-', lw = 2, label = '$N_\\mathrm{tot}$=%i'%( eqCat.size()))\n",
 209 |     "ax.plot( [f_eta_0, f_eta_0], ax.get_ylim(), 'r--', lw = 2, label = '$N_\\mathrm{cl}$=%i'%( dNND['aNND'][dNND['aNND']<10**f_eta_0].shape[0]))\n",
 210 |     "\n",
 211 |     "ax.legend( loc = 'upper left')\n",
 212 |     "ax.set_xlabel( 'NND, log$_{10} \\eta$')\n",
 213 |     "ax.set_ylabel( 'Number of Events')\n",
 214 |     "ax.grid( 'on')\n",
 215 |     "ax.set_xlim( dPar['xmin'], dPar['xmax'])\n"
 216 |    ]
 217 |   },
 218 |   {
 219 |    "cell_type": "markdown",
 220 |    "id": "22b4dce8",
 221 |    "metadata": {},
 222 |    "source": [
 223 |     "The above figure shows a histogram of the nearest-parent space-time-magnitude distance in the catalog. \n",
 224 |     "Note that two distinct modes arise in a typical earthquake catalog: a clustered mode (left) \n",
 225 |     "and a background mode (right). The former represents Omori-type clustering (typically 'nearer'), while the latter represents the background Poisson process. Events in the background mode are those that, observably, are not triggered by a previous earthquake. $\\eta_0$ is the cutoff between these two modes: in practice, all links between earthquakes whose nearest-neighbor distance exceeds this cutoff are removed, which separates the catalog into distinct clusters.\n",
 226 |     "\n",
 227 |     "For a quick analysis, it is possible to simply pick a value that separates these modes. Smaller values lead to smaller clusters; larger values lead to more generous clusters but may include background seismicity. One rough way to estimate $\\eta_0$ from the same distribution is sketched right after this cell."
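# ---- optional sketch (not part of the notebook): estimate eta_0 from the NND values ----
# One quick, automated option is to fit a two-component Gaussian mixture to log10(eta)
# and use the midpoint between the two mode means as the separator between the clustered
# and the background mode. This assumes scikit-learn is installed (it is not otherwise
# required by this repository); dNND is the dictionary returned by clustering.NND_eta above.
import numpy as np
from sklearn.mixture import GaussianMixture
a_log_eta    = np.log10( dNND['aNND'][dNND['aNND'] > 0]).reshape(-1, 1)
gmm          = GaussianMixture( n_components = 2, random_state = 123).fit( a_log_eta)
mu_cl, mu_bg = np.sort( gmm.means_.flatten())   # smaller mean ~ clustered mode
f_eta_0_est  = 0.5*( mu_cl + mu_bg)             # crude separator between the two modes
print( 'estimated eta_0 (log10):', round( float( f_eta_0_est), 2))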
228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "id": "bf12b34a", 233 | "metadata": {}, 234 | "source": [ 235 | "### 2: separate clusters from independent background and compile event families" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "id": "5b874317", 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "dPar['eta_0'] = f_eta_0\n", 246 | "print( 'similarity threshold', dPar['eta_0'])" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "id": "47685619", 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "clust_file = file_in.replace( 'all.mat', 'Mc_%.1f_clusters.mat'%( dPar['Mc']))\n", 257 | " \n", 258 | "dNND['aNND'] = np.log10( dNND['aNND'])\n", 259 | "# clustering according to eta_0 similarity criteria\n", 260 | "dClust = clustering.compileClust( dNND, f_eta_0, useLargerEvents = False)\n", 261 | "#=================================4==========================================================================\n", 262 | "# save results\n", 263 | "#============================================================================================================\n", 264 | "scipy.io.savemat( os.path.join( dir_in,clust_file), dClust, do_compression=True)\n" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "b4ab2cd5", 270 | "metadata": {}, 271 | "source": [ 272 | "### let's create a couple of test plots to check whether everythin is working\n", 273 | "the first plot provides a rough overview of clustered and independent events in a rescale space-time domain\n", 274 | "\n", 275 | "This provides a visualization of the clustering behavior outlined above. This time in normalized space distance ($R_{ij}$) versus normalized time distance ($T_{ij}$). Where:\n", 276 | "\n", 277 | "$$ R_{ij} = r_{ij}^d\\times10^{bM_i/2} $$\n", 278 | "$$ T_{ij} = t_{ij}\\times10^{bM_i/2} $$" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "id": "6a65109e", 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "#=======event-pair density in r-T============================\n", 289 | "catChild= EqCat()\n", 290 | "catParent= EqCat()\n", 291 | "catChild.copy( eqCat)\n", 292 | "catParent.copy( eqCat)\n", 293 | "catChild.selEventsFromID( dNND['aEqID_c'], repeats = True)\n", 294 | "catParent.selEventsFromID( dNND['aEqID_p'], repeats = True)\n", 295 | "print( 'size of offspring catalog', catChild.size(), 'size of parent cat', catParent.size()) \n", 296 | "\n", 297 | "#compute re-scaled interevent times and distances\n", 298 | "a_R, a_T = clustering.rescaled_t_r(catChild, catParent, dPar)\n", 299 | "# plot event pair density \n", 300 | "fig = clustering.plot_R_T( a_T, a_R, f_eta_0)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "id": "d53670ed", 306 | "metadata": {}, 307 | "source": [ 308 | "as you can see there are two different statistical modes \n", 309 | "(sort of like peaks in a histogram except for now you are looking at a 2D histogram)\n", 310 | "Which mode (red area) correponds to the background events and which mode marks the aftershocks?" 
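For reference, the rescaled distance and time as computed in src/clustering.py (rescaled_t_r) carry the magnitude correction with a negative exponent,

$$ T_{ij} = t_{ij}\times10^{-bM_i/2}, \qquad R_{ij} = r_{ij}^{D}\times10^{-bM_i/2}, $$

with $M_i$ the parent magnitude and $D$ the fractal dimension, so that their product recovers the nearest-neighbor distance used above, $\eta_{ij} = T_{ij}\,R_{ij}$.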
311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "id": "2b47fc42", 316 | "metadata": {}, 317 | "source": [ 318 | "Now let's also look at the different families and how individual events are linked" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "7a6f75d3", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "#==================================4=============================================\n", 329 | "# spanning tree\n", 330 | "#================================================================================\n", 331 | "plt.figure( 1)\n", 332 | "ax = plt.subplot(111) \n", 333 | "for iEv in range( catParent.size()):\n", 334 | " print( f\"MS-ID, {int(catParent.data['N'][iEv]):d}, t-Par: {catParent.data['Time'][iEv]:.5f},'t-child', {eqCat.data['Time'][iEv]:.5f}\", end= \"\\r\")\n", 335 | "\n", 336 | " if dNND['aNND'][iEv] < dPar['eta_0']:#triggered cluster\n", 337 | " ax.plot( [catParent.data['Time'][iEv]], [catParent.data['Lat'][iEv]], 'ro', ms = 12, alpha = .2)\n", 338 | " ax.plot( [catParent.data['Time'][iEv],catChild.data['Time'][iEv]],\n", 339 | " [catParent.data['Lat'][iEv], catChild.data['Lat'][iEv]], 'k-', marker = 'o', ms = 4, mew =1, mfc = 'none')\n", 340 | " else: # independent events\n", 341 | " ax.plot( [catChild.data['Time'][iEv]], [catChild.data['Lat'][iEv]], 'bo', ms = 5, alpha = .6)\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "id": "fcae05b9", 347 | "metadata": {}, 348 | "source": [ 349 | "The blue dots in the above plot are independent background events. That means their nearest neighbor\n", 350 | "is beyond the chosen (or calculated) eta_0 value.\n", 351 | "The red and black circles are clustered events linked by thin black lines.\n", 352 | "The darker the red color the more events are linked to that particular parent.\n", 353 | "Black circles are the last generation in a trigger series, i.e., aftershocks that do not produce aftershocks\n", 354 | "themselves.\n", 355 | "Can you dedetect some major triggering events? (Hint: think about major eqs. in 1992, 1999, 2010)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "id": "58990e83", 361 | "metadata": {}, 362 | "source": [ 363 | "### 3: count aftershocks and plot productivity relation" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "id": "4fdbfb97", 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "eqCat = EqCat( )\n", 374 | "#=================================1==============================================\n", 375 | "# load data, select events\n", 376 | "#================================================================================\n", 377 | "eqCat.loadMatBin( f\"{dir_in}/{file_in}\")\n", 378 | "eqCat.selectEvents( Mmin, Mmax, 'Mag')\n", 379 | "eqCat.selectEvents( tmin, tmax, 'Time')\n", 380 | "N_tot = eqCat.size()\n", 381 | "print( 'total no. of events', N_tot)\n", 382 | "#=================================2==========================================================================\n", 383 | "# singles are counted as MS with 0 AS\n", 384 | "#============================================================================================================\n", 385 | "print( 'total number of clusters', len( dClust.keys()), 'no. 
of BG events', dClust['0'].shape[0])\n", 386 | "a_ID_single = dClust['0']\n", 387 | "\n", 388 | "# IDs of BG events\n", 389 | "a_iSel = np.zeros( eqCat.size(), dtype = int)\n", 390 | "a_mag_single = np.zeros( len( a_ID_single))\n", 391 | "a_N_AS_single= np.zeros( len( a_ID_single))\n", 392 | "a_N_FS_single= np.zeros( len( a_ID_single))\n", 393 | "for i in range( a_ID_single.shape[0]):\n", 394 | " # event ID may be in catalog more than once\n", 395 | " sel_ev = eqCat.data['N'] == a_ID_single[i]\n", 396 | " a_mag_single[i] = eqCat.data['Mag'][sel_ev][0]\n", 397 | " a_iSel[sel_ev] = 1#catalog.data['N'][catalog.data['N']==aEqID[i]][0]\n", 398 | " if sel_ev.sum() != 1:\n", 399 | " error_str = 'more than event found', eqCat.data['N'][sel_ev]\n", 400 | " raise( ValueError( error_str))\n", 401 | "### remove singles from catalog\n", 402 | "eqCat.selDicAll( np.logical_not(a_iSel))\n", 403 | "print( 'remaining events', eqCat.size(), 'BG events', len( a_mag_single))\n", 404 | "dClust.pop('0') # remove singles" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "id": "8a0f5ca4", 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "\n", 415 | "#=================================2==========================================================================\n", 416 | "# get MAGs of MS with aftershocks, count aftershocks\n", 417 | "#============================================================================================================\n", 418 | "a_N_FS = np.zeros( len( dClust.keys()), dtype = int)\n", 419 | "a_N_AS = np.zeros( len( dClust.keys()), dtype = int)\n", 420 | "a_MS_mag = np.zeros( len( dClust.keys()))\n", 421 | "a_MS_ID = np.zeros( len( dClust.keys()), dtype = int)\n", 422 | "iCl = 0\n", 423 | "for sCl in dClust.keys():\n", 424 | " aEqID = dClust[sCl]# np.unique( dClust[sCl].flatten()) unique is not needed anymore, createCluster has been fixed\n", 425 | " print( 'cl: ', iCl+1,'out of: ', len( dClust.keys()), 'no. of ev. in cl.', \n", 426 | " len( aEqID), len( np.unique( dClust[sCl])), end=\"\\r\")\n", 427 | " # find MS mag and magnitude of entire family\n", 428 | " atmp_MAG = np.zeros( len( aEqID))\n", 429 | " atmp_Time= np.zeros( len( aEqID))\n", 430 | " a_iSel = np.zeros( eqCat.size(), dtype = int)\n", 431 | " # for each family find: event mag. and origin time\n", 432 | " for iM in range( len( aEqID)):\n", 433 | " sel_ev = eqCat.data['N'] == aEqID[iM]\n", 434 | " if sel_ev.sum() != 1:\n", 435 | " error_str = 'more/less than event found', eqCat.data['N'][sel_ev], aEqID[iM]\n", 436 | " raise( ValueError, error_str)\n", 437 | " atmp_MAG[iM] = eqCat.data['Mag'][sel_ev][0]\n", 438 | " atmp_Time[iM] = eqCat.data['Time'][sel_ev][0]\n", 439 | " a_iSel[sel_ev] = 1\n", 440 | " # remove events from catalog\n", 441 | " #catalog.selDicAll( np.logical_not(a_iSel))\n", 442 | " #----------------------------mainshock-------------------------------------------------- \n", 443 | " selMS = atmp_MAG == atmp_MAG.max()\n", 444 | " f_tMS = atmp_Time[selMS][0]\n", 445 | " i_ID_MS = aEqID[selMS]\n", 446 | "\n", 447 | " #print( 'tMS', tMS, v_currEqID.shape[0], 'MAG', curr_cat.data['MAG'][selMS][0]\n", 448 | " #----------------------------aftershock-------------------------------------------------- \n", 449 | " selAS = atmp_Time > f_tMS\n", 450 | " selFS = atmp_Time < f_tMS\n", 451 | " #print( 'no. 
of aftershocks', selAS.sum()\n", 452 | " # save number of aftershocks for each MS mag\n", 453 | " a_MS_mag[iCl] = atmp_MAG[selMS][0]#, dPar['magRound'])\n", 454 | " a_N_AS[iCl] = selAS.sum()\n", 455 | " a_N_FS[iCl] = selFS.sum()\n", 456 | " a_MS_ID[iCl] = int( i_ID_MS[0])\n", 457 | " iCl += 1\n", 458 | "\n", 459 | "#=================================3==========================================================================\n", 460 | "# compare MS+single+FS+AS to original number of events in catalog\n", 461 | "#============================================================================================================\n", 462 | "# combine single without AS with mainshocks that do have aftershocks\n", 463 | "a_N_FS = np.append( a_N_FS, a_N_FS_single)\n", 464 | "a_N_AS = np.append( a_N_AS, a_N_AS_single)\n", 465 | "a_MS_mag = np.append( a_MS_mag, a_mag_single)\n", 466 | "a_MS_ID = np.append( a_MS_ID, a_ID_single)\n", 467 | "print( 'tot ev. in catalog', N_tot,'tot events in families',a_N_FS.sum() + a_N_AS.sum() + a_MS_mag.shape[0])\n", 468 | "#print( 'N BG', a_mag_single.shape[0], 'FS', a_N_FS_single.sum(), 'AS', a_N_AS_single.sum(), 'MS (MS+BG)', a_MS_mag.shape[0]\n", 469 | "\n", 470 | "#=================================4==========================================================================\n", 471 | "# save to ASCII text\n", 472 | "#============================================================================================================\n", 473 | "file_out = '%s/%s_Nas_MS_Mc_%.1f.txt'%(dir_in, file_in.split('.')[0], dPar['Mc'])#, dPar['magRound'])\n", 474 | "m_N_as = np.array([a_MS_mag, a_N_AS, a_N_FS, a_MS_ID])\n", 475 | "np.savetxt( file_out, m_N_as.T, fmt='%10.3f%10i%10i%14i',\n", 476 | " header = 'MAG N-AS N-FS MS-ID; note N_AS=0 highlights singles or FS only')\n" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "id": "9d2290ea", 482 | "metadata": {}, 483 | "source": [ 484 | "the two numbers above should hopefully match, otherwise you may have to rerun the code from the beginning" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "id": "7a66f796", 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "dPar['magRound'] = 1 # binning\n", 495 | "#=================plotting==============\n", 496 | "dPar['alpha'] = 1.0 # power law exponent\n", 497 | "dPar['xmin'] = 2 \n", 498 | "dPar['xmax'] = 8\n", 499 | "dPar['ymin'] = 0.1 \n", 500 | "dPar['ymax'] = 1e4\n", 501 | "\n", 502 | "#=================================2==========================================================================\n", 503 | "# count ave. no. 
of aftershocks per MS magnitude\n", 504 | "#============================================================================================================\n", 505 | "aMag_round= np.around( m_N_as[0], dPar['magRound'])\n", 506 | "aMag_bin = np.array( sorted(np.unique( aMag_round)))\n", 507 | "aAveNo_AS = np.ones( len( aMag_bin))*np.nan\n", 508 | "aNo_Fam = np.zeros( len( aMag_bin)) # total number of families within mag bin\n", 509 | "aNo_AS20 = np.zeros( len( aMag_bin))\n", 510 | "aNo_AS80 = np.zeros( len( aMag_bin))\n", 511 | "\n", 512 | "i = 0\n", 513 | "for curr_mag in aMag_bin:\n", 514 | " selMag = curr_mag == aMag_round\n", 515 | " aAveNo_AS[i] = m_N_as[1][selMag].mean()\n", 516 | " if selMag.sum() > 0:\n", 517 | " aNo_AS20[i] = np.percentile( m_N_as[1][selMag], 20)\n", 518 | " aNo_AS80[i] = np.percentile( m_N_as[1][selMag], 80)\n", 519 | " aNo_Fam[i] = selMag.sum()\n", 520 | " print( curr_mag, 'mean N-AS', round(aAveNo_AS[i],2), aNo_AS20[i],aNo_AS80[i], 'no. of fam', aNo_Fam[i],end=\"\\r\")\n", 521 | "\n", 522 | " i += 1\n", 523 | "\n", 524 | "#=================================3==========================================================================\n", 525 | "# plot productivity law\n", 526 | "#============================================================================================================\n", 527 | "plt.figure(1, figsize=(8,6))\n", 528 | "ax = plt.axes([.14,.12,.78,.83])#pPlot.createFigureSquare(1)\n", 529 | "ax.semilogy( m_N_as[0], m_N_as[1], 'o', ms = 6, mew =0, mfc = '.7', alpha = .2 )\n", 530 | "#ax.errorbar( aMag_bin, aAveNo_AS, yerr=[np.zeros(aMag_bin.shape[0]), aNo_AS80-aAveNo_AS],\n", 531 | "# fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w')\n", 532 | "ax.errorbar( aMag_bin, aAveNo_AS, yerr=[aAveNo_AS-aNo_AS20, aNo_AS80-aAveNo_AS],\n", 533 | " fmt = 'o', ecolor = 'k', elinewidth=.7,capsize=2.5, mec = 'k', ms = 8, mew = 1, mfc = 'w')\n", 534 | "\n", 535 | "#-------------------------exponential - estimate-----------------------------------------------------\n", 536 | "mag_fit = aMag_bin[10] # force fit through this point\n", 537 | "f_no_AS_pl = aAveNo_AS[aMag_bin == mag_fit]\n", 538 | "preFac = np.log10( f_no_AS_pl) - dPar['alpha']*mag_fit\n", 539 | "a_N_hat = 10**( dPar['alpha']*aMag_bin + preFac)\n", 540 | "ax.semilogy( aMag_bin, a_N_hat, 'w-')\n", 541 | "ax.semilogy( aMag_bin, a_N_hat, '-', color = 'r', lw = 2, label = 'exp = %.1f'%( np.round( dPar['alpha'],1)))\n", 542 | "\n", 543 | "#-------------------------------labels, limits etc.-----------------------------------------------\n", 544 | "ax.set_xlim( dPar['xmin'], dPar['xmax'])\n", 545 | "ax.set_ylim( dPar['ymin'], dPar['ymax'])\n", 546 | "ax.set_xlabel( 'Mainshock Magnitude')\n", 547 | "ax.set_ylabel( 'Number of Aftershocks')\n", 548 | "ax.legend( loc = 'upper left', frameon = False)\n" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "id": "d7d9a928", 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [] 558 | } 559 | ], 560 | "metadata": { 561 | "kernelspec": { 562 | "display_name": "Python 3 (ipykernel)", 563 | "language": "python", 564 | "name": "python3" 565 | }, 566 | "language_info": { 567 | "codemirror_mode": { 568 | "name": "ipython", 569 | "version": 3 570 | }, 571 | "file_extension": ".py", 572 | "mimetype": "text/x-python", 573 | "name": "python", 574 | "nbconvert_exporter": "python", 575 | "pygments_lexer": "ipython3", 576 | "version": "3.9.13" 577 | } 578 | }, 579 | "nbformat": 4, 580 | "nbformat_minor": 
5 581 | } 582 | -------------------------------------------------------------------------------- /data/hs_1981_2011_all.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_NND_Mc_3.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_NND_Mc_3.0.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_NND_Mc_4.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_NND_Mc_4.0.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_RT_Mc_3.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_RT_Mc_3.0.mat -------------------------------------------------------------------------------- /data/hs_1981_2011_all_RT_Mc_4.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/data/hs_1981_2011_all_RT_Mc_4.0.mat -------------------------------------------------------------------------------- /plots/SI_indAS_deficiencyOK_v5.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/SI_indAS_deficiencyOK_v5.0.pdf -------------------------------------------------------------------------------- /plots/T_R_hs_1981_2011_all_Mc_3.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/T_R_hs_1981_2011_all_Mc_3.0.png -------------------------------------------------------------------------------- /plots/T_R_hs_1981_2011_all_Mc_4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/T_R_hs_1981_2011_all_Mc_4.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_Mc_3.0_ASprod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_Mc_3.0_ASprod.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_Mc_4.0_ASprod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_Mc_4.0_ASprod.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_NND_hist_Mc_3.0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_NND_hist_Mc_3.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_NND_hist_Mc_4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_NND_hist_Mc_4.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_spanningTree_Mc_3.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_spanningTree_Mc_3.0.png -------------------------------------------------------------------------------- /plots/hs_1981_2011_all_spanningTree_Mc_4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgoebel/clustering-analysis/20cf9e348e7f179b0d57402de28510718fd9822e/plots/hs_1981_2011_all_spanningTree_Mc_4.0.png -------------------------------------------------------------------------------- /src/EqCat.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/python3.7 2 | """seismic catalog analysis class earthquake catalogs 3 | - data is stored in dictionary which can be extended without difficulties 4 | as long as all vectors have the same length 5 | 6 | - basic functionalities are focused on catalog I/O 7 | and initial processing (space, time, magnitude window selection) 8 | 9 | """ 10 | import os 11 | import numpy as np 12 | import scipy.io #to writer and read mat bin 13 | # the next line sets the path to PROJ LIB, should be found automatically for conda install 14 | #-----------------my modules----------------------------------------- 15 | #import ClusteringAnalysis.src.datetime_utils as dateTime 16 | import src.datetime_utils as dateTime 17 | 18 | #-------------------------------------------------------------------- 19 | class EqCat: 20 | """ 21 | 22 | (1) 23 | EqCat.data - type python dictionary 24 | e.g.: 25 | self.data = { 'N' : , #event number 26 | 'Time' : np.array([]), # in decimal years 27 | 'Lon' : np.array([]), #or lon 28 | 'Lat' : np.array([]), #or lat 29 | 'Depth' : np.array([]), #or depth 30 | 'Mag' : np.array([]), 31 | 32 | } 33 | """ 34 | def __init__( self, **kwargs ): 35 | """initiate data dictionary 36 | 37 | """ 38 | self.data = {} 39 | 40 | self.methods = [method_name for method_name in dir(self) 41 | if callable(getattr(self, method_name)) and method_name[0] != '_'] 42 | 43 | """ input use kwargs to go from cartesian to GPS coordinates, 44 | tags can be accessed: sLoc1 - sLoc3 , last one is depth or self.sLoc3 """ 45 | # if 'type' in kwargs.keys() and kwargs['type'] == 'GPS': 46 | # self.sLoc1, self.sLoc2, self.sLoc3 = 'Lon', 'Lat', 'Depth' 47 | # elif 'type' in kwargs.keys() and kwargs['type'] == 'Cart': 48 | # self.sLoc1, self.sLoc2, self.sLoc3 = 'X','Y','Z' 49 | # 50 | self.sLoc1, self.sLoc2, self.sLoc3 = 'Lon', 'Lat', 'Depth' 51 | self.sID = 'N' 52 | 53 | def copy(self, catalog ): 54 | """ deep copy of catalog object""" 55 | import copy 56 | try: 57 | for tag, vector in catalog.data.items(): 58 | self.data[tag] = copy.copy( catalog.data[tag]) 59 | 
except: 60 | for tag, vector in catalog.items(): 61 | self.data[tag] = copy.copy( catalog[tag]) 62 | 63 | #=========================================================================== 64 | # import routines 65 | #=========================================================================== 66 | def loadEqCat(self, file_in, catalogType, verbose=False, **kwargs): 67 | """ check what type of catalog and call correct function that handles import 68 | input: - file - catalog filename 69 | - catalogType = 'hs_reloc', focMech ... etc. 70 | = 'WaldhauserReloc' - Waldhauser's selection of repeaters at Hayward 71 | = 'hypoDD' - ID, lat, long, depth, x, y, z, x-error,y-error,z-error, yr, month, day, hour, minute, second, magnitude 72 | - kwargs['header'] - what is the line number of header info. of columns -> used for dic tags 73 | - kwargs['removeColumn'] - specify columns to be removed from original file prior to loading the file 74 | uses 'awk' 75 | - required since loadtxt assume all table entries to be floats 76 | 77 | TODO: --> biggest time sink is checking the date-time for every earthquake and converting it to dec. year --> vectorizing should help 78 | 79 | return: create eqCat object with self.data = {'Time', 'Lon', 'Lat', 'Depth', 'Mag'}, 80 | which are the standard dictionary tags 81 | """ 82 | #----------------check kwargs--------------------------------- 83 | if 'header' in kwargs.keys() and kwargs['header'] is not None: 84 | header = kwargs['header'] 85 | else: 86 | header = None 87 | if 'removeColumn' in kwargs.keys() and kwargs['removeColumn'] is not None: 88 | import src.data_utils as data_utils 89 | # remove columns and change file_name to copy of original file to keep the original 90 | file_in = data_utils.removeColumn( file_in, kwargs['removeColumn']) 91 | #-----------choose import routine------------------------------ 92 | if catalogType == 'HS_reloc': 93 | if header is None: 94 | header = 1 95 | #TODO: get dic tag from file header 96 | headList = ['YR', 'MO', 'DY', 'HR', 'MN','SC', 'N', 'Lat','Lon','Depth', 'Mag', 'nPick', 'distSta', 'rms', 'd/n', 'rMeth', 'clID', 'nEvInCl', 'nlnk','err_h','err_z','rel_err_H', 'rel_err_Z'] 97 | self.data = {} 98 | # 0-5 (datetime), 6(ID), 7 (lat), 8 (lon), 9 (depth), 10 (mag) 99 | mData = np.loadtxt(f"{file_in}", usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) 100 | print( 'no of columns', mData[0].shape[0]) 101 | print( 'no. of earthquakes', mData[:,0].shape[0]) 102 | for l in range( mData[0].shape[0] ): 103 | self.data[headList[l]] = mData[:,l] 104 | 105 | elif catalogType == 'USGS': 106 | # 'time', 'latitude', 'longitude', 'depth', 'mag', 'magType', 'nst', 'gap', 'dmin', 'rms', 'net', 'id' 107 | # 0 1 2 3 4 5 6 7 8 9 10 11 108 | 109 | ###1###Date-time 110 | mDateTime = np.genfromtxt( file_in, delimiter=(4,1,2,1,2,1,2,1,2,1,4), 111 | skip_header=1, usecols=(0,2,4,6,8,10)).T 112 | headDate = ['YR', 'MO', 'DY', 'HR', 'MN', 'SC'] 113 | for i in range( len(headDate)): 114 | self.data[headDate[i]] = mDateTime[i] 115 | ###2### ID 116 | #mID = np.loadtxt( file_in, delimiter=',', skiprows=1, usecols=(10,11), dtype = str).T 117 | #self.data['ID'] = np.array([ int(mID[1,i].strip( mID[0,i])) for i in range( mID.shape[1])], dtype = int) 118 | self.data['ID'] = np.arange( len( self.data['YR'])) 119 | ###3### location, magnitude, gap etc. 
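            # read epicenter coordinates, depth and magnitude (columns 1-4 of a standard USGS csv export)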
120 | header = ['Lat', 'Lon', 'Depth', 'Mag']#, 'Nst', 'Gap', 'Dmin', 'rms'] 121 | mData = np.loadtxt( file_in, delimiter=',', skiprows=1, 122 | usecols=(1,2,3,4),#,6,7,8,9), 123 | dtype = float).T 124 | for i in range( len(header)): 125 | self.data[header[i]] = mData[i] 126 | 127 | elif catalogType == 'Kilauea': 128 | mData = np.loadtxt( file_in).T 129 | # :TODO convert np.array to python dictionary 130 | 131 | #convert date to decimal year 132 | self.data['Time'] = np.array([]) 133 | for i in range( self.data['Mag'].shape[0] ): 134 | if verbose == True: 135 | print( i+1, 'out of', self.data['Mag'].shape[0]) 136 | YR, MO, DY, HR, MN, SC = dateTime.checkDateTime( [self.data['YR'][i], self.data['MO'][i],self.data['DY'][i], self.data['HR'][i],self.data['MN'][i],self.data['SC'][i]]) 137 | self.data['Time'] = np.append( self.data['Time'], 138 | dateTime.dateTime2decYr( [YR, MO, DY, HR, MN, SC])) 139 | #sort catalog chronologically 140 | self.sortCatalog('Time') 141 | 142 | ##clean up 143 | if 'removeColumn' in kwargs.keys() and kwargs['removeColumn'] is not None: 144 | print( "delete: %s, than hit: y"%( file_in)) 145 | removeFile = input( ' ') 146 | print( removeFile) 147 | if os.path.isfile( file_in) and removeFile == 'y': 148 | os.system( "rm %s"%( file_in)) 149 | 150 | #======================================2========================================== 151 | # basic processing and catalog event selection 152 | #================================================================================= 153 | def size(self): 154 | if 'Time' in self.data.keys(): 155 | return len( self.data['Time']) 156 | else: 157 | return None 158 | 159 | 160 | def selectEvents(self, min, max, tag, **kwargs): 161 | """ 162 | returns events with time, coordinates, rel.Magnitude that corresponds to a certain time frame 163 | -cut catalog includes lower bound (min) but excludes upper bound (max) 164 | input: min, max = window of events 165 | min - can be set to string for columns that contain strings, e.g. type, magType etc. 166 | if min is not a string: 167 | min = None, select only events below max 168 | max = None, select only events above min 169 | tag can be 'Time' or magnitude , location, Mw... depending on dictionary 170 | kwargs: includeBoundaryEvents = True; include events with times equal to min and max otherwise 171 | include only lower boundary (min event) 172 | returnSel = returns IDs of selected events (type np.array([], int)) 173 | 174 | example: selectEvents( 3, 5, 'Mag', includeBoundaryEvents = True) - all events between 3 and 5 including M=3 and M=5 events 175 | selectEvents( 3, None, 'Mag') - everything above M=3 excluding M=3 events 176 | selectEvents( 4, None, 'Mag') and then selectEvents( 'w', None, 'MagType') - all Mws above Mw = 4 177 | 178 | """ 179 | if 'includeBoundaryEvents' in kwargs.keys() and kwargs['includeBoundaryEvents'] == True: 180 | if min == None or max == None: 181 | error_str = 'both boundaries have to be set to include boundary events' 182 | raise( ValueError( error_str)) 183 | else: 184 | sel = np.logical_and( self.data[tag] >= float(min), self.data[tag] <= float(max ) ) 185 | else: 186 | if isinstance( min, str ): 187 | #str columns, e.g magType .. 
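                # exact match on a string-valued column (e.g. magType == 'w'); note this returns a plain list of row indices rather than a boolean mask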
188 | sel = [i for i, x in enumerate( self.data[tag] ) if x == min] 189 | elif isinstance( min, (int, float) ) or min == None: 190 | if max == None: 191 | sel = self.data[tag] >= float(min) 192 | elif min == None: 193 | sel = self.data[tag] < max 194 | else: 195 | sel = np.logical_and( self.data[tag] >= float(min), self.data[tag] < float(max) ) 196 | else: 197 | error_str = 'unknown input min = %s'%(min) 198 | raise( ValueError( error_str)) 199 | #sel = np.arange( self.size(), dtype = int )[sel] 200 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 201 | return sel 202 | else: 203 | self.selDicAll( sel) 204 | 205 | def sortCatalog(self, tag, **kwargs): 206 | """sort catalog according to tag (string) e.g. Time, Mag, .... 207 | kwargs: beginWithBiggest = True , sort beginning with Biggest value 208 | returnSel = return boolean """ 209 | #get boolean vector for sorting 210 | vSortBool = self.data[tag].ravel().argsort() 211 | if 'beginWithBiggest' in kwargs.keys() and kwargs['beginWithBiggest'] == True: 212 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 213 | return vSortBool 214 | else: 215 | self.selDicAll( vSortBool[::-1]) 216 | else: 217 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 218 | return vSortBool 219 | else: 220 | self.selDicAll( vSortBool) 221 | 222 | def selDicAll(self, sel): 223 | """apply boolean vector to entire data 224 | e.g. for sorting or cutting ... """ 225 | for tag, vector in self.data.items(): #loop through all entries (tag = vector name, vector = entries) 226 | # for NND analysis first event is missing (orphan), so sel.shape = vector.shape - 1 227 | #if sel.shape[0] != vector.shape[0]: 228 | # print( tag, 'does not have the right dimension: %i %i'%(vector.shape[0], sel.shape[0]) 229 | #else: 230 | self.data[tag] = self.data[tag][sel] 231 | 232 | def selEventsFromID(self, a_ID, **kwargs): 233 | """ select events specified by list of IDs (self.data['N']) 234 | -----------------input 235 | 236 | kwargs: repeats = True , if eqIDs are repeated keep them in catalog and maintain the same order 237 | default = False, every earthquake is only ones in catalog, for several events with same ID keep only the first event 238 | 239 | ----------------return: 240 | eq catalog that corresponds to vEqID """ 241 | Nev= len( a_ID) 242 | repeats = False 243 | if 'repeats' in kwargs.keys() and kwargs['repeats'] == True: 244 | a_sel = np.ones( Nev, dtype = int) 245 | v_i = np.arange( self.size(), dtype = int) 246 | i = 0 247 | for currID in a_ID: # put one at location of ID match 248 | sel_curr_ev = self.data['N']==int(currID) 249 | if sel_curr_ev.sum() > 0: 250 | a_sel[i] = int( v_i[sel_curr_ev][0]) 251 | i += 1 252 | else: 253 | a_sel = np.in1d( self.data['N'], a_ID, assume_unique=True) 254 | self.selDicAll( a_sel) 255 | 256 | #======================================3========================================== 257 | # .mat binary load save 258 | #================================================================================= 259 | def check_keys(self, ): 260 | ''' 261 | checks if entries in dictionary are mat-objects. 
If yes 262 | to dict is called to change them to nested dictionaries 263 | ''' 264 | for key in self.data: 265 | if isinstance(self.data[key], scipy.io.matlab.mio5_params.mat_struct): 266 | self.data[key] = self.todict( self.data[key]) 267 | 268 | def todict(self, matobj): 269 | ''' 270 | A recursive function which constructs from matobjects nested dictionaries 271 | ''' 272 | dData = {} 273 | for strg in matobj._fieldnames: 274 | elem = matobj.__dict__[strg] 275 | if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct): 276 | dData[strg] = self.todict(elem) 277 | else: 278 | dData[strg] = elem 279 | return dData 280 | 281 | def saveMatBin(self, file): 282 | """save dic to bin file""" 283 | #scipy.io.savemat(file, self.data, appendmat=False, format = '4', oned_as = 'row' , do_compression = True) 284 | scipy.io.savemat(file, self.data, appendmat=True, format = '5',do_compression = True ) 285 | 286 | 287 | def loadMatBin(self, filename): 288 | ''' 289 | this function should be called instead of direct scipy.io.loadmat 290 | as it helps with additional non-variable tags in python dictionaries from .mat files 291 | 292 | 293 | --> can handle 'nested' variables in matlab where variable contain several structures 294 | ''' 295 | 296 | self.data = scipy.io.loadmat(filename,struct_as_record=False, squeeze_me=True) 297 | self.check_keys( ) 298 | l_tags = list( self.data.keys()) 299 | for tag in l_tags: 300 | if tag[0] == '_': 301 | #print( 'remove', tag, self.data[tag] 302 | self.data.pop( tag, None) 303 | #else: 304 | # print( tag, self.data[tag].shape[0] 305 | 306 | #======================================4========================================== 307 | # projections, rotations etc. 308 | #================================================================================= 309 | def toCart_coordinates(self, **kwargs): 310 | """ 311 | :input 312 | **kwargs['projection'] = 'aeqd' - (default) azimuthal equidistant 313 | 'eqdc' - equi distant conical projection 314 | 'cyl' - cynlidrical equidistant - not working 315 | 'returnProjection' : True - return basemap object 316 | use equidistant projection to convert lon, lat to X, Y coordinates 317 | :output catalog attributes: - self.data['X'], self.data['Y'], self.data['Depth'] in km 318 | return True or basemap object, m 319 | 320 | """ 321 | os.environ["PROJ_LIB"] = f"{os.environ['HOME']}/opt/anaconda3/share/proj"# adjust, comment out as needed 322 | from mpl_toolkits.basemap import Basemap 323 | projection = 'aeqd' 324 | if 'projection' in kwargs.keys() and kwargs['projection'] is not None: 325 | projection = kwargs['projection'] 326 | from mpl_toolkits.basemap import Basemap 327 | xmin,xmax = self.data['Lon'].min(), self.data['Lon'].max() 328 | ymin,ymax = self.data['Lat'].min(), self.data['Lat'].max() 329 | 330 | # setup equi distance basemap. 
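        # NOTE (untested sketch, not used by this method): Basemap is deprecated; an
        # equivalent lon/lat -> km conversion could be done with pyproj, roughly:
        #     from pyproj import Proj
        #     p = Proj( proj = 'aeqd', lat_0 = (ymin+ymax)*.5, lon_0 = (xmin+xmax)*.5, ellps = 'WGS84')
        #     self.data['X'], self.data['Y'] = p( self.data['Lon'], self.data['Lat'])  # in meters
        # The Basemap-based conversion below remains the default.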
331 | m = Basemap( llcrnrlat = ymin,urcrnrlat = ymax, 332 | llcrnrlon = xmin,urcrnrlon = xmax, 333 | projection = projection,lat_0=(ymin+ymax)*.5,lon_0=(xmin+xmax)*.5, 334 | resolution = 'l') 335 | 336 | self.data['X'], self.data['Y'] = m( self.data['Lon'], self.data['Lat']) 337 | if projection == 'cyl': 338 | pass 339 | else: 340 | self.data['X'] *= 1e-3 341 | self.data['Y'] *= 1e-3 342 | if 'returnProjection' in kwargs.keys() and kwargs['returnProjection'] == True: 343 | return m 344 | else: 345 | return True 346 | 347 | #======================================5========================================== 348 | # shuffling, random catalog 349 | #================================================================================= 350 | def randomize_cat(self): 351 | """ 352 | - create a randomized catalog with same average rate, no. of events and 353 | spatial extent as the initial catalog 354 | 355 | :return: - random Poissonian catalog, uniform spatial distribution 356 | """ 357 | ## randomize event times -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | #!python2.7 2 | __all__ = ["data_utils", "datetime_utils"] -------------------------------------------------------------------------------- /src/clustering.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # python3.7 3 | ''' 4 | Created on April 10th, 2019 5 | 6 | - function required for clustering analysis based on nearest-neighbor distances 7 | 8 | - NND_eta - eq. 1 for NND in Zaliaping & Ben-Zion 2013 9 | 10 | @author: tgoebel 11 | ''' 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import warnings 15 | #=============================================================================== 16 | # my modules 17 | #=============================================================================== 18 | import src.data_utils as data_utils 19 | 20 | #=============================================================================== 21 | # 22 | #=============================================================================== 23 | def NND_eta( eqCat, dConst, verbose = False, **kwargs): 24 | """ 25 | - NND_eta - eq. 1 for NND in Zaliapin & Ben-Zion 2013 26 | search for 'parent event' i.e. 
earthquake that occurred closest in space-time-magnitude domain 27 | but prior to the current event 28 | here: [jC] - are the off spring events and we try to find the closest parent, occurring earlier in time 29 | [sel_tau_par] - are the potential parent events that occurred before [jC], we select the closest in time 30 | 31 | Parameters 32 | ---------- 33 | catalog - catalog.data['Time'], 'Lon', 'Lat' (or 'X', 'Y',) 'Depth', 'MAG' 34 | - time, cartesian coordinates (X,Y, Depth), magnitude 35 | dConst - {'Mc':float, 'b':float, 'D':float} # dictionary with statistical seismicity parameters 36 | - completeness , b-value, fractal dimension 37 | kwargs - rmax (default: = 500) - maximum space window (for faster computation) 38 | - tmax (default: = 20) - maximum time window (for faster computation) 39 | - correct_co_located = True, add gaussian uncertainty to avoid product going to zero for co-located earthquakes 40 | - haversine = True - use haversine distance at surface instead of 3D cartesian distance 41 | - M0 - reference magnitude, default: M0 = 0 42 | Returns 43 | ------- 44 | - { 'aNND' : aNND, - nearest neighbor space-time magnitude distance 45 | 'aEqID_p' : np.array - ID of the parent event 46 | 'aEqID_c' : np.array - ID of the child event 47 | 'Time' : np.array - origin time of offspring 48 | } 49 | 50 | see: Clustering Analysis of Seismicity and Aftershock Identification, Zaliapin, I. (2008) 51 | 52 | """ 53 | #-------------------------------set args and kwargs----------------------------------------------- 54 | rmax = 500 # in km 55 | tmax = 20 # in years 56 | M0 = 0 # reference mag 57 | if 'M0' in kwargs.keys() and kwargs['M0'] is not None: 58 | M0 = kwargs['M0'] 59 | if 'rmax' in kwargs.keys() and kwargs['rmax'] is not None: 60 | rmax = kwargs['rmax'] 61 | if 'tmax' in kwargs.keys() and kwargs['tmax'] is not None: 62 | tmax = kwargs['tmax'] 63 | #-----------------------------add small uncertainty to X in case events are colocated-------------------------- 64 | if 'correct_co_located' in kwargs.keys() and kwargs['correct_co_located'] == True: 65 | vUncer = np.random.randn( eqCat.size())*1e-10 66 | eqCat.data['Lon'] += vUncer 67 | #------------------------------------------------------------------------------ 68 | aNND = np.zeros( eqCat.size()) 69 | vID_p = np.zeros( eqCat.size()) 70 | vID_c = np.zeros( eqCat.size()) 71 | a_M_MS_ref= (eqCat.data['Mag'] - M0)# mainshock mag with respect to reference 72 | 73 | for jC in range( eqCat.size()): 74 | if verbose == True: 75 | print( f"event {jC+1:d} of {eqCat.size():d}", end= "\r") 76 | # interevent times: take events that happend before t_i 77 | # child - parent > 0 78 | tau = eqCat.data['Time'][jC] - eqCat.data['Time'] 79 | sel_tau_par = tau > 0 80 | if sel_tau_par.sum() > 0: 81 | 82 | vcurr_ID = np.arange( eqCat.size(), dtype = int)[sel_tau_par] 83 | # if cartesian coordinates are available 84 | if 'X' in eqCat.data.keys() and 'Y' in eqCat.data.keys(): 85 | vR = np.sqrt( (eqCat.data['X'][jC] - eqCat.data['X'][vcurr_ID])**2 + (eqCat.data['Y'][jC] - eqCat.data['Y'][vcurr_ID])**2 ) 86 | else: 87 | # haversine distance 88 | vR = haversine( eqCat.data['Lon'][jC], eqCat.data['Lat'][jC],eqCat.data['Lon'][vcurr_ID], eqCat.data['Lat'][vcurr_ID] ) 89 | sel_r_par = vR < rmax 90 | if sel_r_par.sum() > 0: 91 | vcurr_ID = vcurr_ID[sel_r_par] 92 | curr_Eta = tau[vcurr_ID]* (vR[sel_r_par]**dConst['D']) *( 10**(-dConst['b']*a_M_MS_ref[vcurr_ID])) 93 | sel_min = curr_Eta == curr_Eta.min() 94 | aNND[jC] = curr_Eta[sel_min][0] 95 | vID_p[jC] = 
eqCat.data['N'][vcurr_ID][sel_min][0] 96 | vID_c[jC] = eqCat.data['N'][jC] 97 | #print( 'parent', eqCat.data['N'][vcurr_ID][sel_min][0], 'offspring', eqCat.data['N'][jC] 98 | #print( 'parent', eqCat.data['Time'][vcurr_ID][sel_min][0], 'offspring', eqCat.data['Time'][jC] 99 | 100 | if sel_min.sum() > 1: 101 | print( aNND[jC], curr_Eta[sel_min], eqCat.data['N'][vcurr_ID][sel_min]) 102 | print( eqCat.data['Lon'][vcurr_ID][sel_min], eqCat.data['Lat'][vcurr_ID][sel_min]) 103 | sel2 = aNND > 0 104 | if np.logical_not(sel2).sum() > 0: 105 | print( f"{np.logical_not(sel2).sum()} %i events with NND=0 ") 106 | #raise ValueError, error_str 107 | # remove events with aNND < 0; i.e. event at the beginning with no preceding parent 108 | return { 'aNND' : aNND[sel2], 'aEqID_p' : vID_p[sel2], 'aEqID_c' : vID_c[sel2], 'Time' : eqCat.data['Time'][sel2]} 109 | #return { 'aNND' : aNND, 'aEqID_p' : vID_p, 'aEqID_c' : vID_c, 'Time' : eqCat.data['Time'][1::]} 110 | 111 | 112 | def rFromTau( dt, b, D, eta_0, M_MS ): 113 | """ 114 | - compute maximum distance R for events in cluster 115 | based on interevent time, eta_0 and D (fractal dimension) 116 | :INPUT 117 | dt - array or float 118 | interevent times (dt relative to MS or first event in family) 119 | b - Gutenberg-Richter b-value 120 | D - fractal dimension, usually D~1.6 121 | eta_0 - empiricallly determined separation line between clustered and background 122 | mode 123 | M_MS - mainshock magnitude (here we assume only one triggering generation) 124 | :return: 125 | """ 126 | return ( -eta_0/dt * 10**( b*M_MS))**(1/D)*1e-3 127 | 128 | def rescaled_t_r(catChild, catPar, dConst, **kwargs): 129 | """ 130 | - compute rescaled time and distance 131 | 132 | Parameters 133 | ---------- 134 | catChild, catPar - objects of type SeisCatDic containing parent and child events 135 | dConst = 'b', 'D' - b-value, fractal dimension 136 | kwargs = distance_3D = True default : False i.e. 2D Euclidean distance 137 | 138 | Returns 139 | ------- 140 | - a_R, a_tau 141 | 142 | 143 | see: Clustering Analysis of Seismicity and Aftershock Identification, Zaliapin, I. 
(2008) 144 | 145 | """ 146 | #-------------------------------set args and kwargs----------------------------------------------- 147 | M0 = 0 148 | if 'M0' in kwargs.keys() and kwargs['M0'] is not None: 149 | M0 = kwargs['M0'] 150 | #-----------------------------add small uncertainty to X in case events are colocated-------------------------- 151 | if 'correct_co_located' in kwargs.keys() and kwargs['correct_co_located'] == True: 152 | vUncer = np.random.randn( catChild.size())*1e-10 153 | catChild.data['Lon'] += vUncer 154 | #------------------------------------------------------------------------------ 155 | #vMagCorr = 10**(-0.5*dConst['b']*(catPar.data['MAG']-M0) ) 156 | vMagCorr = 10**(-0.5*dConst['b']*(catPar.data['Mag']-M0) ) 157 | # if cartesian coordinates are available 158 | if 'X' in catChild.data.keys() and 'X' in catPar.data.keys(): 159 | a_R = np.sqrt((catChild.data['X'] - catPar.data['X']) ** 2 + (catChild.data['Y'] - catPar.data['Y']) ** 2) ** \ 160 | dConst['D'] * vMagCorr 161 | 162 | else: 163 | a_R = haversine(catChild.data['Lon'], catChild.data['Lat'], 164 | catPar.data['Lon'], catPar.data['Lat'])**dConst['D']*vMagCorr 165 | 166 | a_dt = catChild.data['Time']-catPar.data['Time']#interevent times 167 | a_tau = (a_dt)*vMagCorr 168 | sel2 = a_tau < 0 169 | if sel2.sum() > 0: 170 | #print( catChild.data['N'][sel2]) 171 | #print( catPar.data['N'][sel2]) 172 | error_str = '%i parents occurred after offspring, check order of origin time in catChild, catPar'%(sel2.sum()) 173 | raise( ValueError( error_str)) 174 | return a_R, a_tau 175 | 176 | 177 | def compileClust( dNND, simThreshold, verbose = True, **kwargs): 178 | """ 179 | assuming parent and off-spring is connected via unique measurement (e.g. nearest-neighbor distance) 180 | - create clusters of event pairs based on some similarity criteria 181 | e.g. 
a) based on cross-correlation coefficients between pairs 182 | b) based on space-time-magnitude distance 183 | - main input are pairs of connected events separated in parent and offspring 184 | (one parent can have many children, but child has only one parent) 185 | 1) find initial singles beyond threshold 186 | 2) find pairs below threshold and assemble clusters 187 | - take all event pairs with values below (eta_0) or above (CCC), 188 | --> pairs beyond the threshold do not have to be considered 189 | if offspring meets similarity criteria: 190 | - go through each pair and find cluster for child event by searching if the 191 | corresponding ID is already in any of the previous clusters 192 | - attach to existing cluster or create new cluster 193 | 3) - check if several offspring are connected to same parent and if 194 | different clusters have to be combined in case of ID repetition 195 | --> this is implemented as a while loop 196 | 4) - remove potential multiple IDs from clusters 197 | 198 | :Input - simThreshold = similarity parameter 199 | - vID_parent - event IDs 200 | - vID_child 201 | - vSimValues - all similarity values 202 | kwargs['useLargerEvents'] = False, 203 | 204 | :Return dClust - python dictionary that contains all clusters labeled numerically 205 | from '0' - not clustered 206 | '1' - '[nCLmax]' - clustered events 207 | each dictionary column contains IDs of children [first row] and parents [second row] 208 | """ 209 | # dNND = { 'aEqID_c' : vID_child, 210 | # 'aEqID_p' : vID_parent, 211 | # 'aNND' : vSim} 212 | # remove identical parents and off-spring if eq is in catalog several times 213 | sel = abs(dNND['aEqID_c']-dNND['aEqID_p']) > 0 214 | dNND= data_utils.selDicAll(dNND, sel) 215 | 216 | # check that dNND is sorted by time 217 | if 'Time' in dNND.keys(): 218 | i_sort = np.argsort( dNND['Time']) 219 | dNND = data_utils.selDicAll(dNND, i_sort) 220 | else: 221 | error_str = "'Time' key missing, add offspring origin time to dNND" 222 | raise ValueError( error_str) 223 | #==================================1============================================= 224 | # initial selection of events beyond threshold (single event) 225 | #================================================================================ 226 | ### events without trigger 227 | if 'useLargerEvents' in kwargs.keys() and kwargs['useLargerEvents'] == True: 228 | print( 'assuming threshold (%s) is a MINIMUM, select similarity values ABOVE this threshold'%( simThreshold)) 229 | sel_single = dNND['aNND'] <= simThreshold 230 | # remove independent events 231 | dNND_trig = data_utils.selectDataRange( dNND, simThreshold, None, 'aNND') 232 | else: 233 | print( 'assuming threshold (%s) is a MAXIMUM, select similarity values BELOW this threshold'%( simThreshold)) 234 | sel_single = dNND['aNND'] >= simThreshold 235 | # remove independent events 236 | dNND_trig = data_utils.selDicAll( dNND, np.logical_not( sel_single)) 237 | # preliminary single selection with eta > eta_0, may contain cluster events 238 | vID_single = dNND['aEqID_c'][sel_single] # could be singles or parents but not offspring 239 | sel_first = np.in1d( dNND['aEqID_p'][0], vID_single) 240 | if dNND['aNND'][0] > simThreshold and sel_first.sum() == 0: 241 | vID_single = np.append( dNND['aEqID_p'][0], vID_single) 242 | 243 | if verbose == True: 244 | print( f"---------compileClust - initial numbers:------") 245 | print( f"No. singles: {vID_single.shape[0]}"), 246 | print( f"No. 
triggered: {dNND_trig['aEqID_c'].shape[0]}, {dNND_trig['aEqID_p'].shape[0]}," 247 | f"No. tot. {dNND_trig['aEqID_p'].shape[0]} {sel_single.sum()+dNND_trig['aEqID_c'].shape[0]}") 248 | #==================================2============================================= 249 | # find clustered events 250 | #================================================================================ 251 | # initiate vectors and dic during first run 252 | curr_child_ID = dNND_trig['aEqID_c'][0] 253 | curr_par_ID = dNND_trig['aEqID_p'][0] 254 | v_pastEqIDs = np.array( [curr_child_ID, curr_par_ID] ) 255 | v_pastClIDs = np.array( [1, 1] ) 256 | # dClust['0'] = singles 257 | dClust = { '1' : np.array( [[curr_child_ID], 258 | [curr_par_ID ] ])} 259 | # for each child find the corresponding parent ID 260 | # if child or parent ID are already part of a cluster append to this cluster 261 | nCl = 2 262 | for iEv in range(1, dNND_trig['aEqID_p'].shape[0]): 263 | #print( 'nPair', iEv+1, 'out of', len( dNND_trig['aEqID_p']), 'iCl', nCl 264 | curr_child_ID = dNND_trig['aEqID_c'][iEv] 265 | curr_par_ID = dNND_trig['aEqID_p'][iEv] 266 | # check if parent or child are part of previous cluster 267 | sel_child = curr_child_ID == v_pastEqIDs 268 | sel_par = curr_par_ID == v_pastEqIDs 269 | 270 | if sel_par.sum() > 0 or sel_child.sum() > 0: 271 | # find which cluster event pair belongs to 272 | if sel_par.sum() and sel_child.sum(): # both already part of a cluster 273 | curr_cl_ID1 = v_pastClIDs[sel_par][0] 274 | curr_cl_ID2 = v_pastClIDs[sel_child][0] 275 | # merge clusters and add IDs 276 | dClust[str(curr_cl_ID1)] = np.hstack( (dClust[str(curr_cl_ID1)], 277 | np.array([[curr_child_ID], [curr_par_ID ] ]) 278 | )) 279 | dClust[str(curr_cl_ID1)] = np.hstack( (dClust[str(curr_cl_ID1)], dClust[str(curr_cl_ID2)])) 280 | # add new events but previous cluster ID 281 | v_pastEqIDs = np.append( v_pastEqIDs, np.array([curr_child_ID, curr_par_ID] ) ) 282 | v_pastClIDs = np.append( v_pastClIDs, np.array([ curr_cl_ID1, curr_cl_ID1] ) ) 283 | # remove second cluster ID from dClust 284 | dClust.pop( str(curr_cl_ID2)) 285 | # remove from past eq IDs and past cl IDs 286 | sel = curr_cl_ID2 != v_pastClIDs 287 | v_pastEqIDs = v_pastEqIDs[sel] 288 | v_pastClIDs = v_pastClIDs[sel] 289 | else: # only one is part of a cluster 290 | if sel_par.sum() > 0: # parent already part of a cluster 291 | curr_cl_ID = v_pastClIDs[sel_par][0] 292 | else:# child already part of a cluster 293 | curr_cl_ID = v_pastClIDs[sel_child][0] 294 | dClust[str(curr_cl_ID)] = np.hstack( (dClust[str(curr_cl_ID)], 295 | np.array([[curr_child_ID], [curr_par_ID ] ]) 296 | )) 297 | v_pastEqIDs = np.append( v_pastEqIDs, np.array([curr_child_ID, curr_par_ID] ) ) 298 | v_pastClIDs = np.append( v_pastClIDs, np.array([ curr_cl_ID, curr_cl_ID ] ) ) 299 | else: # start a new cluster 300 | dClust[str(nCl)] = np.array( [[curr_child_ID], 301 | [curr_par_ID ] ]) 302 | v_pastEqIDs = np.append( v_pastEqIDs, np.array([curr_child_ID, curr_par_ID] ) ) 303 | v_pastClIDs = np.append( v_pastClIDs, np.array([ nCl, nCl ] ) ) 304 | nCl += 1 305 | # check if children have same parent 306 | nTotChild = 0 307 | #=================================3========================================================================== 308 | # remove events from singles if in cluster, remove multiple IDs 309 | #============================================================================================================ 310 | # create vector of triggered eqIDs and count triggered events 311 | vID_Trig_all = np.array([]) 312 
| vclID_allEv = np.array([], dtype = int) 313 | for tag in sorted( dClust.keys()): 314 | #print( 'iCl', tag, 'nEv in cluster', np.unique( dClust[tag].flatten()).shape[0] 315 | #print( dClust[tag][0] 316 | aID_flat_uni = np.unique( dClust[tag].flatten()) 317 | #nTotTrig += aID_flat_uni.shape[0] 318 | vID_Trig_all = np.append( vID_Trig_all, aID_flat_uni ) 319 | vclID_allEv = np.append( vclID_allEv, np.ones( aID_flat_uni.shape[0], dtype = int)*int(tag)) 320 | # remove multiple ID entries --> possible since pairs are always appeneded 321 | dClust[tag] = aID_flat_uni 322 | nTotChild += dClust[tag].shape[0]-1 323 | #====================================4======================================================================== 324 | # check for events in more than one cluster, merge clusters 325 | #============================================================================================================ 326 | # sel_same = np.in1d( vID_Trig_all, np.array([ 3049419, 9020431, 9172305, 9173365, 15332137])) 327 | # print( "events in trig_all before double remove: ", sel_same.sum(), vID_Trig_all[sel_same]) 328 | aIDs, aCounts = np.unique( vID_Trig_all, return_counts=True) 329 | selDouble = aCounts > 1 330 | if verbose == True: 331 | print( f"N event IDs in more than one cluster: {selDouble.sum()}") 332 | i_run = 1 333 | while selDouble.sum() > 0: 334 | if verbose == True: 335 | print( '%i. run to remove doubles'%(i_run)) 336 | for ID in np.unique( aIDs[selDouble]): 337 | selCl = ID == vID_Trig_all 338 | aClID = np.unique( vclID_allEv[selCl]) 339 | for iCl in range( len( aClID)-1): 340 | if verbose == True: 341 | print( 'iCl with same events', str( aClID[0]), str( aClID[iCl+1]), 'evID: ', int(ID)) 342 | #A# merge clusters that have same events 343 | dClust[str(aClID[0])] = np.unique( np.hstack( (dClust[str(int( aClID[0]))], dClust[str( int(aClID[iCl+1]))]))) 344 | #B# remove cluster IDs from dictionary 345 | dClust.pop( str( int( aClID[iCl+1]))) 346 | #C# remove double event and corresponding clID from: 347 | # vID_Trig_all 348 | sel_rem = ID != vID_Trig_all 349 | vID_Trig_all = vID_Trig_all[sel_rem] 350 | # and vclID_allEv 351 | vclID_allEv = vclID_allEv[sel_rem] 352 | # leave one event with new clID, i.e. clId of first cluster that contains ID 353 | vclID_allEv = np.append( vclID_allEv, aClID[0]) 354 | vID_Trig_all = np.append( vID_Trig_all, ID) 355 | aIDs, aCounts = np.unique( vID_Trig_all, return_counts=True) 356 | selDouble = aCounts > 1 357 | i_run += 1 358 | # find events within initial single selection (eta > eta_0) 359 | # which are actually part of clustered events 360 | sel_single = np.ones( vID_single.shape[0], dtype = int) > 0 361 | iS = 0 362 | for ID_single in vID_single: 363 | sel = ID_single == vID_Trig_all 364 | if sel.sum() > 0: # remove this event from singles 365 | sel_single[iS] = False 366 | iS += 1 367 | if verbose == True: 368 | print("initial singles now parents - remove from dClust['0']: ",np.array([~sel_single]).sum()) 369 | 370 | vID_single = vID_single[sel_single] 371 | if verbose == True: 372 | print( "---------------final result--------------------------") 373 | print( f" Ntot in cluster: {len( vID_Trig_all)}, N-parent(=N-clust): {len(dClust.keys())}," 374 | f"No. singles: {vID_single.shape[0]}, Ntot. offspring (includes doubles): {nTotChild}") 375 | print( "trig. 
fraction: ", round((len( vID_Trig_all)-len(dClust.keys()))/dNND['aNND'].shape[0],2), "frac.MS: ", round( len(dClust.keys())/dNND['aNND'].shape[0],2), "single: ", round((vID_single.shape[0]/dNND['aNND'].shape[0]),2)) 376 | print( 'Ntot in cat.', dNND['aNND'].shape[0]+1, 'N-trig + N-ind', len( vID_Trig_all)+vID_single.shape[0]) 377 | 378 | dClust[str(0)] = vID_single 379 | return dClust 380 | 381 | def addClID2cat( seisCat, dClust, test_plot = False, **kwargs): 382 | """ 383 | - add new column (i.e. dictionary tag='famID') for seisCat 384 | that specifies which cluster each event belongs to 385 | - !note: if offspring generations should be recorded, run: 386 | clustering.offspring_gen() first 387 | and use the output dictionary as input for this fct. 388 | 389 | :param dClust: python dictionary 390 | each dic. element specified by key is a vector of evIDs 391 | or three row matrix with evID, iGen and average leaf depth 392 | 393 | 394 | :param seisCat: 395 | :return: seisCat (with new tags: 396 | 'famID' - record family links between events 397 | optional: 398 | (note that 'clID' is commonly used for waveform-based relocations) 399 | 'iGen' - record offspring generation within family) 400 | 'LD' - average leaf depth for each cluster 401 | 402 | """ 403 | # sort original catalog to get ID of first event 404 | seisCat.sortCatalog( 'Time') 405 | 406 | # first row is clusterID and second row event ID from catalog 407 | nRows = 2 408 | b_add_iGen = False 409 | if len( dClust['0'].shape) > 1: 410 | b_add_iGen = True 411 | # additional rows for trig. generation and average leaf depth 412 | nRows = 4 413 | mClust = np.zeros([nRows, seisCat.size()]) 414 | nGen = 0 415 | nFam = 0 416 | nEv = 0 417 | i = 0 418 | for sCl in dClust.keys(): 419 | iCl = int(sCl) 420 | # print( f"------iCl: {iCl}, nEv: {nEv}--------, evID={dClust[sCl]}") 421 | #earthquake event IDs 422 | if b_add_iGen == False: 423 | nEv = dClust[sCl].shape[0] 424 | mClust[1, i:i + nEv] = dClust[sCl] 425 | else: 426 | nEv = dClust[sCl].shape[1] 427 | mClust[1, i:i + nEv] = dClust[sCl][0] 428 | # family IDS 429 | mClust[0,i:i+nEv] = np.ones(nEv)*iCl 430 | nFam += len( dClust[sCl]) 431 | if b_add_iGen == True: 432 | nGen += len( dClust[sCl][1]) 433 | # trig generation 434 | mClust[2,i:i+nEv] = dClust[sCl][1] 435 | # ave. leaf depth 436 | mClust[3, i:i + nEv] = dClust[sCl][2] 437 | i += nEv 438 | #---------include first event in catalog as single------------- 439 | selFirst = seisCat.data['N'][0] == mClust[1] 440 | if selFirst.sum() == 0: 441 | ID_first = int( seisCat.data['N'][0]) #[~selUni][0]) 442 | print( 'first ev. in catalog -ID:', ID_first, int( seisCat.data['N'][0]), 'last ev. in mClust', mClust[1,-1], 'should=0') 443 | mClust[1] = np.hstack( (ID_first, mClust[1,0:-1]))# not needed if catalog is sorted by Time 444 | #sel_same = np.in1d( mClust[1], seisCat.data['N']) 445 | # check that every event ID is represented only once 446 | __, aID, aN_uni = np.unique( mClust[1], return_counts = True, return_index=True) 447 | sel = aN_uni > 1 448 | if sel.sum() > 0: 449 | error_str = f"ev.
ID represented more than once: {mClust[1][aID[sel]]}, 'N-repeats: ', {aN_uni[sel]}" 450 | raise ValueError( error_str) 451 | #--sort both cluster ID matrix and cat with respect to IDs 452 | sortSel = mClust[1].argsort() 453 | mClust = mClust.T[sortSel].T 454 | seisCat.sortCatalog('N') #--otherwise clIDs get assigned to wrong event 455 | 456 | if test_plot == True: 457 | plt.figure() 458 | plt.subplot( 211) 459 | plt.plot( mClust[1], mClust[1]-seisCat.data['N'], 'ko') 460 | plt.xlabel( 'Event ID in Clust') 461 | plt.ylabel( 'Diff. Events IDs (0)') 462 | plt.subplot( 212) 463 | plt.plot(mClust[1], mClust[0], 'ko') 464 | plt.xlabel('Event ID in Clust') 465 | plt.ylabel('Cluster ID') 466 | #plt.plot( plt.gca().get_xlim(), plt.gca().get_xlim(), 'r--') 467 | plt.show() 468 | 469 | seisCat.data['famID'] = np.int32( mClust[0]) 470 | if b_add_iGen == True: 471 | seisCat.data['iGen'] = np.int16(mClust[2]) 472 | seisCat.sortCatalog( 'Time') 473 | return seisCat 474 | 475 | def offspring_gen( dClust, dNND, f_eta_0, **kwargs): 476 | """ 477 | - trace back triggering chain chronologically and assign trig generation 478 | - start with parent generation, then add end leafs 479 | a) identify all parents within cluster 480 | b) sort by time 481 | c) assign the same iGen to offspring of the same parent (hierarchical) 482 | compute average leaf depth: 483 | = 1/n sum( d_i) = ave. depth across end leafs 484 | __________________________________ 485 | input: seisCat = object SeismicityCatalog 486 | used to get origin times of offspring events 487 | dNND = 488 | 'aEqID_c' - unique event IDs of offspring 489 | 'aEqID_p ' - events IDs of parents, these are paired to a_ID_child so order matters 490 | parents can have many offspring, so repeats are possible here 491 | 'Time' - offspring origin time from catalog, in case IDs are not chronological 492 | 493 | dClust - '[famID]' = np.array([ offSpringIDs]) 494 | ---------------------------------- 495 | return: 496 | dGen - python dictionary 497 | 'famID' : np.array([3, N]) 498 | # dGen[famID][0] = evIDs 499 | # dGen[famID][1] = trig generation 500 | # dGen[famID][2] = ave. leaf depth 501 | - average lead depth (same number for entire cluster) 502 | """ 503 | #=========================1======================================== 504 | # count generations of offspring events 505 | #================================================================== 506 | dGen = {} 507 | l_famID = list( dClust.keys()) 508 | # singles are all 0 generation 509 | dGen['0'] = np.zeros( (3, len( dClust['0']))) 510 | # set ev IDs in new dic 511 | dGen['0'][0] = dClust['0'] 512 | 513 | # ave LD = 1 514 | dGen['0'][2] = np.ones( len( dClust['0'])) 515 | 516 | # ignore singles below 517 | l_famID.remove( '0') 518 | for famID in l_famID: 519 | ###find ori. 
time for each child 520 | sel_chi_t = np.in1d( dNND['aEqID_c'], dClust[famID]) 521 | # filter for parent - child NND < eta_0 522 | sel_chi_t2 = dNND['aNND'][sel_chi_t] < f_eta_0 523 | curr_iPar = dNND['aEqID_p'][sel_chi_t][sel_chi_t2] 524 | curr_iChi = dNND['aEqID_c'][sel_chi_t][sel_chi_t2] 525 | curr_tChi = dNND['Time'][np.in1d( dNND['aEqID_c'],curr_iChi)] 526 | 527 | ##sort cluster IDs with respect to offspring time 528 | sel_sort = np.argsort( curr_tChi) 529 | first_ID = curr_iChi[sel_sort][0] 530 | 531 | # get unique parents and sort by time 532 | uni_curr_iPar = np.unique(curr_iPar) 533 | uni_par_times = curr_tChi[np.in1d(curr_iChi, uni_curr_iPar)] 534 | uni_curr_iPar = curr_iChi[np.in1d(curr_iChi, uni_curr_iPar)] 535 | sort_uni_par = np.argsort( uni_par_times) 536 | uni_curr_iPar = uni_curr_iPar[sort_uni_par] 537 | # check if parent of first pair needs to be added 538 | if np.isin( curr_iPar[0], uni_curr_iPar).sum() == 0: 539 | uni_curr_iPar = np.hstack(( curr_iPar[0], uni_curr_iPar)) 540 | # add end leafs (offspring that are not parents) 541 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 542 | uni_curr_iPar = np.hstack((uni_curr_iPar, curr_iChi[sel_endLeaf])) 543 | #----------initiate new vectors------------------------ 544 | uni_iGen_pastPar = np.zeros( len(curr_tChi)+1) 545 | uni_id_pastPar = np.zeros( len(curr_tChi)+1) 546 | ## assign chronological triggering generation 547 | curr_iGen = np.zeros( len(curr_tChi)+1) 548 | iGen = 0 549 | for iPar in range( len(uni_curr_iPar)): 550 | # check if current parent is offspring of other parent 551 | pastPar = curr_iPar[uni_curr_iPar[iPar] == curr_iChi] 552 | if len( pastPar) > 0: 553 | sel_pastPar = pastPar == uni_id_pastPar 554 | else: 555 | sel_pastPar = np.array([False]) 556 | if sel_pastPar.sum() > 0: 557 | # add 1 to previous parent triggering generation 558 | curr_iGen[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 559 | uni_iGen_pastPar[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 560 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 561 | else: 562 | curr_iGen[iPar] = iGen 563 | uni_iGen_pastPar[iPar] = iGen 564 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 565 | iGen += 1 # assign new trig generation 566 | # save evID, trigger generation in dictionary 567 | dGen[famID] = np.zeros( (3, len(uni_id_pastPar))) 568 | 569 | dGen[famID][0] = uni_id_pastPar 570 | dGen[famID][1] = curr_iGen 571 | # =========================3======================================== 572 | # compute ave. 
leaf depth 573 | # ================================================================== 574 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 575 | dGen[famID][2] = np.ones( len(dClust[famID]))*curr_iGen[1::][sel_endLeaf].mean() 576 | return dGen 577 | 578 | def offspring_gen_test( dClust, dNND, f_eta_0, **kwargs): 579 | #=========================1======================================== 580 | # add origin times from seisCat to dNND 581 | #================================================================== 582 | # sort dNND and seisCat by offspring ID!!- seisCat.data['Time'] is added to dNND 583 | # sortSel = np.argsort( dNND['aEqID_c']) 584 | # for tag in list(dNND.keys()): 585 | # dNND[tag] = dNND[tag][sortSel] 586 | # seisCat.sortCatalog('Time') 587 | # firstEvID = seisCat.data['N'][0] 588 | # seisCat.sortCatalog('N') 589 | # # add offspring origin time to dNND 590 | # sel = firstEvID == seisCat.data['N'] 591 | # dNND['at_c'] = seisCat.data['Time'][~sel] 592 | # check that dNND is sorted by time 593 | # if 'Time' in dNND.keys(): 594 | # i_sort = np.argsort( dNND['Time']) 595 | # dNND = data_utils.selDicAll(dNND, i_sort) 596 | # else: 597 | # error_str = "'Time' key missing, add offspring origin time to dNND" 598 | # raise ValueError( error_str) 599 | #=========================2======================================== 600 | # count generations of offspring events 601 | #================================================================== 602 | dGen = {} 603 | l_famID = list( dClust.keys()) 604 | # singles are all 0 generation 605 | dGen['0'] = np.zeros( (3, len( dClust['0']))) 606 | # set ev IDs in new dic 607 | dGen['0'][0] = dClust['0'] 608 | # ave LD = 1 609 | dGen['0'][2] = np.ones( len( dClust['0'])) 610 | # ignore singles below 611 | l_famID.remove( '0') 612 | for famID in l_famID: 613 | ###find ori. 
time for each child 614 | sel_chi_t = np.in1d( dNND['aEqID_c'], dClust[famID]) 615 | # filter for parent - child NND < eta_0 616 | sel_chi_t2 = dNND['aNND'][sel_chi_t] < f_eta_0 617 | curr_iPar = dNND['aEqID_p'][sel_chi_t][sel_chi_t2] 618 | curr_iChi = dNND['aEqID_c'][sel_chi_t][sel_chi_t2] 619 | curr_tChi = dNND['Time'][np.in1d( dNND['aEqID_c'],curr_iChi)] 620 | # curr_tChi = np.zeros( len( curr_iPar)) 621 | # for iP in range( len( curr_iChi)): 622 | # curr_tChi[iP] = dNND['at_c'][dNND['aEqID_c']==curr_iChi[iP]] 623 | 624 | ##sort cluster IDs with respect to offspring time 625 | sel_sort = np.argsort( curr_tChi) 626 | 627 | curr_tChi = curr_tChi[sel_sort] 628 | curr_iChi = curr_iChi[sel_sort] 629 | curr_iPar = curr_iPar[sel_sort] 630 | 631 | # parent IDs have one less element than complete cluster (first event has no parent) 632 | #sel_sort = np.hstack((0, sel_sort+1)) 633 | # make sure dClust[famID] = dNND['aEqID_c']+ 634 | firstID = dClust[famID][0] 635 | 636 | uni_curr_iPar = np.unique( curr_iPar) 637 | # sort unique parents by time 638 | print( uni_curr_iPar) 639 | uni_par_times = curr_tChi[np.in1d(curr_iChi, uni_curr_iPar)] 640 | uni_curr_iPar = curr_iChi[np.in1d(curr_iChi, uni_curr_iPar)] 641 | sort_uni_par = np.argsort( uni_par_times) 642 | uni_curr_iPar = uni_curr_iPar[sort_uni_par] 643 | # check if parent of first pair needs to be added 644 | if np.isin( curr_iPar[0], uni_curr_iPar).sum() == 0: 645 | uni_curr_iPar = np.hstack(( curr_iPar[0], uni_curr_iPar)) 646 | # add end leafs (offspring that are not parents) 647 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 648 | uni_curr_iPar = np.hstack((uni_curr_iPar, curr_iChi[sel_endLeaf])) 649 | #----------initiate new vectors------------------------ 650 | uni_iGen_pastPar = np.zeros( len(curr_tChi)+1) 651 | uni_id_pastPar = np.zeros( len(curr_tChi)+1) 652 | ## assign chronological triggering generation 653 | curr_iGen = np.zeros( len(curr_tChi)+1) 654 | iGen = 0 655 | for iPar in range( len(uni_curr_iPar)): 656 | # # assign trig gen starting from oldest parent 657 | # sel_hier_par = curr_iPar == uni_curr_iPar[iPar] 658 | # # print("current parent: ", uni_curr_iPar[iPar], "offspring: ", curr_iChi[sel_hier_par]) 659 | # # print( "past parents", uni_id_pastPar) 660 | # # print( "past parent iGen", uni_iGen_pastPar) 661 | # check if current parent is offspring of other parent 662 | pastPar = curr_iPar[uni_curr_iPar[iPar] == curr_iChi] 663 | #print( uni_curr_iPar[iPar], pastPar, uni_id_pastPar) 664 | if len( pastPar) > 0: 665 | sel_pastPar = pastPar == uni_id_pastPar 666 | else: 667 | sel_pastPar = np.array([False]) 668 | if sel_pastPar.sum() > 0: 669 | print(uni_curr_iPar[iPar], "past parent: ", pastPar, "trig gen: ", uni_iGen_pastPar[sel_pastPar][0]+1) 670 | # add 1 to previous parent triggering generation 671 | curr_iGen[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 672 | uni_iGen_pastPar[iPar] = uni_iGen_pastPar[sel_pastPar][0]+1 673 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 674 | else: 675 | print( "trig gen: ", iGen) 676 | curr_iGen[iPar] = iGen 677 | uni_iGen_pastPar[iPar] = iGen 678 | uni_id_pastPar[iPar] = uni_curr_iPar[iPar] 679 | iGen += 1 # assign new trig generation 680 | 681 | print( uni_id_pastPar) 682 | print( curr_iGen) 683 | # save evID, trigger generation in dictionary 684 | dGen[famID] = np.zeros( (3, len(uni_id_pastPar))) 685 | dGen[famID][0] = uni_id_pastPar 686 | dGen[famID][1] = curr_iGen 687 | # =========================3======================================== 688 | # compute ave. 
leaf depth 689 | # ================================================================== 690 | # end leafs = events without offspring (curr_iPar is already < eta_0) 691 | #print( len( curr_iGen), len( curr_iChi), len( uni_curr_iPar)) 692 | sel_endLeaf = ~np.in1d(curr_iChi, curr_iPar) 693 | print( 'end leaf off. ID', curr_iChi[sel_endLeaf]) 694 | print( ' leaf depths ', curr_iGen[1::][sel_endLeaf]) 695 | print( 'mean leaf depth: ', curr_iGen[1::][sel_endLeaf].mean()) 696 | dGen[famID][2] = np.ones( len(dClust[famID]))*curr_iGen[1::][sel_endLeaf].mean() 697 | # recall data structure: 698 | # dGen[famID][0] = evIDs 699 | # dGen[famID][1] = trig generation 700 | # dGen[famID][2] = ave. leaf depth 701 | return dGen 702 | #================================================================================= 703 | # create random catalogs 704 | #================================================================================= 705 | # create uniform times 706 | def rand_rate_uni( N, tmin, tmax, **kwargs): 707 | """ draw N random origin times from a uniform distribution between tmin and tmax 708 | 709 | kwargs: - currently unused 710 | 711 | return: vector of N origin times between tmin and tmax """ 712 | return np.random.uniform( tmin, tmax, size = N) 713 | 714 | 715 | # ------------------------------------------------------------------------------------------ 716 | def haversine(lon1, lat1, lon2, lat2, **kwargs): 717 | """ 718 | haversine formula implementation 719 | https://en.wikipedia.org/wiki/Great-circle_distance 720 | great circle distance between two points 721 | :input lon1, lat1 722 | lon2, lat2 723 | 724 | i_radius - Earth radius in km (set within the function) 725 | :output distance - great circle distance in kilometers 726 | """ 727 | i_radius = 6371 728 | # convert to radians 729 | lon1 = lon1 * np.pi / 180 730 | lon2 = lon2 * np.pi / 180 731 | lat1 = lat1 * np.pi / 180 732 | lat2 = lat2 * np.pi / 180 733 | # haversine formula 734 | dlon = lon2 - lon1 735 | dlat = lat2 - lat1 736 | a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2 737 | c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)) 738 | distance = i_radius * c 739 | return distance 740 | 741 | # ==================================4============================================================== 742 | # T-R density plots 743 | # ================================================================================================= 744 | def plot_R_T( a_T, a_R, f_eta_0, **kwargs): 745 | """ 746 | - plot rescaled distance over rescaled time 747 | Parameters: 748 | dPar = {'binx': .1, 'biny': .1, # used for density and gaussian smoothing 749 | 'sigma': None, # if None: default = n**(-1./(d+4)), 750 | 'Tmin': -8, 'Tmax': 0, 751 | 'Rmin': -5, 'Rmax': 3, 752 | 'cmap': plt.cm.RdYlGn_r} 753 | Use kwargs['dPar'] = python dictionary 754 | 'binx', 'biny', etc.
to overwrite 755 | defaults for specific or all parameters 756 | :param kwargs: 757 | :return: fig - figure handle - use fig.axes to get list of corresponding axes 758 | """ 759 | dPar = {'binx': .1, 'biny': .1, # used for density and gaussian smoothing 760 | 'sigma': None, # if None: default = n**(-1./(d+4)), 761 | 'Tmin': -8, 'Tmax': 0, 762 | 'Rmin': -5, 'Rmax': 3, 763 | 'cmap': plt.cm.RdYlGn_r} 764 | if 'dPar' in kwargs.keys() and kwargs['dPar'] is not None: 765 | for tag in kwargs['dPar'].keys(): 766 | print( f"overwrite plot_R_T param: {tag}={kwargs['dPar'][tag]}") 767 | dPar[tag] = kwargs['dPar'][tag] 768 | a_Tbin = np.arange(dPar['Tmin'], dPar['Tmax'] + 2 * dPar['binx'], dPar['binx']) 769 | a_Rbin = np.arange(dPar['Rmin'], dPar['Rmax'] + 2 * dPar['biny'], dPar['biny']) 770 | sel = a_T > 0 771 | XX, YY, ZZ = data_utils.density_2D(np.log10(a_T[sel]), np.log10(a_R[sel]), a_Tbin, a_Rbin, sigma=dPar['sigma']) 772 | 773 | fig = plt.figure( figsize=(7, 9)) 774 | ax = plt.subplot(111) 775 | ax.set_title('Nearest Neighbor Pairs in R-T') 776 | # ------------------------------------------------------------------------------ 777 | normZZ = ZZ * (dPar['binx'] * dPar['biny'] * len(a_R)) 778 | plot1 = ax.pcolormesh(XX, YY, normZZ, cmap=dPar['cmap']) 779 | cbar = plt.colorbar(plot1, orientation='horizontal', shrink=.5, aspect=20, ) 780 | # ax.plot( np.log10( a_T), np.log10( a_R), 'wo', ms = 1.5, alpha = .2) 781 | # plot eta_0 to divide clustered and background mode 782 | ax.plot([dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']]) + f_eta_0, '-', lw=1.5, color='w') 783 | ax.plot([dPar['Tmin'], dPar['Tmax']], -np.array([dPar['Tmin'], dPar['Tmax']]) + f_eta_0, '--', lw=1.5, color='.5') 784 | # -----------------------labels and legends------------------------------------------------------- 785 | # cbar.set_label( 'Event Pair Density [#ev./dRdT]') 786 | cbar.set_label('Number of Event Pairs', labelpad=-60) 787 | ax.set_xlabel('Rescaled Time') 788 | ax.set_ylabel('Rescaled Distance') 789 | ax.set_xlim(dPar['Tmin'], dPar['Tmax']) 790 | ax.set_ylim(dPar['Rmin'], dPar['Rmax']) 791 | # fig.axes 792 | return fig -------------------------------------------------------------------------------- /src/data_utils.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/python2.7 2 | # -*- coding: utf-8 -*- 3 | """ 4 | 5 | helper functions for easier file handling (mainly ASCII) 6 | and data I/O, density estimates, 2D Gaussian smoothing etc. 
7 | 8 | 9 | @author tgoebel - UC Santa Cruz 10 | """ 11 | import os 12 | import numpy as np 13 | import scipy.io 14 | #================================================================================ 15 | # data I/O 16 | #================================================================================ 17 | def removeColumn( file_in, lCol): 18 | """ 19 | remove all columns specified in lCol 20 | 1) create duplicate file called 'dummy_file.txt' in cwd 21 | 2) remove column using awk 22 | 3) return file_name of duplicate 23 | """ 24 | # example syntax to remove three columns 25 | #os.system( "awk '{\$24=""; \$25=""; \$26=""; print}' in_file.txt > out_file.txt") 26 | lStr = [] 27 | for col in lCol: 28 | lStr.append( "$%s=\"\"; "%( col)) 29 | tmp_file = 'dummy_file.txt' 30 | command_str = "awk '{ %s print}' %s > %s"%( ''.join( lStr), file_in, tmp_file) 31 | os.system( command_str) 32 | return tmp_file 33 | 34 | def loadmat(filename, verbose = False): 35 | ''' 36 | this function should be called instead of directly calling scipy.io.loadmat 37 | which is used within the method 38 | (1) - filters dictionary tags 39 | (2) - properly recovers python dictionaries 40 | from mat files. checks dic tags which are still mat-objects 41 | (3) - correct arrays of the form: np.array([[ 1, 2, 3]]) to np.array([ 1, 2, 3]), squeeze_me=True 42 | (4) - can handle 'nested' variables in matlab where variables contain several structures 43 | 44 | ''' 45 | data = scipy.io.loadmat(filename, struct_as_record=True, squeeze_me=True) 46 | data = _check_keys(data) 47 | for tag in list( data.keys()): 48 | if tag[0] == '_': 49 | if verbose == True: 50 | print( 'remove', tag, data[tag]) 51 | data.pop( tag) 52 | return data 53 | 54 | def _check_keys( dData): 55 | ''' 56 | checks if entries in dictionary are mat-objects. If yes, 57 | _todict is called to change them to nested dictionaries 58 | ''' 59 | for key in dData: 60 | if isinstance(dData[key], scipy.io.matlab.mio5_params.mat_struct): 61 | dData[key] = _todict(dData[key]) 62 | return dData 63 | 64 | def _todict( matobj): 65 | ''' 66 | A recursive function which constructs nested dictionaries from matobjects 67 | ''' 68 | dData = {} 69 | for strg in matobj._fieldnames: 70 | elem = matobj.__dict__[strg] 71 | if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct): 72 | dData[strg] = _todict(elem) 73 | else: 74 | dData[strg] = elem 75 | return dData 76 | #================================================================================ 77 | # density estimates and smoothing 78 | #================================================================================ 79 | def density_2D( x, y, x_bin, y_bin, **kwargs): 80 | """ 81 | 2D, smoothed event density for point cloud with coordinates x,y 82 | uses method: scipy.stats.kde.gaussian_kde 83 | see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html 84 | :input x,y - dataset 85 | x_bin, y_bin - binned x and y vectors 86 | 87 | 88 | kwargs['sigma'] - specify gaussian smoothing kernel ('bw_method' in scipy.stats.kde) 89 | default: = n**( -1./(d+2.5)), adapted Scott rule for slightly tighter bandwidth 90 | - 'scott' 91 | sigma = n**( -1./(d+4)), d- number of dimensions, n - number of data points 92 | - 'silverman' 93 | sigma = (n * (d + 2) / 4.)**(-1.
/ (d + 4)) 94 | - float( ) = set Gaussian Bandwidth directly 95 | 96 | 97 | return XX, YY, ZZ - 2D binned x and y coordinates and density for each cell 98 | """ 99 | from scipy.stats import kde 100 | n,d = x.shape[0],2 101 | sigma = n**( -1./(d+2.5)) 102 | if 'sigma' in kwargs.keys() and kwargs['sigma'] is not None: 103 | sigma = kwargs['sigma'] 104 | # Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents 105 | fct_Gauss2D = kde.gaussian_kde( np.array([x,y]), bw_method = sigma) 106 | # meshgrid of x and y coordinates 107 | XX,YY = np.meshgrid( x_bin, y_bin) 108 | ZZ = fct_Gauss2D( np.vstack([XX.flatten(), YY.flatten()])).reshape( XX.shape) 109 | dx, dy = x_bin[1] - x_bin[0], y_bin[1] - y_bin[0] 110 | # check if integral is ~ one, better: use midpoint method 111 | print( 'check if integral ~1', round(ZZ.sum()*( dx*dy),3)) #ZZ[ZZ>0].mean()*(XX.max()-XX.min())*(YY.max()-YY.min())) 112 | return XX-.5*dx, YY-.5*dy, ZZ 113 | 114 | #================================================================================ 115 | # dictionary processing 116 | #================================================================================ 117 | def copyDic( dic): 118 | """ create a copy of dic""" 119 | import copy 120 | dCopy = {} 121 | for tag in dic.keys(): 122 | dCopy[tag] = copy.copy( dic[tag]) 123 | return dCopy 124 | 125 | def selectDataRange(dicOri, min, max, tag, **kwargs): 126 | """ 127 | select data within given range, set min = None or max = None for only lower or upper bound 128 | """ 129 | dic = copyDic(dicOri) 130 | if 'includeBoundaryEvents' in kwargs.keys() and kwargs['includeBoundaryEvents'] == True: 131 | if min == None or max == None: 132 | error_str = 'both boundaries have to be set to include boundary events' 133 | raise( ValueError( error_str)) 134 | else: 135 | sel = np.logical_and( dic[tag] >= float(min), dic[tag] <= float(max ) ) 136 | if max == None: 137 | sel = dic[tag] > float(min) 138 | elif min == None: 139 | sel = dic[tag] < max 140 | else: 141 | sel = np.logical_and( dic[tag] > float(min), dic[tag] < float(max) ) 142 | sel = np.arange( dic[tag].shape[0], dtype = int )[sel] 143 | if 'returnSel' in kwargs.keys() and kwargs['returnSel'] == True: 144 | return sel 145 | else: 146 | return selDicAll(dic, sel, **kwargs) 147 | 148 | 149 | def selDicAll(dic, curr_sel, **kwargs): 150 | """apply boolean vector to entire data 151 | e.g. for sorting or cutting ... """ 152 | newDic = {} 153 | for tag, vector in dic.items(): 154 | newDic[tag] = dic[tag][curr_sel] 155 | return newDic 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /src/datetime_utils.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/python2.7 2 | # -*- coding: utf-8 -*- 3 | """ 4 | convert year month day hour min sec to decimal year and vice
versa 5 | 6 | @author tgoebel - UC Santa Cruz 7 | """ 8 | from __future__ import division 9 | import numpy as np 10 | 11 | import time, datetime, calendar 12 | from datetime import datetime as dt 13 | 14 | 15 | def mo_to_sec( value): 16 | return value*(aveDyYr()/12)*24*3600 17 | 18 | def sec_to_mo( value): 19 | return value/((aveDyMo())*24*3600) 20 | 21 | def dy_to_sec( value): 22 | return value*24*3600 23 | 24 | def sec_to_dy( value): 25 | return value/(24*3600) 26 | 27 | 28 | def aveDyYr(): 29 | """ how many days in a year""" 30 | return 365 + 1/4 - 1/100 + 1/400 31 | 32 | def aveDyMo(): 33 | """ how many days in a month """ 34 | return aveDyYr()/12 35 | 36 | def checkDateTime( dateTime): 37 | """ check that hour != 24, MN != 60, SC != 60 """ 38 | YR, MO, DY, HR, MN, SC = int(dateTime[0]), int(dateTime[1]), int(dateTime[2]), int(dateTime[3]),int(dateTime[4]), float(dateTime[5]) 39 | if isinstance( YR, (float, int)): 40 | if SC < 0: 41 | SC = 0 42 | elif SC - 60 >= 0: 43 | MN += int((SC/60)) 44 | SC -= 60*int( (SC/60)) 45 | if MN < 0: 46 | MN = 0 47 | elif MN - 60 >= 0: 48 | HR += int((MN/60)) 49 | MN -= 60.*int( (MN/60.)) 50 | if HR < 0: 51 | HR = 0 52 | elif HR - 24 >= 0: 53 | HR = 23 54 | MN = 59 55 | SC = 59.999 56 | elif isinstance( YR, (np.ndarray)): 57 | #set all values below zero to zero 58 | sel = SC < 0 59 | SC[sel] = 0 60 | sel = MN < 0 61 | MN[sel] = 0 62 | sel = HR < 0 63 | HR[sel] = 0 64 | #set 60 to zero and 24 to 23.59.59.99 65 | sel = abs(SC - 60) < 1e-6 66 | SC[sel] = 0 67 | MN[sel] = MN[sel] + 1 68 | sel = 60 - MN < 1e-6 69 | MN[sel] = 0 70 | HR[sel] = HR[sel] + 1 71 | sel = 24 - HR < 1e-6 72 | HR[sel] = 23 73 | MN[sel] = 59 74 | SC[sel] = 59.99 75 | return YR, MO, DY, HR, MN, SC 76 | 77 | 78 | #------------------------------------------------------------------------------ 79 | # date-time conversions 80 | #------------------------------------------------------------------------------ 81 | def dateTime2decYr( datetime_in, **kwargs ): 82 | """ 83 | input: datetime_in = array containing time columns year - second 84 | out = date in decimal year 85 | """ 86 | try: 87 | o_dt = datetime.datetime( int( datetime_in[0] ), int( datetime_in[1] ), int( datetime_in[2] ), int( datetime_in[3] ), int( datetime_in[4] ), int( round( datetime_in[5])-1e-3)) 88 | except: 89 | error_msg = "datetime array not valid - %s; check if date and time is correct, e.g. no SC > 60.." % datetime_in 90 | raise ValueError( error_msg) 91 | time_sc = o_dt.hour*3600 + o_dt.minute*60 + o_dt.second 92 | # get no. of day within current year between 0 and 364 and add time in seconds 93 | dayOfYear_seconds = ( o_dt.timetuple().tm_yday - 1 ) * 86400.0 + time_sc 94 | if calendar.isleap( o_dt.year): 95 | year_fraction = dayOfYear_seconds / ( 86400.0 * 366 ) 96 | else: 97 | year_fraction = dayOfYear_seconds / ( 86400.0 * 365 ) 98 | # dec year = current year + day_time (in dec year) 99 | return o_dt.year + year_fraction 100 | 101 | def decYr2datetime( decimalYear ): 102 | """ 103 | convert decimal year to year/month/day...
104 | """ 105 | year = np.floor( decimalYear) 106 | rest = decimalYear-year 107 | 108 | if year%4 == 0: # leap year 109 | ndays = 366 110 | feb = 29 111 | else: 112 | ndays = 365 113 | feb = 28 114 | decDay = rest * ndays 115 | 116 | if decDay >= 0 and decDay <= 31: 117 | month = 1 118 | day = np.ceil( decDay ) 119 | rest = (decDay) -np.floor( decDay ) 120 | elif decDay >= 0 and decDay <= 31+feb: 121 | month = 2 122 | day = np.ceil( decDay- 31 ) 123 | rest = 1 -(day - (decDay - 31 )) 124 | elif decDay >= 31+feb and decDay <= 2*31+feb: 125 | month = 3 126 | day = np.ceil( decDay- (31+feb )) 127 | rest = 1 -(day - (decDay -(31+feb ))) 128 | elif decDay >= 2*31+feb and decDay <= 3*31+feb-1: 129 | month = 4 130 | day = np.ceil( decDay- (2*31+feb)) 131 | rest = 1 -(day - (decDay -(2*31+feb))) 132 | elif decDay >= 3*31+feb-1 and decDay <= 4*31+feb-1: 133 | month = 5 134 | day = np.ceil( decDay -(3*31+feb-1) ) 135 | rest = 1 -(day - (decDay -(3*31+feb-1))) 136 | elif decDay >= 4*31+feb-1 and decDay <= 5*31+feb-2: 137 | month = 6 138 | day = np.ceil( decDay-(4*31+feb-1)) 139 | rest = 1 -(day - (decDay -(4*31+feb-1))) 140 | elif decDay >= 5*31+feb-2 and decDay <= 6*31+feb-2: 141 | month = 7 142 | day = np.ceil( decDay-(5*31+feb-2) ) 143 | rest = 1 -(day - (decDay -(5*31+feb-2))) 144 | elif decDay >= 6*31+feb-2 and decDay <= 7*31+feb-2: 145 | month = 8 146 | day = np.ceil( decDay -(6*31+feb-2)) 147 | rest = 1 -(day - (decDay -(6*31+feb-2))) 148 | elif decDay >= 7*31+feb-2 and decDay <= 8*31+feb-3: 149 | month = 9 150 | day = np.ceil( decDay -(7*31+feb-2) ) 151 | rest = 1 -(day - (decDay -(7*31+feb-2))) 152 | elif decDay >= 8*31+feb-3 and decDay <= 9*31+feb-3: 153 | month = 10 154 | day = np.ceil( decDay -(8*31+feb-3)) 155 | rest = 1 -(day - (decDay -(8*31+feb-3))) 156 | elif decDay >= 9*31+feb-3 and decDay <= 10*31+feb-4: 157 | month = 11 158 | day = np.ceil( decDay -(9*31+feb-3)) 159 | rest = 1 -(day - (decDay -(9*31+feb-3))) 160 | elif decDay >= 10*31+feb-4 and decDay <= 11*31+feb-4: 161 | month = 12 162 | day = np.ceil( decDay -(10*31+feb-4)) 163 | rest = 1 -(day - (decDay -(10*31+feb-4))) 164 | else: 165 | print( 'wrong input decimal year') 166 | hour = np.floor( rest * 24 ) 167 | rest = 24*rest-hour 168 | minute = np.floor( rest * 60 ) 169 | rest = 60*rest-minute 170 | second = rest * 60 171 | if day == 0: # for int decimal years 172 | day = 1 173 | try: 174 | return [int(year[0]), int(month), int(day[0]), int(hour[0]), int(minute[0]), second[0]] 175 | except: 176 | return [int(year), int(month), int(day), int(hour), int(minute), second] 177 | -------------------------------------------------------------------------------- /test_scripts/1_2D_density_plots.py: -------------------------------------------------------------------------------- 1 | # python2.7 2 | """ 3 | - functions to plot binned and smoothed desnities of 2D data points 4 | 5 | --> plot as 2D probability density with Sum^x_y = 1, Integral = 1 6 | -For that purpose: divide by current sum (~number of events) and dx, dy 7 | which are the bins in x and y 8 | """ 9 | 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | np.random.seed(12345) 15 | #================================================================================ 16 | # fct. 
definitions 17 | #================================================================================ 18 | def density_2D( x, y, x_bin, y_bin, **kwargs): 19 | """ 20 | 2D, smoothed event density for point cloud with coordinates x,y 21 | uses method: scipy.stats.kde.gaussian_kde 22 | :input x,y - dataset 23 | x_bin, y_bin - binned x and y vectors 24 | 25 | 26 | kwargs['sigma'] - specify gaussian smoothing kernel ('bw_method' in scipy.stats.kde) 27 | default: = 'scott' 28 | sigma = n**( -1./(d+4)), d- number of dimensions, n - number of data points 29 | - 'silverman' 30 | sigma = (n * (d + 2) / 4.)**(-1. / (d + 4)) 31 | - float( ) = set Gaussian Bandwidth directly 32 | 33 | 34 | return XX, YY, ZZ - 2D binned x and y coordinates and density for each cell 35 | """ 36 | from scipy.stats import kde 37 | sigma = 'scott' 38 | if 'sigma' in kwargs.keys() and kwargs['sigma'] is not None: 39 | sigma = kwargs['sigma'] 40 | # Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents 41 | fct_Gauss2D = kde.gaussian_kde( np.array([x,y]), bw_method = sigma) 42 | # meshgrid of x and y coordinates 43 | XX,YY = np.meshgrid( x_bin, y_bin) 44 | ZZ = fct_Gauss2D( np.vstack([XX.flatten(), YY.flatten()])).reshape( XX.shape) 45 | dx, dy = x_bin[1] - x_bin[0], y_bin[1] - y_bin[0] 46 | # check if integral is ~ one, better: use midpoint method 47 | print( 'check if integral ~1', ZZ.sum()*( dx*dy)) #ZZ[ZZ>0].mean()*(XX.max()-XX.min())*(YY.max()-YY.min())) 48 | return XX-.5*dx, YY-.5*dy, ZZ 49 | #return XX, YY, ZZ 50 | #================================================================================ 51 | # parameters 52 | #================================================================================ 53 | Nev = 10 54 | nbins = 30 55 | xmin, xmax = -2.5, 2.5 56 | ymin, ymax = -5, 5 57 | 58 | sigma = .1 # Gaussian smoothing kernel 59 | 60 | # binsize in x and y 61 | dx, dy= float(xmax-xmin)/nbins, float(ymax-ymin)/nbins 62 | 63 | #================================================================================ 64 | # create random data, and binned data 65 | #================================================================================ 66 | # Create data: Nev random points 67 | data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], Nev) 68 | x, y = data.T 69 | a_xbin = np.arange( xmin-dx, xmax+2*dx, dx) 70 | a_ybin = np.arange( ymin-dy, ymax+2*dy, dy) 71 | 72 | #================================================================================ 73 | # compute Gaussian density 74 | #================================================================================ 75 | XX,YY,ZZ = density_2D( x, y, a_xbin, a_ybin, sigma = sigma) 76 | #================================================================================ 77 | # plots 78 | #================================================================================ 79 | # Create a figure with 6 plot areas 80 | fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5)) 81 | 82 | # Everything starts with a Scatterplot 83 | axes[0].set_title('Scatterplot') 84 | axes[0].plot(x, y, 'ko') 85 | # As you can see there is a lot of overplotting here!
86 | 87 | # Thus we can cut the plotting window in several hexbins 88 | 89 | axes[1].set_title('Hexbin') 90 | axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r) 91 | axes[1].plot( x, y, 'ko', ms= 2) 92 | axes[1].set_xlim( axes[0].get_xlim()) 93 | axes[1].set_ylim( axes[0].get_ylim()) 94 | 95 | # 2D Histogram 96 | axes[2].set_title('2D Histogram') 97 | counts, xedges, yedges, __ = axes[2].hist2d( x, y, bins=nbins, cmap=plt.cm.BuGn_r, normed = True) 98 | axes[2].plot( x, y, 'ko', ms= 2) 99 | axes[2].set_xlim( axes[0].get_xlim()) 100 | axes[2].set_ylim( axes[0].get_ylim()) 101 | 102 | dx,dy = (xedges[1]-xedges[0]), (yedges[1]-yedges[0]) 103 | #print(xedges, yedges, counts) 104 | print( 'check if integral ~1', counts.sum()*( dx*dy), counts.mean()*(xedges[-1]-xedges[0])*(yedges[-1]-yedges[0])) 105 | 106 | 107 | 108 | # plot a density 109 | axes[3].set_title('Gaussian Smoothing') 110 | axes[3].pcolormesh( XX, YY, ZZ, cmap=plt.cm.BuGn_r) 111 | axes[3].plot( x, y, 'ko', ms= 2) 112 | axes[3].set_xlim( axes[0].get_xlim()) 113 | axes[3].set_ylim( axes[0].get_ylim()) 114 | 115 | 116 | 117 | # add shading 118 | axes[4].set_title('2D Density with shading') 119 | axes[4].pcolormesh( XX,YY,ZZ, shading='gouraud', cmap=plt.cm.BuGn_r) 120 | axes[4].set_xlim( axes[0].get_xlim()) 121 | axes[4].set_ylim( axes[0].get_ylim()) 122 | 123 | 124 | # contour 125 | axes[5].set_title('Contour') 126 | axes[5].pcolormesh( XX, YY, ZZ, shading='gouraud', cmap=plt.cm.BuGn_r) 127 | axes[5].contour( XX, YY, ZZ ) 128 | axes[5].set_xlim( axes[0].get_xlim()) 129 | axes[5].set_ylim( axes[0].get_ylim()) 130 | plt.show() 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | --------------------------------------------------------------------------------
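A minimal usage sketch (hypothetical, not part of the repository tree above) for the standalone helpers defined in src/clustering.py and src/datetime_utils.py. It assumes the repository root is the working directory so that the src package is importable, and that the usual numpy/scipy/matplotlib dependencies are installed; the file name, coordinates, date, and sample size below are illustrative assumptions, not values taken from the project.

# usage_sketch.py (hypothetical file name, run from the repository root)
from src.clustering import haversine, rand_rate_uni
from src.datetime_utils import dateTime2decYr, decYr2datetime

# great-circle distance in km between two illustrative points in southern California
d_km = haversine(-118.24, 34.05, -117.5, 35.7)
print('epicentral distance [km]:', round(d_km, 1))

# convert a calendar date (YR, MO, DY, HR, MN, SC) to decimal year and back
t_dec = dateTime2decYr([2010, 4, 4, 22, 40, 42])
print('decimal year:', round(t_dec, 4), '-> back-converted:', decYr2datetime(t_dec))

# 1000 uniformly distributed random origin times, e.g. for a randomized catalog
a_t_rand = rand_rate_uni(1000, 1981, 2011)
print('random origin times between', a_t_rand.min(), 'and', a_t_rand.max())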