├── Analyses
    ├── Python_lib
    │   ├── __init__.py
    │   ├── QGIS
    │   │   ├── __init__.py
    │   │   └── pylib_QGIS.py
    │   ├── catalog
    │   │   ├── __init__.py
    │   │   └── pylib_catalog.py
    │   ├── plotting
    │   │   └── __init__.py
    │   ├── regression
    │   │   ├── __init__.py
    │   │   └── pylib_stats.py
    │   └── ground_motions
    │   │   ├── __init__.py
    │   │   └── pylib_Willis15CA_Vs30.py
    ├── README.md
    ├── Code_Verification
    │   ├── synthetic_datasets
    │   │   ├── read_me.txt
    │   │   └── create_synthetic_ds1.stan
    │   ├── preprocessing
    │   │   ├── PlotUsableMagRrupCatalog.py
    │   │   └── ComputeUsableMagRrupCatalog.R
    │   └── regression
    │   │   ├── ds1
    │   │       ├── comparison_inla_model1_time.py
    │   │       ├── main_cmdstan_model1_NGAWest3CA.py
    │   │       ├── main_pystan_model1_NGAWest3CA.py
    │   │       ├── main_cmdstan_model1_NGAWest2CA.py
    │   │       ├── main_cmdstan_model1_NGAWest2CANorth.py
    │   │       ├── main_pystan_model1_NGAWest2CA.py
    │   │       ├── main_pystan_model1_NGAWest2CANorth.py
    │   │       ├── main_inla_model1_NGAWest2CA.R
    │   │       └── main_inla_model1_NGAWest2CANorth.R
    │   │   ├── ds2
    │   │       ├── main_pystan_model2_corr_cells_NGAWest3CA_sparse.py
    │   │       ├── main_pystan_model2_corr_cells_NGAWest2CA_sparse.py
    │   │       ├── main_pystan_model2_corr_cells_NGAWest2CANorth_sparse.py
    │   │       ├── main_pystan_model2_corr_cells_NGAWest3CA.py
    │   │       ├── main_pystan_model2_uncorr_cells_NGAWest3CA.py
    │   │       ├── main_cmdstan_model2_corr_cells_NGAWest3CA.py
    │   │       ├── main_cmdstan_model2_uncorr_cells_NGAWest3CA.py
    │   │       ├── main_cmdstan_model2_corr_cells_NGAWest2CA.py
    │   │       ├── main_cmdstan_model2_uncorr_cells_NGAWest2CA.py
    │   │       ├── main_pystan_model2_uncorr_cells_NGAWest2CA.py
    │   │       ├── main_cmdstan_model2_corr_cells_NGAWest2CANorth.py
    │   │       ├── main_cmdstan_model2_uncorr_cells_NGAWest2CANorth.py
    │   │       ├── main_pystan_model2_uncorr_cells_NGAWest2CANorth.py
    │   │       ├── main_inla_model2_uncorr_cells_NGAWest2CANorth.R
    │   │       └── main_pystan_model2_corr_cells_NGAWest2CA.py
    │   │   └── ds3
    │   │       ├── main_pystan_model3_corr_cells_NGAWest3CA.py
    │   │       ├── main_pystan_model3_uncorr_cells_NGAWest3CA.py
    │   │       ├── main_pystan_model3_corr_cells_NGAWest2CA.py
    │   │       ├── main_pystan_model3_uncorr_cells_NGAWest2CA.py
    │   │       ├── main_pystan_model3_corr_cells_NGAWest2CANorth.py
    │   │       ├── main_pystan_model3_uncorr_cells_NGAWest2CANorth.py
    │   │       ├── main_cmdstan_model3_uncorr_cells_NGAWest3CA.py
    │   │       ├── main_cmdstan_model3_corr_cells_NGAWest3CA.py
    │   │       ├── main_cmdstan_model3_corr_cells_NGAWest2CA.py
    │   │       ├── main_cmdstan_model3_uncorr_cells_NGAWest2CA.py
    │   │       ├── main_cmdstan_model3_corr_cells_NGAWest2CANorth.py
    │   │       ├── main_cmdstan_model3_uncorr_cells_NGAWest2CANorth.py
    │   │       ├── main_inla_model3_uncorr_cells_NGAWest2CANorth.R
    │   │       └── main_inla_model3_uncorr_cells_NGAWest3CA.R
    ├── R_lib
    │   └── auxiliary_functions.R
    ├── Prediction
    │   └── create_scen_dataframe.py
    ├── Regression
    │   └── README.md
    └── Stan_lib
    │   ├── regression_stan_model1_unbounded_hyp.stan
    │   └── regression_stan_model1_unbounded_hyp_chol.stan
├── .gitignore
├── requirements.txt
├── Examples
    ├── example1
    │   ├── regression_stan_model.stan
    │   └── regression_inla_postprocessing.py
    └── example2
    │   ├── create_reg_dataset.py
    │   └── regression_inla.R
└── README.md


/Analyses/Python_lib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/QGIS/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/catalog/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/plotting/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/regression/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/ground_motions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Files and folders to ignore
2 | 
3 | # R history
4 | .Rhistory
5 | 
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Required python packages 
 2 | # for MyBinder
 3 | #--------------------------
 4 | numpy
 5 | scipy
 6 | pandas
 7 | matplotlib
 8 | ipywidgets
 9 | sklearn
10 | 


--------------------------------------------------------------------------------
/Analyses/README.md:
--------------------------------------------------------------------------------
 1 | # Analyses Directory Description
 2 | 
 3 |  * ``Data_Preparation``: folder containing the Jupyter notebooks to prepare the input files for the NGMM regression
 4 |  * ``Regression``: folder containing the Jupyter notebooks for the NGMM regression using INLA, CMDSTAN, and PYSTAN
 5 |  * ``Prediction``: folder containing Jupyter notebooks and examples to make predictions using NGMMs
 6 |  * ``Code_Verification``: folder containing the codes used in the verification exercise
 7 |  * ``Python_lib``: folder containing the Python scripts
 8 |  * ``R_lib``: folder containing the R scripts
 9 |  * ``Stan_lib``: folder containing the STAN regression files for the NGMM regression
10 |  * [Launch the Prior Distribution Visualization Tool](https://mybinder.org/v2/gh/NHR3-UCLA/ngmm_tools/bae8b8d09783d0916822ca2e138277b00d0ca6b5?urlpath=lab%2Ftree%2FAnalyses%2Fprior_distributions.ipynb)
11 |  
12 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/synthetic_datasets/read_me.txt:
--------------------------------------------------------------------------------
 1 | File name			Description
 2 | ---------------		---------------
 3 | create_synthetic_ds1.py	Creates synthetic datasets with: one spatially varying earthquake constant, one spatially varying site
 4 | 				constant, and one spatially independent site constant
 5 | create_synthetic_ds2.py	Creates synthetic datasets with: one spatially varying earthquake constant, one spatially varying site
 6 | 				constant, one spatially independent site constant, and cell specific anelastic attenuation (spatially 
 7 | 				varying and independent component) 
 8 | create_synthetic_ds3.py	Creates synthetic datasets with: one spatially varying earthquake constant, one spatially varying site
 9 | 				constant, one spatially independent site constant, a spatially varying geometrical spreading term that is 
10 | 				a function of the earthquake coordinates, a spatially varying Vs30 scaling term that is a function of the
11 | 				site coordinates and cell specific anelastic attenuation (spatially varying and independent component) 
12 | 


--------------------------------------------------------------------------------
/Examples/example1/regression_stan_model.stan:
--------------------------------------------------------------------------------
 1 | /*********************************************
 2 | Stan program for toy example
 3 | 
 4 |  ********************************************/
 5 | 
 6 | data {
 7 |   int N;  // number of observations
 8 |   int NG; // number of grid points
 9 |   
10 |   //grid IDs
11 |   int<lower=1,upper=NG> gid[N]; // grid id
12 | 
13 |   //observations
14 |   vector[N] Y; 
15 | 
16 |   //coordinates
17 |   vector[2] X_g[NG];
18 | }
19 | 
20 | transformed data {
21 |   real delta = 1e-9;
22 | }
23 | 
24 | parameters {
25 |   //aleatory std
26 |   real<lower=0> sigma; 
27 |   //kernel hyper-paramters
28 |   real<lower=0.0>  ell;
29 |   real<lower=0.0>  omega;
30 |  
31 |   //model coefficient
32 |   real c_0;
33 |   //standardized normal variables for spatially correlated coefficient
34 |   vector[NG] z_1;
35 | }
36 | 
37 | transformed parameters {
38 |   //spatially correlated coefficient
39 |   vector[NG] c_1;
40 | 
41 |   {
42 |     matrix[NG,NG] COV_1;
43 |     matrix[NG,NG] L_1;
44 |     for(i in 1:NG) {
45 |       for(j in i:NG) {
46 |         real C_1 = (omega^2 * exp(-distance(X_g[i],X_g[j])/ell));
47 |         COV_1[i,j] = C_1;
48 |         COV_1[j,i] = C_1;
49 |       }
50 |       COV_1[i,i] += delta;
51 |     }
52 |     L_1 = cholesky_decompose(COV_1);
53 |     c_1 = L_1 * z_1;
54 |   }
55 | }
56 | 
57 | 
58 | model {
59 |   //hyper-parameters
60 |   ell   ~ inv_gamma(2.,50);
61 |   omega ~ exponential(5);
62 |   sigma ~ lognormal(-1,0.3);
63 | 
64 |   //constant shift
65 |   c_0 ~ normal(0.,0.1);
66 |   //standardized normal variables for spatially correlated coefficient
67 |   z_1 ~ std_normal();
68 | 
69 |   //likelihood
70 |   Y ~ normal(c_0 + c_1[gid], sigma);
71 | }
72 | 
73 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/ground_motions/pylib_Willis15CA_Vs30.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Tue Feb  2 19:01:47 2021
 5 | 
 6 | @author: glavrent
 7 | """
 8 | #load variables
 9 | import pathlib
10 | import numpy as np
11 | import rasterio
12 | 
13 | 
class Willis15Vs30CA:
    """
    Point lookup on the two 'California_vs30_Wills15_hybrid_7p5c' rasters
    (the main raster and its '_sd' companion).

    Parameters
    ----------
    fname_vs30map_med : str, optional
        Path of the raster opened as ``self.vs30map_med``. When None, the
        hard-coded default location is used.
    fname_vs30map_sig : str, optional
        Path of the '_sd' raster opened as ``self.vs30map_sig``. When None,
        the hard-coded default location is used.
    """
    
    def __init__(self, fname_vs30map_med=None, fname_vs30map_sig=None):
        #vs30 data filenames, fall back to the default locations
        # NOTE(review): the defaults are absolute paths on the author's machine
        if fname_vs30map_med is None:
            fname_vs30map_med = '/mnt/halcloud_nfs/glavrent/Research/Other_projects/VS30_CA/data/California_vs30_Wills15_hybrid_7p5c.tif'
        if fname_vs30map_sig is None:
            fname_vs30map_sig = '/mnt/halcloud_nfs/glavrent/Research/Other_projects/VS30_CA/data/California_vs30_Wills15_hybrid_7p5c_sd.tif'
        #load vs30 data
        self.vs30map_med = rasterio.open( fname_vs30map_med )
        self.vs30map_sig = rasterio.open( fname_vs30map_sig )
    
    
    def lookup(self, lonlats):
        """
        Sample both rasters at the given points.

        Parameters
        ----------
        lonlats : iterable of (lon, lat) pairs
            Points passed to the ``sample`` method of each raster.

        Returns
        -------
        tuple of np.ndarray
            Float arrays with the first sampled value of every point, from
            ``vs30map_med`` and ``vs30map_sig`` respectively.
        """
        #the points are iterated twice, materialize them so generators work too
        lonlats = list(lonlats)
        #np.float was removed from numpy, the builtin float is the same dtype;
        #sample yields one array per point, take its single band value explicitly
        return (
            np.fromiter((val[0] for val in self.vs30map_med.sample(lonlats, 1)), dtype=float),
            np.fromiter((val[0] for val in self.vs30map_sig.sample(lonlats, 1)), dtype=float)
        )

    def test_lookup(self):
        """Check the lookup at two reference points against stored values (1% tolerance)."""
        medians, stds = list(self.lookup([(-122.258, 37.875), (-122.295, 37.895)]))
    
        np.testing.assert_allclose(medians, [733.4, 351.9], rtol=0.01)
        np.testing.assert_allclose(stds, [0.432, 0.219], rtol=0.01)
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/regression/pylib_stats.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Tue Mar 15 13:56:13 2022
 5 | 
 6 | @author: glavrent
 7 | 
 8 | Other python statistics functions
 9 | """
10 | 
11 | #import libraries
12 | import numpy as np
13 | 
def CalcRMS(samp_q, samp_p):
    '''
    Root mean square of the element-wise difference between the model 
    samples (samp_q) and the observation samples (samp_p)

    Parameters
    ----------
    samp_q : np.array()
        Model samples.
    samp_p : np.array()
        Data samples.

    Returns
    -------
    real
        Root mean square error.
    '''
    
    #mean of the squared residuals
    mean_sq_res = np.mean( np.square(samp_q - samp_p) )
    
    return np.sqrt(mean_sq_res)
36 | 
37 | 
def CalcLKDivergece(samp_q, samp_p):
    '''
    Histogram-based Kullback–Leibler divergence (base 2) of the observation 
    samples (samp_p) with respect to the model samples (samp_q)

    Parameters
    ----------
    samp_q : np.array()
        Model samples.
    samp_p : np.array()
        Data samples.

    Returns
    -------
    real
        Kullback–Leibler divergence.
    '''
    
    #common bin edges, defined from the pooled samples
    bin_edges = np.histogram(np.concatenate([samp_p,samp_q]))[1]
    
    #bin counts of the two sample sets
    cnt_p = np.histogram(samp_p, bins=bin_edges)[0]
    cnt_q = np.histogram(samp_q, bins=bin_edges)[0]

    #keep only the bins populated in both sets, otherwise kl= +/- inf
    i_used_bins = (cnt_p != 0) & (cnt_q != 0)
    cnt_p = cnt_p[i_used_bins]
    cnt_q = cnt_q[i_used_bins]
    
    #convert the counts to probabilities 
    prob_p = cnt_p/cnt_p.sum()
    prob_q = cnt_q/cnt_q.sum()
    
    return sum(p_k * np.log2(p_k/q_k) for p_k, q_k in zip(prob_p, prob_q))
73 | 


--------------------------------------------------------------------------------
/Analyses/R_lib/auxiliary_functions.R:
--------------------------------------------------------------------------------
 1 | #################################################
 2 | # This script contains various auxiliary 
 3 | # functions for R
 4 | #
 5 | #################################################
 6 | 
 7 | #libraries
 8 | library(sp)
 9 | library(rgdal)
10 | 
11 | #Latlon to utm
LongLatToUTM<-function(lat,lon,zone){
  #' Project latitude/longitude points to UTM coordinates
  #' 
  #' Input (note the argument order: latitude first):
  #'  lat: array with latitude degrees
  #'  lon: array with longitude degrees
  #'  zone: UTM zone
  #'  
  #' Output:
  #'  data.frame conversion of the projected points (point id and coordinates)
  
  #source (geographic) and target (UTM) coordinate reference systems
  crs_geo <- CRS("+proj=longlat +datum=WGS84")
  crs_utm <- CRS(paste0("+proj=utm +zone=",zone," +datum=WGS84"))
  
  #spatial points, X holds the longitude and Y the latitude
  pts <- data.frame(ID = 1:length(lon), X = lon, Y = lat)
  coordinates(pts) <- c("X", "Y")
  proj4string(pts) <- crs_geo
  
  #project and return as a plain data.frame
  pts_utm <- spTransform(pts, crs_utm)
  return(as.data.frame(pts_utm))
}
29 | 
30 | #Unique elements
UniqueIdxInv <- function(data_array){
  #' Unique elements, indices and inverse of data_array
  #' 
  #' Input:
  #'  data_array: input array
  #'  
  #' Output (named list):
  #'  unq: unique data, in order of first appearance
  #'  idx: indices in data_array of the first occurrence of each unique value
  #'  inv: inverse indices, such that unq[inv] reproduces data_array

  #number of data
  n_data <- length(data_array)

  #indices of the first occurrence of every unique value
  data_unq_idx <- which(!duplicated(data_array))
  #unique data
  data_unq     <- data_array[data_unq_idx]

  #inverse indices: position of every element in the unique data
  #(single vectorized lookup, also well defined for empty input)
  data_unq_inv <- array(as.numeric(match(data_array, data_unq)), n_data)
  
  #return output
  return(list(unq=data_unq, idx=data_unq_idx, inv=data_unq_inv))
}
64 | 


--------------------------------------------------------------------------------
/Examples/example2/create_reg_dataset.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat Mar 26 16:01:54 2022
 5 | 
 6 | @author: glavrent
 7 | """
 8 | # Working directory and Packages
 9 | # ---------------------------
10 | import os
11 | import sys
12 | import pathlib
13 | #load packages
14 | import numpy as np
15 | import pandas as pd
16 | #plottign libraries
17 | import matplotlib as mpl
18 | from matplotlib import pyplot as plt
19 | import matplotlib.ticker as mticker
20 | 
# Problem Definition
# ---------------------------
#size of the synthetic dataset
n_samp = 1000

#true intercept, slope, and noise standard deviation
c0, c1, sig = -0.2, 0.6, 0.7

#folder receiving the dataset and the figure
dir_out = 'data/'

# Synthetic Dataset
# ---------------------------
#standard normal covariate
x1  = np.random.randn(n_samp)
#zero-mean normal noise scaled by sig
eps = sig * np.random.randn(n_samp)
#noise-free response and noisy observations
mu_y = c0 + c1 * x1
y    = mu_y + eps

#true linear model evaluated along a regular covariate axis
model_x1 = np.linspace(-5,5)
model_y  = c0 + c1 * model_x1

#collect the regression data
df_data = pd.DataFrame({'x1':x1, 'mu_y':mu_y, 'y':y})

# Write Dataset
# ---------------------------
#make sure the output folder exists
pathlib.Path(dir_out).mkdir(parents=True, exist_ok=True)
df_data.to_csv(dir_out + 'regression_dataset.csv', index=False)

# Summary Figure
# ---------------------------
#figure file name (without extension)
fname_fig = 'fig_dataset'
fig, ax = plt.subplots(figsize = (10,10))
#observations as markers
hl1 = ax.plot(df_data['x1'], df_data['y'], 'o')
#true model as a line
hl2 = ax.plot(model_x1, model_y, linewidth=3, color='black')
#grid
ax.grid(which='both')
#tick label size on both axes
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=30)
#axis labels
ax.set_xlabel(r'$x_1$', fontsize=35)
ax.set_ylabel(r'$y$', fontsize=35)
#axis limits
ax.set_xlim([-4, 4])
ax.set_ylim([-4, 4])
#write figure
fig.tight_layout()
fig.savefig(dir_out + fname_fig + '.png')
80 | 
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/preprocessing/PlotUsableMagRrupCatalog.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Mon Oct  4 16:32:37 2021
 5 | 
 6 | @author: glavrent
 7 | """
 8 | # %% Required Packages
 9 | # ======================================
10 | #load libraries
11 | import os
12 | import pathlib
13 | #arithmetic libraries
14 | import numpy as np
15 | import pandas as pd
16 | #plotting libraries
17 | from matplotlib import pyplot as plt
18 | import matplotlib.ticker as mticker
19 | 
# %% Define variables
# ======================================
#input file names
fname_flatfile_NGA2 = '../../../Raw_files/nga_w2/Updated_NGA_West2_Flatfile_RotD50_d050_public_version.xlsx'
fname_mag_rrup_lim  = '../../../Data/Verification/preprocessing/flatfiles/usable_mag_rrup/usable_Mag_Rrup_coeffs.csv'

#output directory
dir_fig = '../../../Data/Verification/preprocessing/flatfiles/usable_mag_rrup/'

# %% Load Data
# ======================================
#NGAWest2
df_flatfile_NGA2 = pd.read_excel(fname_flatfile_NGA2)
#M/R limit coefficients (rows b0, b1, b2, max_rrup; column 'coefficients')
df_m_r_lim = pd.read_csv(fname_mag_rrup_lim,index_col=0)

#remove rec with unavailable data (non-positive earthquake id or distance)
df_flatfile_NGA2 = df_flatfile_NGA2.loc[df_flatfile_NGA2.EQID>0,:]
df_flatfile_NGA2 = df_flatfile_NGA2.loc[df_flatfile_NGA2['ClstD (km)']>0,:]

#mag and distance arrays
mag_array  = df_flatfile_NGA2['Earthquake Magnitude']
rrup_array = df_flatfile_NGA2['ClstD (km)']

#compute limit: quadratic magnitude limit in distance, and maximum distance
rrup_lim1 = np.arange(0,1001)
mag_lim1  = (df_m_r_lim.loc['b0','coefficients'] +
             df_m_r_lim.loc['b1','coefficients'] * rrup_lim1 +
             df_m_r_lim.loc['b2','coefficients'] * rrup_lim1**2)
rrup_lim2 = df_m_r_lim.loc['max_rrup','coefficients']

# %% Process Data
# ======================================
#exist_ok=True already covers an existing directory, no separate check needed
pathlib.Path(dir_fig).mkdir(parents=True, exist_ok=True)

# create figures
# ----   ----   ----   ----   ----
# Mag-Dist distribution
fname_fig = 'M-R_limits'
#create figure   
fig, ax = plt.subplots(figsize = (10,9))
pl1 = ax.scatter(rrup_array, mag_array, label='NGAWest2 CA')
pl2 = ax.plot(rrup_lim1, mag_lim1,          linewidth=2, color='black')
pl3 = ax.vlines(rrup_lim2, ymin=0, ymax=10, linewidth=2, color='black', linestyle='--')
#edit figure properties
ax.set_xlabel(r'Distance ($km$)', fontsize=30)
ax.set_ylabel(r'Magnitude', fontsize=30)
ax.grid(which='both')
# ax.set_xscale('log')
ax.set_xlim([0, 1000])
ax.set_ylim([2, 8])
ax.tick_params(axis='x', labelsize=25)
ax.tick_params(axis='y', labelsize=25)
# ax.legend(fontsize=25, loc='upper left')
#tick marks pointing inwards and mirrored on the opposite side;
#top/right are boolean switches, so pass True instead of the string 'on'
ax.xaxis.set_tick_params(which='major', size=10, width=2, direction='in', top=True)
ax.xaxis.set_tick_params(which='minor', size=7,  width=2, direction='in', top=True)
ax.yaxis.set_tick_params(which='major', size=10, width=2, direction='in', right=True)
ax.yaxis.set_tick_params(which='minor', size=7,  width=2, direction='in', right=True)
fig.tight_layout()
#save figure
fig.savefig( dir_fig + fname_fig + '.png' )
81 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/preprocessing/ComputeUsableMagRrupCatalog.R:
--------------------------------------------------------------------------------
 1 | # This script computes the usable distance range as a function of magntitude 
 2 | # based on NGAWest2
 3 | ##################################################################################
 4 | 
 5 | #libraries
 6 | library(tidyverse)
 7 | library(readxl)
 8 | 
 9 | # Define variables
10 | # ---------------------------
11 | #input file names
12 | fname_flatfile_NGA2 <- '../../../Raw_files/nga_w2/Updated_NGA_West2_Flatfile_RotD50_d050_public_version.xlsx'
13 | 
14 | #output directory
15 | out_dir <- '../../../Data/Verification/preprocessing/flatfiles/usable_mag_rrup'
16 | dir.create(out_dir, showWarnings = FALSE)
17 | 
18 | #flag determine M/R limit
19 | # flag_reg <- TRUE
20 | flag_reg <- FALSE
21 | 
22 | # Load Data
23 | # ---------------------------
24 | #NGAWest2
25 | df_flatfile_NGA2 <- read_excel(fname_flatfile_NGA2)
26 | 
27 | #remove rec with unavailable data
28 | df_flatfile_NGA2 <- df_flatfile_NGA2[df_flatfile_NGA2$EQID>0,]
29 | df_flatfile_NGA2 <- df_flatfile_NGA2[df_flatfile_NGA2['ClstD (km)']>0,]
30 | 
31 | #mag and distance arrays
32 | mag_array  <- pull(df_flatfile_NGA2, 'Earthquake Magnitude')
33 | rrup_array <- pull(df_flatfile_NGA2, 'ClstD (km)')
34 | 
35 | # Process Data
36 | # ---------------------------
37 | #compute mag/R usable range
38 | if (flag_reg){
39 |   # plot M/R distribution
40 |   plot(rrup_array,mag_array,pch=19,xlim=c(1,1000),ylim=c(1,8))
41 |   grid()
42 |   #estimate m-r coefficients
43 |   clc  <- locator(n=7)
44 |   clcd <- data.frame(clc$x,clc$y,clc$x^2)
45 |   names(clcd) <- c("X","Y","X2")
46 |   outrg <- lm(Y~X + X2, data = clcd)
47 |   coeffs_m_r <- as.data.frame( coefficients(outrg) )
48 |   rownames(coeffs_m_r) <- c('b0','b1','b2')  
49 |   colnames(coeffs_m_r) <- 'coefficients'
50 |   #mag distance
51 |   coeffs_m_r['max_rrup','coefficients'] <- 400
52 | } else {
53 |   # #option 1
54 |   # coeffs_m_r <- data.frame(coefficients=c(1.515945, -0.0008673127, 2.725194e-05), row.names = c('b0','b1','b2') )
55 |   # #option 2
56 |   # coeffs_m_r <- data.frame(coefficients=c(1.238563, 0.0002829483, 2.65235e-05),   row.names = c('b0','b1','b2') )
57 |   # #option 3
58 |   # coeffs_m_r <-  data.frame(coefficients=c(1.731417, 0.003432009, 1.273215e-05),  row.names = c('b0','b1','b2')
59 |   # read from file
60 |   coeffs_m_r <- read.csv(file.path(out_dir, 'usable_Mag_Rrup_coeffs.csv'), row.names=1)
61 |   # plot M/R distribution
62 |   png(file=file.path(out_dir, 'usable_Mag_Rrup_range.png'), width=500, height=500)
63 |   plot(rrup_array,mag_array,pch=19,xlim=c(1,1000),ylim=c(1,8))
64 |   grid()
65 | }
66 | 
67 | 
68 | #plot M/R limits
69 | line_mag_rrup <- data.frame(seq(1,1000,20),coeffs_m_r['b0','coefficients'] +
70 |                                            coeffs_m_r['b1','coefficients'] *seq(1,1000,20) +
71 |                                            coeffs_m_r['b2','coefficients'] *seq(1,1000,20)^2)
72 | lines(line_mag_rrup[,1],line_mag_rrup[,2],col=2)
73 | abline(v = coeffs_m_r['max_rrup','coefficients'],col=2,lty=2)
74 | if (!flag_reg) dev.off()
75 |   
76 | # Output 
77 | # ---------------------------
78 | #save coefficients
79 | write.csv(coeffs_m_r, file=file.path(out_dir, 'usable_Mag_Rrup_coeffs.csv'))
80 | 
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/comparison_inla_model1_time.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue Mar 15 22:38:50 2022
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load variables
 11 | import os
 12 | import sys
 13 | import pathlib
 14 | #arithmetic libraries
 15 | import numpy as np
 16 | #statistics libraries
 17 | import pandas as pd
 18 | #plot libraries
 19 | import matplotlib as mpl
 20 | import matplotlib.pyplot as plt
 21 | from matplotlib.ticker import  AutoLocator as plt_autotick
 22 | 
 23 | # Define variables
 24 | # ---------------------------
 25 | #mesh info
 26 | mesh_info = ['coarse', 'medium', 'fine']
 27 | 
 28 | #dataset name
 29 | dataset_name = ['NGAWest2CANorth', 'NGAWest2CA', 'NGAWest3CA']
 30 | 
 31 | #correlation info
 32 | # 1: Small Correlation Lengths
 33 | # 2: Large Correlation Lenghts
 34 | corr_id = 1
 35 | 
 36 | #correlation name
 37 | if corr_id == 1:
 38 |     synds_name   = 'small corr len'
 39 |     synds_suffix = '_small_corr_len' 
 40 | elif corr_id == 2:
 41 |     synds_name   = 'large corr len'
 42 |     synds_suffix = '_large_corr_len'
 43 | 
 44 | #directories regressions
 45 | dir_reg = '../../../../Data/Verification/regression/ds1/'
 46 | 
 47 | #directory output
 48 | dir_out = '../../../../Data/Verification/regression/ds1/comparisons/'
 49 | 
 50 | # Load Data
 51 | # ---------------------------           
 52 | #initialize dataframe
 53 | df_runinfo_all = {};
 54 | 
 55 | #iterate over different analyses
 56 | for j1, m_i in enumerate(mesh_info):
 57 |     for j2, d_n in enumerate(dataset_name):
 58 |         key_runinfo   = '%s_%s'%(m_i, d_n)
 59 |         fname_runinfo = '%s/INLA_%s_%s%s/run_info.csv'%(dir_reg, d_n, m_i, synds_suffix)
 60 |         #store calc time
 61 |         df_runinfo_all[key_runinfo] = pd.read_csv(fname_runinfo)
 62 |         
 63 | 
 64 | 
 65 | # Comparison Figures
 66 | # ---------------------------         
 67 | pathlib.Path(dir_out).mkdir(parents=True, exist_ok=True)
 68 | 
 69 | #line style (iterate with mesh info)
 70 | line_style = [':','--','-']
 71 | #color map (iterate with dataset)
 72 | c_map = plt.get_cmap('Dark2')
 73 | 
 74 | #run time figure
 75 | fig_fname = 'run_time_inla'
 76 | #create figure axes
 77 | fig, ax = plt.subplots(figsize = (20,10))
 78 | #iterate over different analyses
 79 | for j2, d_n in enumerate(dataset_name):
 80 |     for j1, (m_i, l_s) in enumerate(zip(mesh_info, line_style)):
 81 |         key_runinfo   = '%s_%s'%(m_i, d_n)
 82 |         #
 83 |         ds_id   = df_runinfo_all[key_runinfo].ds_id
 84 |         ds_name = ['Y%i'%d_i for d_i in ds_id]
 85 |         #
 86 |         run_time = df_runinfo_all[key_runinfo].run_time
 87 |         
 88 |         ax.plot(ds_id, run_time, linestyle=l_s, marker='o', linewidth=2, markersize=10, color=c_map(j2), label='%s - %s'%(d_n, m_i))
 89 | 
 90 | #figure properties
 91 | ax.set_ylim([0, max(0.50, max(ax.get_ylim()))])
 92 | ax.set_xlabel('synthetic dataset', fontsize=30)
 93 | ax.set_ylabel('Run Time (min)',    fontsize=30)
 94 | ax.grid(which='both')
 95 | ax.set_xticks(ds_id)
 96 | ax.set_xticklabels(labels=ds_name)
 97 | ax.tick_params(axis='x', labelsize=25)
 98 | ax.tick_params(axis='y', labelsize=25)
 99 | #legend
100 | ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),  fontsize=25)
101 | #save figure
102 | fig.tight_layout()
103 | fig.savefig( dir_out + fig_fname + '.png' )
104 | 
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/Analyses/Prediction/create_scen_dataframe.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Aug 20 17:26:17 2022
  5 | 
  6 | @author: glavrent
  7 | """
  8 | 
  9 | #load variables
 10 | import os
 11 | import sys
 12 | import pathlib
 13 | #arithmetic libraries
 14 | import numpy as np
 15 | #statistics libraries
 16 | import pandas as pd
 17 | #geographic libraries
 18 | import pyproj
 19 | import geopy.distance
 20 | 
 21 | #user libraries
 22 | sys.path.insert(0,'../Python_lib/ground_motions')
 23 | from pylib_gmm_eas import BA18
 24 | ba18 = BA18()
 25 | 
 26 | # Define Problem
 27 | # ---------------------------
 28 | #structural period
 29 | freq = 5.0119
 30 | 
 31 | #earthquake scenario
 32 | mag   = 7.0
 33 | vs30  = 400
 34 | sof   = 'SS'
 35 | dip   = 90
 36 | z_tor = 0
 37 | #color bar limits
 38 | cbar_lim = [np.log(1e-8),np.log(.06)]
 39 | 
 40 | #earthquake coordinates
 41 | scen_eq_latlon  = [34.2,    -116.9]
 42 | #utm zone
 43 | utm_zone = '11S'
 44 | 
 45 | #grid
 46 | grid_X_dxdy = [10, 10]
 47 | 
 48 | #scenario filename
 49 | fname_scen_predict = '../../Data/Prediction/scen_predict.csv'
 50 | 
 51 | # UTM projection
 52 | # ---------------------------
 53 | # projection system
 54 | utmProj = pyproj.Proj("+proj=utm +zone="+utm_zone+", +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
 55 | 
 56 | #grid limits in UTM
 57 | grid_X_win = np.array([[-140, 3500], [780, 4700]])
 58 | 
 59 | #create coordinate grid
 60 | grid_x_edge = np.arange(grid_X_win[0,0],grid_X_win[1,0],grid_X_dxdy[0])
 61 | grid_y_edge = np.arange(grid_X_win[0,1],grid_X_win[1,1],grid_X_dxdy[0])
 62 | grid_x, grid_y = np.meshgrid(grid_x_edge, grid_y_edge)
 63 | #create coordinate array with all grid nodes
 64 | grid_X = np.vstack([grid_x.T.flatten(), grid_y.T.flatten()]).T
 65 | #compute lat/lon coordinate array
 66 | grid_latlon = np.fliplr(np.array([utmProj(g_x*1000, g_y*1000, inverse=True) for g_x, g_y in 
 67 |                                   zip(grid_X[:,0], grid_X[:,1])]))
 68 | n_gpt = len(grid_X)
 69 | 
 70 | #earthquake UTM coordinates
 71 | scen_eq_X = np.array(utmProj(scen_eq_latlon[1], scen_eq_latlon[0])) / 1000
 72 | 
 73 | #create earthquake and site ids
 74 | eqid_array = np.full(n_gpt, -1)
 75 | site_array = -1*(1+np.arange(n_gpt))
 76 | 
 77 | # Compute Ergodic Base Scaling
 78 | # ---------------------------
 79 | #compute distances
 80 | scen_dist_array = np.linalg.norm(grid_X - scen_eq_X, axis=1)
 81 | scen_dist_array = np.sqrt(scen_dist_array**2 + z_tor**2)
 82 | 
 83 | #scenarios of interest
 84 | scen_eas_nerg_scl   = np.full(n_gpt, np.nan)
 85 | scen_eas_nerg_aleat = np.full(n_gpt, np.nan)
 86 | for k, d in enumerate(scen_dist_array):
 87 |     fnorm = 1 if sof == 'SS' else 0
 88 |     #median and aleatory    
 89 |     scen_eas_nerg_scl[k], _, scen_eas_nerg_aleat[k] = ba18.EasF(freq, mag, rrup=d, vs30=vs30, ztor=z_tor, fnorm=fnorm, flag_keep_b7 = False)
 90 |     
 91 |     
 92 | # Summarize Scenario Dataframe
 93 | # ---------------------------
 94 | df_scen_prdct = pd.DataFrame({'eqid':eqid_array, 'ssn':site_array,
 95 |                               'eqLat':np.full(n_gpt,scen_eq_latlon[0]), 'eqLon':np.full(n_gpt,scen_eq_latlon[0]),
 96 |                               'staLat':grid_latlon[:,0], 'staLon':grid_latlon[:,1],
 97 |                               'eqX':np.full(n_gpt,scen_eq_X[0]), 'eqY':np.full(n_gpt,scen_eq_X[1]), 'eqZ':np.full(n_gpt,-z_tor),
 98 |                               'staX':grid_X[:,0], 'staY':grid_X[:,1],
 99 |                               'erg_base':scen_eas_nerg_scl})
100 | 
101 | #save prediction scenarios
102 | df_scen_prdct.to_csv(fname_scen_predict )
103 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_cmdstan_model1_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | from regression_cmdstan_model1_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | 
 35 | #stan model
 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest3CA_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds1/'
 46 | #output sub-directory
 47 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA'
 48 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_chol'
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_chol_eff2'
 51 | 
 52 | #stan parameters
 53 | res_name='tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #parallel options
 60 | stan_parallel=False
 61 | 
 62 | #output sub-dir with corr with suffix info
 63 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 64 | 
 65 | # Run stan regression
 66 | # ---------------------------
 67 | #create datafame with computation time
 68 | df_run_info = list()
 69 | 
 70 | #iterate over all synthetic datasets
 71 | for d_id in ds_id:
 72 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 73 |     #run time start
 74 |     run_t_strt = time.time()    
 75 |     #input flatfile
 76 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 77 |     #load flatfile
 78 |     df_flatfile = pd.read_csv(ds_fname)
 79 |     
 80 |     #output file name and directory
 81 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 82 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 83 | 
 84 |     #run stan model
 85 |     RunStan(df_flatfile, sm_fname, 
 86 |             out_fname, out_dir, res_name,  
 87 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
 88 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
 89 |             stan_parallel=stan_parallel)
 90 |     
 91 |     #run time end
 92 |     run_t_end = time.time()
 93 | 
 94 |     #compute run time
 95 |     run_tm = (run_t_end - run_t_strt)/60
 96 |   
 97 |     #log run time
 98 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
 99 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
100 |                            
101 |     #write out run info
102 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
103 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
104 | 
105 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_pystan_model1_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model1_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | 
 35 | #stan model 
 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest3CA_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds1/'
 46 | #output sub-directory
 47 | # out_dir_sub    = 'PYSTAN_NGAWest3CA'
 48 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_chol'
 49 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_chol_eff'
 50 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_chol_eff2'
 51 | 
 52 | #stan parameters
 53 | runstan_flag = True
 54 | # pystan_ver = 2
 55 | pystan_ver = 3
 56 | res_name = 'tot'
 57 | n_iter   = 1000
 58 | n_chains = 4
 59 | adapt_delta   = 0.8
 60 | max_treedepth = 10
 61 | #parallel options
 62 | # flag_parallel = True
 63 | flag_parallel = False
 64 | 
 65 | #output sub-dir with corr with suffix info
 66 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 67 | 
 68 | # Run stan regression
 69 | # ---------------------------
 70 | #create datafame with computation time
 71 | df_run_info = list()
 72 | 
 73 | #iterate over all synthetic datasets
 74 | for d_id in ds_id:
 75 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 76 |     #run time start
 77 |     run_t_strt = time.time()
 78 |     #input flatfile
 79 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 80 |     #load flatfile
 81 |     df_flatfile = pd.read_csv(ds_fname)
 82 |     
 83 |     #output file name and directory
 84 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 85 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 86 | 
 87 |     #run stan model
 88 |     RunStan(df_flatfile, sm_fname, out_fname, out_dir, res_name, 
 89 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
 90 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
 91 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
 92 | 
 93 |     #run time end
 94 |     run_t_end = time.time()
 95 | 
 96 |     #compute run time
 97 |     run_tm = (run_t_end - run_t_strt)/60
 98 |   
 99 |     #log run time
100 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
101 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
102 |                            
103 |     #write out run info
104 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
105 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
106 | 
107 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Non-ergodic Methodology and Modeling Tools
 2 | 
 3 | This repository contains software tools for developing Nonergodic Ground Motion Models (NGMMs) based on the varying coefficient (Landwehr et al., 2016) and cell-specific anelastic attenuation approach (Dawood and Rodriguez‐Marek, 2013). 
 4 | Developed tools are available for R using the statistical package (R-INLA, https://www.r-inla.org/) and in python using the CMDSTAN and PYSTAN interface packages for the Bayesian software (Stan, https://mc-stan.org/). Documentation and detailed instructions on the use of the developed tools are provided in [Lavrentiadis et al., 2022a GIRS report](https://www.risksciences.ucla.edu/girs-reports/2022/04). A general introduction and considerations for the development of NGMMs are presented in Lavrentiadis et al., 2022b.
 5 | 
 6 | ## Home Page
 7 | The project's home page with links to the various project deliverables is: https://www.risksciences.ucla.edu/nhr3/ngmm
 8 | The project's data are accessible through [DesignSafe](https://www.designsafe-ci.org/data/browser/public/designsafe.storage.published/PRJ-5771)
 9 | 
10 | ## Folder Structure
11 | The main folder ``Analyses`` contains all the regression, prediction, hazard implementation, testing, and library scripts. 
12 | Within the ``Analyses``  folder,  ``Data_Preparation`` includes preprocessing scripts to prepare the ground-motion data for the NGMM regression. ``Regression`` contains the Jupyter notebooks for running the NGMM regressions using Stan and INLA. ``Predictions`` includes the scripts for the conditional predictions for new scenarios based on the regression results. ``Code_Verification`` contains the codes associated with the verification exercise. 
13 | Lastly, folders ``Python_lib``, ``R_lib``, and ``Stan_lib`` contain various scripts invoked in the main functions.
14 | 
15 | The main folder ``Data`` mirrors the structure of the ``Analyses`` folder and contains all the input and output files.
16 | 
17 | The ``Raw_files`` folder includes the files used to construct the synthetic datasets for the verification exercise.
18 | 
19 |     .
20 |     |--Analyses
21 |     |     |--Data_Preparation
22 |     |     |--Regression
23 |     |     |--Predictions
24 |     |     |--Code_Verification
25 |     |     |--Python_lib
26 |     |     |--R_lib
27 |     |     |--Stan_lib
28 |     |
29 |     |--Data
30 |     |     |--Regression
31 |     |     |--Predictions
32 |     |     |--Code_Verification
33 |     |     
34 |     |--Raw_files
35 | 
36 | An example regression dataset, as well as the synthetic datasets and raw metadata, can be downloaded from [DesignSafe](https://www.designsafe-ci.org/data/browser/public/designsafe.storage.published/PRJ-5771).
37 | 
38 | ## Acknowledgments 
39 | Financial support by the California Department of Transportation and Pacific Gas & Electric Company is greatly appreciated.  
40 | 
41 | ## References
42 | Dawood, H. M., & Rodriguez‐Marek, A. (2013). A method for including path effects in ground‐motion prediction equations: An example using the M w 9.0 Tohoku earthquake aftershocks. Bulletin of the Seismological Society of America, 103(2B), 1360-1372.
43 | 
44 | Landwehr, N., Kuehn, N. M., Scheffer, T., & Abrahamson, N. (2016). A nonergodic ground‐motion model for California with spatially varying coefficients. Bulletin of the Seismological Society of America, 106(6), 2574-2583.
45 | 
46 | Lavrentiadis, G., Kuehn, N. M., Bozorgnia, Y., Seylabi, E., Meng, X., Goulet, C., & Kottke, A. (2022a). Non‐ergodic Methodology and Modeling Tools. Natural Hazards Risk and Resiliency Research Center: The Garrick Institute for the Risk Sciences, University of California, Los Angeles.
47 | 
48 | Lavrentiadis, G., Abrahamson, N. A., Kuehn, N. M., Bozorgnia, Y., Goulet, C. A., Babič, A., ... & Walling, M. (2022b). Overview and Introduction to Development of Non-Ergodic Earthquake Ground-Motion Models. Bulletin of Earthquake Engineering.
49 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_cmdstan_model1_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | from regression_cmdstan_model1_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | 
 35 | #stan model
 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest2CA_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds1/'
 46 | #output sub-directory
 47 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA'
 48 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_chol'
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_chol_eff2'
 51 | 
 52 | #stan parameters
 53 | res_name='tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #parallel options
 60 | stan_parallel=False
 61 | 
 62 | #output sub-dir with corr with suffix info
 63 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 64 | 
 65 | # Run stan regression
 66 | # ---------------------------
 67 | #create datafame with computation time
 68 | df_run_info = list()
 69 | 
 70 | #iterate over all synthetic datasets
 71 | for d_id in ds_id:
 72 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 73 |     #run time start
 74 |     run_t_strt = time.time()    
 75 |     #input flatfile
 76 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 77 |     #load flatfile
 78 |     df_flatfile = pd.read_csv(ds_fname)
 79 |     #keep only NGAWest2 records
 80 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 81 |     
 82 |     #output file name and directory
 83 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 84 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 85 | 
 86 |     #run stan model
 87 |     RunStan(df_flatfile, sm_fname, 
 88 |             out_fname, out_dir, res_name,  
 89 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
 90 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
 91 |             stan_parallel=stan_parallel)
 92 |     
 93 |     #run time end
 94 |     run_t_end = time.time()
 95 | 
 96 |     #compute run time
 97 |     run_tm = (run_t_end - run_t_strt)/60
 98 |   
 99 |     #log run time
100 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
101 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
102 |                            
103 |     #write out run info
104 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
105 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
106 | 
107 | 


--------------------------------------------------------------------------------
/Analyses/Regression/README.md:
--------------------------------------------------------------------------------
 1 | # Non-ergodic Ground Motion Model Types:
 2 | 
 3 |  * Type-1: Three non-ergodic terms: 
 4 |  
 5 |  $$
 6 |   f_{nerg}(M,R_{rup},V_{S30},..., \vec{t_{E}}, \vec{t_{S}}) = f_{erg}(M,R_{rup},V_{S30},...) + \delta  c_{1,E}(\vec{t_{E}}) + \delta c_{1a,S}(\vec{t_{S}}) + \delta  c_{1b,S}(\vec{t_{S}})
 7 |  $$
 8 |  
 9 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
10 |  a spatially varying earthquake constant ( $\delta  c_{1,E}$ ), a spatially varying site constant ( $\delta c_{1a,S}$ ), and a spatially independent site <br>
11 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
12 |  constant ( $\delta  c_{1b,S}$ ). 
13 |  
14 |  * Type-2: Four non-ergodic terms: 
15 |  
16 |  $$
17 |  \begin{aligned}
18 |   f_{nerg}(M,R_{rup},V_{S30},..., \vec{t_{E}}, \vec{t_{S}}) =& \left( f_{erg}(M,V_{S30},...) - c_{a~erg}~R_{rup} \right) + \delta  c_{1,E}(\vec{t_{E}}) + \delta c_{1a,S}(\vec{t_{S}}) + \delta  c_{1b,S}(\vec{t_{S}}) + \\
19 |   & \mathbf{c}_{ca,P} \cdot \Delta R 
20 |  \end{aligned}
21 |  $$
22 |  
23 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
24 |  a spatially varying earthquake constant ( $\delta c_{1,E}$ ), a spatially varying site constant ( $\delta c_{1a,S}$ ), a spatially independent site<br>
25 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
26 |  constant ( $\delta c_{1b,S}$ ), and cell-specific anelastic attenuation ( $\mathbf{c}_{ca,P} $). 
27 |  
28 |  * Type-3: Six non-ergodic terms: 
29 |  
30 |   $$
31 |  \begin{aligned}
 32 |   f_{nerg}(M,R_{rup},V_{S30},..., \vec{t_{E}}, \vec{t_{S}}) =& \left( f_{erg}(M,V_{S30},...) - \left( c_2 ~ f_{gs}(M,R_{rup}) + c_3 ~ f_{V_{S30}}(V_{S30}) + c_{a~erg} ~ R_{rup} \right) \right) + \\
 33 |   & \delta  c_{1,E}(\vec{t_{E}}) + \delta c_{1a,S}(\vec{t_{S}}) + \delta  c_{1b,S}(\vec{t_{S}}) + \\
 34 |   &  c_{2,P}(\vec{t_{E}}) ~ f_{gs}(M,R_{rup}) + c_{3,S}(\vec{t_{S}}) ~ f_{V_{S30}}(V_{S30}) +  \mathbf{c}_{ca,P} \cdot \Delta R
35 |  \end{aligned}
36 |  $$
37 |  
38 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
39 |  a spatially varying earthquake constant ( $\delta c_{1,E}$ ), a spatially varying site constant ( $\delta c_{1a,S}$ ), a spatially independent site<br>
40 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
41 |  constant ( $\delta c_{1b,S}$ ),  a spatially varying geometrical spreading coefficient ( $c_{2,P}$ ), a spatially varying $V_{S30}$ scaling ( $c_{3,S}$ ), <br> 
42 |  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
43 |  and cell-specific anelastic attenuation ( $\mathbf{c}_{ca,P} $). 
44 | 
45 | # File Descriptions
46 | 
47 | ### INLA
48 |  * Type-1 NGMM: ``nonerg_gmm_regression_type1_inla.ipynb``
49 |  * Type-2 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type2_uncorrcells_inla.ipynb`` 
50 |  * Type-3 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type3_uncorrcells_inla.ipynb``
51 | 
52 | ### CMDSTAN
53 |  * Type-1 NGMM: ``nonerg_gmm_regression_type1_cmdstan.ipynb``
54 |  * Type-2 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type2_corrcells_cmdstan.ipynb`` 
55 |  * Type-2 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type2_uncorrcells_cmdstan.ipynb``
56 |  * Type-3 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type3_corrcells_cmdstan.ipynb``
57 |  * Type-3 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type3_uncorrcells_cmdstan.ipynb``
58 | 
59 | ### PYSTAN
60 |  * Type-1 NGMM: ``nonerg_gmm_regression_type1_pystan.ipynb``
61 |  * Type-2 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type2_corrcells_pystan.ipynb``
62 |  * Type-2 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type2_uncorrcells_pystan.ipynb``
63 |  * Type-3 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type3_corrcells_pystan.ipynb``
64 |  * Type-3 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type3_uncorrcells_pystan.ipynb``
65 | 


--------------------------------------------------------------------------------
/Examples/example2/regression_inla.R:
--------------------------------------------------------------------------------
 1 | #libraries
 2 | library(stringr)
 3 | library(assertthat)
 4 | library(pracma)
 5 | # Bayesian regression
 6 | library(INLA)
 7 | library(inlabru)
 8 | library(posterior)
 9 | #plotting packages
10 | library(ggplot2)
11 | library(maps) 
12 | 
13 | # Define Problem
14 | # ---------------------------
15 | #data filename
16 | fname_data <- 'data/regression_dataset.csv'
17 | #output directory
18 | dir_out <- 'data/inla_regression/'
19 | 
20 | # Read Data
21 | # ---------------------------
22 | df_data <- read.csv(fname_data)
23 | 
24 | # Preprocess Data
25 | # ---------------------------
26 | n_data <- nrow(df_data)
27 | 
28 | # Run INLA, fit model 
29 | # ---------------------------
30 | #prior of fixed effects
31 | prior.fixed <- list(mean.intercept = 0, prec.intercept = 1,
32 |                     mean = 0, prec = 1)
33 | #prior of likelihood precision (log-scale)
34 | prior.prec <- list(prec = list(prior = "loggamma", param = c(4.0, 0.5)))
35 | 
36 | #run regression
37 | fit_inla <- inla(y ~ x1, data = df_data, family="gaussian", 
38 |                  control.fixed  = prior.fixed,
39 |                  control.family = list(hyper = list(prec = prior.prec)), 
40 |                  control.inla = list(int.strategy='eb', strategy="gaussian"),
41 |                  verbose=TRUE)
42 | 
43 | 
44 | # Post-processing
45 | # ---------------------------
46 | #compute posterior distributions
47 | df_post_c0  <- as.data.frame( fit_inla$marginals.fixed$`(Intercept)` )
48 | df_post_c1  <- as.data.frame( fit_inla$marginals.fixed$x1 )
49 | df_post_sig <- as.data.frame(inla.tmarginal(function(x) exp(-x/2), fit_inla$internal.marginals.hyperpar[['Log precision for the Gaussian observations']]))
50 | 
51 | # Plotting
52 | # ---------------------------  
53 | pl_post_c0 <- ggplot() + geom_line(data=df_post_c0, aes(x=x, y=y))  + theme_bw() +
54 |                   labs(x="sigma", y="posterior(c0)") + xlim(-.25,-0.1) + ylim(0, 30) + 
55 |                   theme(plot.title=element_text(size=20), axis.title=element_text(size=20),
56 |                         axis.text.y=element_text(size=20), axis.text.x=element_text(size=20),
57 |                         legend.key.size = unit(1, 'cm'), legend.text=element_text(size=20))
58 | 
59 | pl_post_c1 <- ggplot() + geom_line(data=df_post_c1, aes(x=x, y=y))  + theme_bw() +
60 |                   labs(x="sigma", y="posterior(c1)") + xlim(0.5,0.8) + ylim(0, 20) + 
61 |                   theme(plot.title=element_text(size=20), axis.title=element_text(size=20),
62 |                         axis.text.y=element_text(size=20), axis.text.x=element_text(size=20),
63 |                         legend.key.size = unit(1, 'cm'), legend.text=element_text(size=20))
64 | 
65 | 
66 | pl_post_sig <- ggplot() + geom_line(data=df_post_sig, aes(x=x, y=y))  + theme_bw() +
67 |                 labs(x="sigma", y="posterior(sigma)") + xlim(0.6,0.8) + ylim(0, 30) + 
68 |                 theme(plot.title=element_text(size=20), axis.title=element_text(size=20),
69 |                       axis.text.y=element_text(size=20), axis.text.x=element_text(size=20),
70 |                       legend.key.size = unit(1, 'cm'), legend.text=element_text(size=20))
71 | 
72 | 
73 | # Save Data
74 | # ---------------------------  
75 | #create output directories
76 | dir.create(dir_out, showWarnings = FALSE)
77 | 
78 | #write out regression results
79 | write.csv(df_post_c0,   file=file.path(dir_out, 'inla_c0_posterior.csv'),    row.names = FALSE )
80 | write.csv(df_post_c1,   file=file.path(dir_out, 'inla_c1_posterior.csv'),    row.names = FALSE )
81 | write.csv(df_post_sig,  file=file.path(dir_out, 'inla_sigma_posterior.csv'), row.names = FALSE )
82 | 
83 | #save figures
84 | #---  ---  ---  ---  ---  ---
85 | #posterior distributions
86 | ggsave(file.path(dir_out,'inla_c0_posterior.png'),  plot=pl_post_c0,  device='png')
87 | ggsave(file.path(dir_out,'inla_c1_posterior.png'),  plot=pl_post_c1,  device='png')
88 | ggsave(file.path(dir_out,'inla_sig_posterior.png'), plot=pl_post_sig, device='png')
89 | 
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/catalog/pylib_catalog.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue Jul 20 10:39:12 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | 
  9 | #load libraries
 10 | #arithmetic libraries
 11 | import numpy as np
 12 | 
 13 | def IndexAvgColumns(df_data, col_idx, col2avg):
 14 |     '''
 15 |     Average columns based on index column
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     df_data : pd.dataframe
 20 |         Data data-frame.
 21 |     col_idx : str
 22 |         Name of index column.
 23 |     col2avg : list
 24 |         List of column names to be averaged.
 25 | 
 26 |     Returns
 27 |     -------
 28 |     df_data : pd.dataframe
 29 |         Data data-frame.
 30 | 
 31 |     '''
 32 |     
 33 |     #unique ids
 34 |     idx_array, inv_array = np.unique(df_data[col_idx], return_inverse=True)
 35 |     #iterate over columns
 36 |     for col in col2avg:
 37 |         #compute average values for all unique indices
 38 |         avg_vals = np.array([np.nanmean(df_data.loc[df_data[col_idx] == idx,col]) for idx in idx_array])
 39 |         df_data.loc[:,col] = avg_vals[inv_array]
 40 |             
 41 |     return df_data
 42 | 
 43 | def ColocatePt(df_flatfile, col_idx, col_coor, thres_dist=0.01, return_df_pt=False):
 44 |     '''
 45 |     Colocate points (assign same ID) based on threshold distance.
 46 | 
 47 |     Parameters
 48 |     ----------
 49 |     df_flatfile : pd.DataFrame
 50 |         Catalog flatfile.
 51 |     col_idx : str
 52 |         Name of index column.
 53 |     col_coor : list of str
 54 |         List of coordinate name columns.
 55 |     thres_dist : real, optional
 56 |         Value of threshold distance. The default is 0.01.
 57 |     return_df_pt : bool, optional
 58 |         Option for returning point data frame. The default is False.
 59 | 
 60 |     Returns
 61 |     -------
 62 |     df_flatfile : pd.DataFrame
 63 |         Catalog flatfile with updated index column.
 64 |     df_pt: pd.DataFrame
 65 |         Point data frame with updated index column.
 66 |     '''
 67 | 
 68 |     #dataframe with unique points
 69 |     _, pt_idx, pt_inv = np.unique(df_flatfile[col_idx], axis=0, return_index=True, return_inverse=True)
 70 |     df_pt = df_flatfile.loc[:,[col_idx] + col_coor].iloc[pt_idx,:]
 71 |     
 72 |     #find and merge collocated points
 73 |     for _, pt in df_pt.iterrows():
 74 |         #distance between points
 75 |         dist2pt = np.linalg.norm((df_pt[col_coor] - pt[col_coor]).astype(float), axis=1)
 76 |         #indices of collocated points
 77 |         i_pt_coll = dist2pt < thres_dist
 78 |         #assign new id for collocated points
 79 |         df_pt.loc[i_pt_coll,col_idx] = pt[col_idx].astype(int)
 80 |     
 81 |     #update pt info to main catalog
 82 |     df_flatfile.loc[:,col_idx] = df_pt[col_idx].values[pt_inv]
 83 |     
 84 |     if not return_df_pt:
 85 |         return df_flatfile
 86 |     else:
 87 |         return df_flatfile, df_pt
 88 |     
 89 | def UsableSta(mag_array, dist_array, df_coeffs):
 90 |     '''
 91 |     Find records that meet the mag-distance limits
 92 | 
 93 |     Parameters
 94 |     ----------
 95 |     mag_array : np.array
 96 |         Magnitude array.
 97 |     dist_array : np.array
 98 |         Distance array.
 99 |     df_coeffs : pd.DataFrame
100 |         Coefficients dataframe.
101 | 
102 |     Returns
103 |     -------
104 |     rec_lim : np.array
105 |         logical array with True for records that meet M/R limits.
106 | 
107 |     '''
108 |     
109 |     #rrup limit
110 |     rrup_lim = dist_array <= df_coeffs.loc['max_rrup','coefficients']
111 |     
112 |     #mag limit
113 |     mag_min = (df_coeffs.loc['b1','coefficients'] + 
114 |                df_coeffs.loc['b1','coefficients'] * dist_array + 
115 |                df_coeffs.loc['b2','coefficients'] * dist_array**2)
116 |     mag_lim = mag_array >= mag_min 
117 |     
118 |     #find records that meet both conditions
119 |     rec_lim = np.logical_and(rrup_lim, mag_lim)
120 | 
121 |     return rec_lim
122 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest3CA_sparse.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | #cell specific anelastic attenuation
 35 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 36 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 37 | 
 38 | #stan model 
 39 | sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest3CA_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 46 | #output sub-directory
 47 | out_dir_sub    = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff_sp'
 48 | 
 49 | #stan parameters
 50 | runstan_flag = True
 51 | pystan_ver = 2
 52 | # pystan_ver = 3
 53 | res_name = 'tot'
 54 | n_iter = 1000
 55 | n_chains = 4
 56 | adapt_delta   = 0.8
 57 | max_treedepth = 10
 58 | #ergodic coefficients
 59 | c_a_erg=0.0
 60 | #parallel options
 61 | # flag_parallel = True
 62 | flag_parallel = False
 63 | 
 64 | #output sub-dir with corr with suffix info
 65 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 66 | 
 67 | #load cell dataframes
 68 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 69 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 70 | df_cellinfo = pd.read_csv(cellinfo_fname)
 71 | df_celldist = pd.read_csv(celldist_fname)
 72 | 
 73 | # Run stan regression
 74 | # ---------------------------
 75 | #create datafame with computation time
 76 | df_run_info = list()
 77 | 
 78 | #iterate over all synthetic datasets
 79 | for d_id in ds_id:
 80 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 81 |     #run time start
 82 |     run_t_strt = time.time()    
 83 |     #input flatfile
 84 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 85 |     #load flatfile
 86 |     df_flatfile = pd.read_csv(ds_fname)
 87 |     
 88 |     #output file name and directory
 89 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 90 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 91 | 
 92 |     #run stan model
 93 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 94 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
 95 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
 96 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
 97 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
 98 |        
 99 |     #run time end
100 |     run_t_end = time.time()
101 | 
102 |     #compute run time
103 |     run_tm = (run_t_end - run_t_strt)/60
104 |   
105 |     #log run time
106 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
107 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
108 |                            
109 |     #write out run info
110 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
111 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
112 | 
113 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/synthetic_datasets/create_synthetic_ds1.stan:
--------------------------------------------------------------------------------
  1 | /*********************************************
  2 | Stan program to create a synthetic data-set
  3 | with a constant shift, spatially varying
  4 | earthquake and station terms, a station term
  5 | with zero correlation length, and between-
  6 | and within-event aleatory terms
  7 | ********************************************/
  8 | 
  9 | data {
 10 |   int N;      // number of records
 11 |   int NEQ;    // number of earthquakes
 12 |   int NSTAT;  // number of stations
 13 |     
 14 |   //event and station ID of every record (used to index the per-event and per-station terms)
 15 |   int<lower=1,upper=NEQ> eq[N];     // event id (in numerical order from 1 to last)
 16 |   int<lower=1,upper=NSTAT> stat[N]; // station id (in numerical order from 1 to last)
 17 | 
 18 |   //earthquake and station coordinates (two components each; units are not specified here)
 19 |   vector[2] X_e[NEQ];
 20 |   vector[2] X_s[NSTAT];
 21 |     
 22 |   //assumed hyper-parameters
 23 |   //earthquake and site constants
 24 |   real omega_0;    // std. dev. of the constant shift dc_0
 25 |   real omega_1e;   // scale of the spatially varying earthquake term dc_1e
 26 |   real omega_1as;  // scale of the spatially varying station term dc_1as
 27 |   real omega_1bs;  // std. dev. of the independent station term dc_1bs
 28 |   real ell_1e;     // length scale in exp(-d/ell_1e) for dc_1e
 29 |   real ell_1as;    // length scale in exp(-d/ell_1as) for dc_1as
 30 |   //aleatory terms
 31 |   real tau_0;      // std. dev. of dB (one draw per earthquake)
 32 |   real phi_0;      // std. dev. of dW (one draw per record)
 33 |   
 34 |   //mean of ergodic GMM
 35 |   vector[N] mu_gmm;
 36 | }
 37 | 
 38 | transformed data {
 39 |   real delta = 1e-9;  //small constant added to the diagonal of the covariance matrices
 40 | 
 41 |   //prior means (all zero)
 42 |   real          dc_0_mu   = 0.;
 43 |   vector[NEQ]   dc_1e_mu  = rep_vector(0.,NEQ);
 44 |   vector[NSTAT] dc_1as_mu = rep_vector(0.,NSTAT);
 45 |   vector[NSTAT] dc_1bs_mu = rep_vector(0.,NSTAT);
 46 | }
 47 | 
 48 | parameters {}  //no parameters are declared: nothing is estimated by this program
 49 | //NOTE(review): presumably run with the fixed_param sampler -- confirm in the calling script
 50 | model {}       //empty: all samples come from the *_rng calls in generated quantities
 51 | 
 52 | generated quantities {
 53 |   //coefficient samples
 54 |   real          dc_0;    //constant shift
 55 |   vector[NEQ]   dc_1e;   //spatially varying earthquake term
 56 |   vector[NSTAT] dc_1as;  //spatially varying station term
 57 |   vector[NSTAT] dc_1bs;  //spatially independent station term
 58 |   //samples of aleatory terms
 59 |   vector[NEQ]   dB;      //one value per earthquake, std. dev. tau_0
 60 |   vector[N]     dW;      //one value per record, std. dev. phi_0
 61 |   //gm samples
 62 |   vector[N]     Y_var_ceoff;  //dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat]
 63 |   vector[N]     Y_nerg_med;   //mu_gmm + Y_var_ceoff
 64 |   vector[N]     Y_aleat;      //dW + dB[eq]
 65 |   vector[N]     Y_tot;        //Y_nerg_med + Y_aleat
 66 | 
 67 |   //latent variable for constant shift
 68 |   {
 69 |     dc_0 = normal_rng(dc_0_mu,omega_0);
 70 |   }
 71 | 
 72 |   //generate latent variable for spatially varying earthquake term
 73 |   {
 74 |     matrix[NEQ,NEQ] cov_1e;
 75 |     
 76 |     for(i in 1:NEQ) {
 77 |       for(j in i:NEQ) {  //upper triangle only; the value is mirrored below
 78 |         real d_e;
 79 |         real c_1e;
 80 |         
 81 |         d_e = distance(X_e[i],X_e[j]);  //distance between earthquakes i and j
 82 |   
 83 |         c_1e = (omega_1e^2 * exp(-d_e/ell_1e));  //exponential kernel
 84 |   
 85 |         cov_1e[i,j] = c_1e;
 86 |         cov_1e[j,i] = c_1e;
 87 |       }
 88 |       cov_1e[i,i] += delta;  //small diagonal term, presumably for numerical stability
 89 |     }
 90 |     dc_1e = multi_normal_rng(dc_1e_mu, cov_1e);
 91 |   }
 92 | 
 93 |   //generate latent variable for spatially varying station term
 94 |   { 
 95 |     matrix[NSTAT,NSTAT] cov_1as;
 96 | 
 97 |     for(i in 1:NSTAT) {
 98 |       for(j in i:NSTAT) {  //upper triangle only; the value is mirrored below
 99 |         real d_s;
100 |         real c_1as;
101 |   
102 |         d_s = distance(X_s[i],X_s[j]);  //distance between stations i and j
103 |         
104 |         c_1as  = (omega_1as^2  * exp(-d_s/ell_1as));  //exponential kernel
105 |   
106 |         cov_1as[i,j] = c_1as;
107 |         cov_1as[j,i] = c_1as;
108 |       }
109 |       cov_1as[i,i] += delta;  //small diagonal term, presumably for numerical stability
110 |     }
111 |     dc_1as = multi_normal_rng(dc_1as_mu, cov_1as);
112 |   }
113 |   
114 |   //generate latent variable for independent (zero correlation length) station term
115 |   {
116 |     for(i in 1:NSTAT) {
117 |       dc_1bs[i] = normal_rng(dc_1bs_mu[i], omega_1bs);
118 |     }
119 |   }
120 | 
121 |   //generate aleatory terms (zero mean)
122 |   {
123 |     for(i in 1:N) {
124 |       dW[i] = normal_rng(0., phi_0);
125 |     }
126 |     for(i in 1:NEQ) {
127 |       dB[i] = normal_rng(0., tau_0);
128 |     }
129 |   }
130 |  
131 |   //generate gm random samples
132 |   //add contributions of spatially varying terms (eq and stat map each record to its event and station)
133 |   {
134 |     Y_var_ceoff =  dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat];
135 |   }
136 | 
137 |   //median ground motion
138 |   Y_nerg_med =  mu_gmm + Y_var_ceoff;
139 |   //aleatory variability
140 |   Y_aleat = dW + dB[eq];
141 |   //total gm
142 |   Y_tot = Y_nerg_med + Y_aleat;
143 | }
144 | 
145 | 
146 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_cmdstan_model1_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | from regression_cmdstan_model1_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | 
 35 | #stan model
 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest2CANorth_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds1/'
 46 | #output sub-directory
 47 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth'
 48 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_chol'
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_chol_eff2'
 51 | 
 52 | #stan parameters
 53 | res_name='tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #parallel options
 60 | stan_parallel=False
 61 | 
 62 | #output sub-dir with corr with suffix info
 63 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 64 | 
 65 | # Run stan regression
 66 | # ---------------------------
 67 | #create datafame with computation time
 68 | df_run_info = list()
 69 | 
 70 | #iterate over all synthetic datasets
 71 | for d_id in ds_id:
 72 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 73 |     #run time start
 74 |     run_t_strt = time.time()    
 75 |     #input flatfile
 76 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 77 |     #load flatfile
 78 |     df_flatfile = pd.read_csv(ds_fname)
 79 |     #keep only North records of NGAWest2
 80 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 81 |                                                  df_flatfile.sreg==1),:]
 82 |     
 83 |     #output file name and directory
 84 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 85 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 86 | 
 87 |     #run stan model
 88 |     RunStan(df_flatfile, sm_fname, 
 89 |             out_fname, out_dir, res_name,  
 90 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
 91 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
 92 |             stan_parallel=stan_parallel)
 93 |     
 94 |     #run time end
 95 |     run_t_end = time.time()
 96 | 
 97 |     #compute run time
 98 |     run_tm = (run_t_end - run_t_strt)/60
 99 |   
100 |     #log run time
101 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
102 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
103 |                            
104 |     #write out run info
105 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
106 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
107 | 
108 | 


--------------------------------------------------------------------------------
/Analyses/Python_lib/QGIS/pylib_QGIS.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Tue May 19 11:04:00 2020
  5 | 
  6 | @author: glavrent
  7 | """
  8 | 
  9 | #load libraries
 10 | 
 11 | #load GIS
 12 | from qgis.core import QgsVectorLayer, QgsPointXY
 13 | from qgis.core import QgsField, QgsFeature, QgsGeometry, QgsVectorFileWriter, QgsFeatureSink
 14 | from qgis.PyQt.QtCore import QVariant
 15 | 
 16 | def EQLayer(eq_data):
 17 |     '''
 18 |     Create earthquake source layer for QGIS
 19 | 
 20 |     Parameters
 21 |     ----------
 22 |     eq_data : pd.dataframe
 23 |         Dataframe for rupture points with fields:
 24 |             eqid, region, mag, SOF, Ztor, eqLat, eqLon
 25 |     
 26 |     Returns
 27 |     -------
 28 |     eq_layer : TYPE
 29 |         QGIS layer with earthquake sources.
 30 |     '''
 31 | 
 32 |     #create qgis layer for earthquake sources
 33 |     eq_layer = QgsVectorLayer("Point", "eq_pts", "memory")
 34 |     eq_pr = eq_layer.dataProvider()
 35 |     eq_pr.addAttributes([QgsField("eqid",      QVariant.Int),
 36 |                          QgsField("region",    QVariant.Int),
 37 |                          QgsField("mag",       QVariant.Double),
 38 |                          QgsField("SOF",       QVariant.Int),
 39 |                          QgsField("Ztor",      QVariant.Double),
 40 |                          QgsField("eqLat",     QVariant.Double),
 41 |                          QgsField("eqLon",     QVariant.Double)])
 42 | 
 43 |     #iterate over earthquakes, add on layer
 44 |     eq_layer.startEditing()
 45 |     for eq in eq_data.iterrows():
 46 |         #earthquake info
 47 |         eq_info   = eq[1][['eqid','region','mag','SOF','Ztor']].tolist()
 48 |         eq_latlon = eq[1][['eqLat','eqLon']].tolist()
 49 |         #define feature, earthquake  
 50 |         eq_f = QgsFeature()
 51 |         eq_f.setGeometry(QgsGeometry.fromPointXY(QgsPointXY(eq_latlon[1],eq_latlon[0])))
 52 |         eq_f.setAttributes(eq_info + eq_latlon)
 53 |         #add earthquake in layer
 54 |         eq_pr.addFeatures([eq_f])
 55 |     #commit changes
 56 |     eq_layer.commitChanges()
 57 |     #update displacement layer
 58 |     eq_layer.updateExtents()   
 59 |     
 60 |     return eq_layer
 61 | 
 62 | def STALayer(sta_data):
 63 |     '''
 64 |     Create station layer for QGIS
 65 | 
 66 |     Parameters
 67 |     ----------
 68 |     sta_data : pd.dataframe
 69 |         Dataframe for rupture points with fields:
 70 |             'ssn','region','Vs30','Z1.0','StaLat','StaLon'
 71 |             eqid','region','mag','SOF','eqLat','eqLon'
 72 |     
 73 |     Returns
 74 |     -------
 75 |     sta_layer : TYPE
 76 |         QGIS layer with station points.
 77 |     '''
 78 | 
 79 |     #create qgis layer for station locations
 80 |     sta_layer = QgsVectorLayer("Point", "sta_pts", "memory")
 81 |     sta_pr = sta_layer.dataProvider()
 82 |     sta_pr.addAttributes([QgsField("ssn",       QVariant.Int),
 83 |                          QgsField("region",     QVariant.Int),
 84 |                          QgsField("Vs30",       QVariant.Double),
 85 |                          QgsField("Z1.0",       QVariant.Double),
 86 |                          QgsField("staLat",     QVariant.Double),
 87 |                          QgsField("staLon",     QVariant.Double)])
 88 | 
 89 |     #iterate over station, add on layer
 90 |     sta_layer.startEditing()
 91 |     for sta in sta_data.iterrows():
 92 |         #earthquake info
 93 |         sta_info   = sta[1][['ssn','region','Vs30','Z1.0']].tolist()
 94 |         sta_latlon = sta[1][['staLat','staLon']].tolist()
 95 |         #define feature, earthquake  
 96 |         sta_f = QgsFeature()
 97 |         sta_f.setGeometry(QgsGeometry.fromPointXY(QgsPointXY(sta_latlon[1],sta_latlon[0])))
 98 |         sta_f.setAttributes(sta_info + sta_latlon)
 99 |         #add earthquake in layer
100 |         sta_pr.addFeatures([sta_f])
101 |     #commit changes
102 |     sta_layer.commitChanges()
103 |     #update displacement layer
104 |     sta_layer.updateExtents()   
105 |     
106 |     return sta_layer
107 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_pystan_model3_corr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model3_corr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest3CA_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Validation/regression/ds3/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg=0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     
 91 |     #output file name and directory
 92 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 93 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 94 | 
 95 |     #run stan model
 96 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 97 |             out_fname, out_dir, res_name, 
 98 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
 99 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
100 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
101 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
102 |        
103 |     #run time end
104 |     run_t_end = time.time()
105 | 
106 |     #compute run time
107 |     run_tm = (run_t_end - run_t_strt)/60
108 |   
109 |     #log run time
110 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
111 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
112 |                            
113 |     #write out run info
114 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
115 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
116 |     
117 | 
118 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_pystan_model3_uncorr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model3_uncorr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest3CA_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Validation/regression/ds3/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest3CA_uncorr_cells_chol_eff'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg=0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     
 91 |     #output file name and directory
 92 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 93 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 94 | 
 95 |     #run stan model
 96 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 97 |             out_fname, out_dir, res_name, 
 98 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
 99 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
100 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
101 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
102 |        
103 |     #run time end
104 |     run_t_end = time.time()
105 | 
106 |     #compute run time
107 |     run_tm = (run_t_end - run_t_strt)/60
108 |     
109 |     #log run time
110 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
111 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
112 |                            
113 |     #write out run info
114 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
115 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_pystan_model1_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model1_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len'
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | 
 35 | #stan model 
 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest2CA_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds1/'
 46 | #output sub-directory
 47 | #python 2
 48 | # out_dir_sub    = 'PYSTAN_NGAWest2CA'
 49 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_chol'
 50 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_chol_eff'
 51 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_chol_eff2'
 52 | #python 3
 53 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA'
 54 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_chol'
 55 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_chol_eff'
 56 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_chol_eff2'
 57 | 
 58 | #stan parameters
 59 | runstan_flag = True
 60 | #pystan_ver = 2
 61 | pystan_ver = 3
 62 | res_name = 'tot'
 63 | n_iter   = 1000
 64 | n_chains = 4
 65 | adapt_delta   = 0.8
 66 | max_treedepth = 10
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | # Run stan regression
 75 | # ---------------------------
 76 | #create datafame with computation time
 77 | df_run_info = list()
 78 | 
 79 | #iterate over all synthetic datasets
 80 | for d_id in ds_id:
 81 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 82 |     #run time start
 83 |     run_t_strt = time.time()    
 84 |     #input flatfile
 85 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 86 |     #load flatfile
 87 |     df_flatfile = pd.read_csv(ds_fname)
 88 |     #keep only NGAWest2 records
 89 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 90 |     
 91 |     #output file name and directory
 92 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 93 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 94 | 
 95 |     #run stan model
 96 |     RunStan(df_flatfile, sm_fname, out_fname, out_dir, res_name, 
 97 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
 98 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
 99 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
100 |     
101 |     #run time end
102 |     run_t_end = time.time()
103 | 
104 |     #compute run time
105 |     run_tm = (run_t_end - run_t_strt)/60
106 |   
107 |     #log run time
108 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
109 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
110 |                            
111 |     #write out run info
112 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
113 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
114 | 
115 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_pystan_model3_corr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model3_corr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest2CA_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Validation/regression/ds3/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg=0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only NGAWest2 records
 91 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 92 |     
 93 |     #output file name and directory
 94 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 95 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 96 | 
 97 |     #run stan model
 98 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 99 |             out_fname, out_dir, res_name, 
100 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
101 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
102 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
103 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
104 |        
105 |     #run time end
106 |     run_t_end = time.time()
107 | 
108 |     #compute run time
109 |     run_tm = (run_t_end - run_t_strt)/60
110 |   
111 |     #log run time
112 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
113 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
114 |                            
115 |     #write out run info
116 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
117 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
118 |     
119 | 
120 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest2CA_sparse.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | #cell specific anelastic attenuation
 35 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 36 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 37 | 
 38 | #stan model 
 39 | sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest2CA_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 46 | #output sub-directory
 47 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff_sp'
 48 | out_dir_sub    = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff_sp'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8 #0.9
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_a_erg=0.0
 61 | #parallel options
 62 | # flag_parallel = True
 63 | flag_parallel = False
 64 | 
 65 | #output sub-dir with corr with suffix info
 66 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 67 | 
 68 | #load cell dataframes
 69 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 70 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 71 | df_cellinfo = pd.read_csv(cellinfo_fname)
 72 | df_celldist = pd.read_csv(celldist_fname)
 73 | 
 74 | # Run stan regression
 75 | # ---------------------------
 76 | #create datafame with computation time
 77 | df_run_info = list()
 78 | 
 79 | #iterate over all synthetic datasets
 80 | for d_id in ds_id:
 81 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 82 |     #run time start
 83 |     run_t_strt = time.time()        
 84 |     #input flatfile
 85 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 86 |     #load flatfile
 87 |     df_flatfile = pd.read_csv(ds_fname)
 88 |     #keep only NGAWest2 records
 89 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 90 |     
 91 |     #output file name and directory
 92 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 93 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 94 | 
 95 |     #run stan model
 96 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 97 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
 98 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
 99 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
100 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
101 |        
102 |     #run time end
103 |     run_t_end = time.time()
104 | 
105 |     #compute run time
106 |     run_tm = (run_t_end - run_t_strt)/60
107 |   
108 |     #log run time
109 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
110 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
111 |                            
112 |     #write out run info
113 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
114 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
115 |     
116 | 
117 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_pystan_model3_uncorr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model3_uncorr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest2CA_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Validation/regression/ds3/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest2CA_uncorr_cells_chol_eff'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg=0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only NGAWest2 records
 91 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 92 |     
 93 |     #output file name and directory
 94 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 95 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 96 | 
 97 |     #run stan model
 98 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 99 |             out_fname, out_dir, res_name, 
100 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
101 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
102 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
103 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
104 |        
105 |     #run time end
106 |     run_t_end = time.time()
107 | 
108 |     #compute run time
109 |     run_tm = (run_t_end - run_t_strt)/60
110 |     
111 |     #log run time
112 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
113 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
114 |                            
115 |     #write out run info
116 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
117 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest2CANorth_sparse.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest2CANorth_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest2CANorth_corr_cells_chol_eff_sp'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_a_erg=0.0
 61 | #parallel options
 62 | # flag_parallel = True
 63 | flag_parallel = False
 64 | 
 65 | #output sub-dir with corr with suffix info
 66 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 67 | 
 68 | #load cell dataframes
 69 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 70 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 71 | df_cellinfo = pd.read_csv(cellinfo_fname)
 72 | df_celldist = pd.read_csv(celldist_fname)
 73 | 
 74 | # Run stan regression
 75 | # ---------------------------
 76 | #create datafame with computation time
 77 | df_run_info = list()
 78 | 
 79 | #iterate over all synthetic datasets
 80 | for d_id in ds_id:
 81 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 82 |     #run time start
 83 |     run_t_strt = time.time()   
 84 |     #input flatfile
 85 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 86 |     #load flatfile
 87 |     df_flatfile = pd.read_csv(ds_fname)
 88 |     #keep only North records of NGAWest2
 89 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 90 |                                                  df_flatfile.sreg==1),:]
 91 |     
 92 |     #output file name and directory
 93 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 94 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 95 | 
 96 |     #run stan model
 97 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 98 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
 99 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
100 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
101 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
102 |        
103 |     #run time end
104 |     run_t_end = time.time()
105 | 
106 |     #compute run time
107 |     run_tm = (run_t_end - run_t_strt)/60
108 |   
109 |     #log run time
110 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
111 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
112 |                            
113 |     #write out run info
114 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
115 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
116 | 
117 | 


--------------------------------------------------------------------------------
/Analyses/Stan_lib/regression_stan_model1_unbounded_hyp.stan:
--------------------------------------------------------------------------------
  1 | /*********************************************
  2 | Stan program to obtain VCM parameters
  3 | lower dimensions is used (event terms/station terms)
  4 | 
  5 | This model explicitly estimates the latent event terms and station terms.
  6 | This model includes a spatially varying earthquake term, a spatially 
  7 | varying station term, a spatially independent station term, and the 
  8 | between and within event residuals. 
  9 | The spatially varying terms are given multivariate normal priors with kernel
 10 | covariances; this file does not use the Cholesky/standard-normal form.
 11 |  ********************************************/
 12 | 
 13 | data {
 14 |   int N;      // number of records
 15 |   int NEQ;    // number of earthquakes
 16 |   int NSTAT;  // number of stations
 17 |   
 18 |   //event and station ID of each record (used to index the event and station terms)
 19 |   int<lower=1,upper=NEQ> eq[N];     // event id (in numerical order from 1 to last)
 20 |   int<lower=1,upper=NSTAT> stat[N]; // station id (in numerical order from 1 to last)
 21 | 
 22 |   //observations (response variable of the likelihood in the model block)
 23 |   vector[N] Y; // original note: median predictions for each record with anelastic attenuation taken out
 24 | 
 25 |   //mean ground motion, added as a fixed offset to the non-ergodic mean
 26 |   vector[N] rec_mu; 
 27 | 
 28 |   //Earthquake, Station coordinates (inputs of the covariance distances)
 29 |   vector[2] X_e[NEQ];   // coordinates of each event (one entry per earthquake)
 30 |   vector[2] X_s[NSTAT]; // coordinates of each station (one entry per station)
 31 | }
 32 | 
 33 | transformed data {
 34 |   real delta = 1e-9; // small constant added to the diagonal of the covariance matrices
 35 | }
 36 | 
 37 | parameters {
 38 |   //Aleatory Variability Terms
 39 |   real<lower=0> phi_0;  // phi_0 - remaining aleatory variability of within-event residuals
 40 |   real<lower=0> tau_0;  // tau_0 - remaining aleatory variability of between-event residuals
 41 |   
 42 |   //Epistemic Uncertainty Terms (kernel hyper-parameters)
 43 |   real<lower=0.0>  ell_1e;    // length parameter of the event kernel, exp(-d/ell_1e)
 44 |   real<lower=0.0>  omega_1e;  // scale of the event kernel (enters as omega_1e^2)
 45 |   real<lower=0.0>  ell_1as;   // length parameter of the station kernel, exp(-d/ell_1as)
 46 |   real<lower=0.0>  omega_1as; // scale of the station kernel (enters as omega_1as^2)
 47 |   real<lower=0.0>  omega_1bs; // standard deviation of the uncorrelated station term
 48 |  
 49 |   //spatially correlated coefficients
 50 |   real dc_0;             //constant shift
 51 |   vector[NEQ]   dc_1e;   //spatially varying eq coeff
 52 |   vector[NSTAT] dc_1as;  //spatially varying stat coeff
 53 |   vector[NSTAT] dc_1bs;  //zero correlation station term
 54 |   
 55 |   //between event terms (one per earthquake, standard deviation tau_0)
 56 |   vector[NEQ]   dB;
 57 | }
 58 | 
 59 | model {
 60 |   //non-ergodic mean of every record, including the between-event term
 61 |   vector[N] rec_nerg_dB;
 62 |   
 63 |   //Aleatory Variability Terms
 64 |   phi_0 ~ lognormal(-1.20,0.3);
 65 |   tau_0 ~ lognormal(-1,0.3);
 66 |   //between-event terms
 67 |   dB ~ normal(0,tau_0);
 68 |   
 69 |   //non-ergodic hyper-parameters
 70 |   ell_1e  ~ inv_gamma(2.,50);
 71 |   ell_1as ~ inv_gamma(2.,50);
 72 |   omega_1e  ~ exponential(5);
 73 |   omega_1as ~ exponential(5);
 74 |   omega_1bs ~ exponential(5);
 75 | 
 76 |   //constant shift
 77 |   dc_0 ~ normal(0.,0.1);
 78 |   
 79 |   //station contributions with zero correlation length
 80 |   dc_1bs ~ normal(0,omega_1bs);
 81 |     
 82 |   //spatially latent variable for event contributions to GP
 83 |   {
 84 |     matrix[NEQ,NEQ] cov_1e;
 85 |     
 86 |     //fill the symmetric covariance from its upper triangle
 87 |     for(i in 1:NEQ) {
 88 |       for(j in i:NEQ) {
 89 |         real d_e;
 90 |         real c_1e;
 91 |         d_e = distance(X_e[i],X_e[j]);
 92 |   
 93 |         c_1e = (omega_1e^2 * exp(-d_e/ell_1e));
 94 |   
 95 |         cov_1e[i,j] = c_1e;
 96 |         cov_1e[j,i] = c_1e;
 97 |       }
 98 |       cov_1e[i,i] += delta; //small diagonal offset
 99 |     }
100 |     dc_1e ~ multi_normal(rep_vector(0.,NEQ), cov_1e);
101 |   }
102 | 
103 |   //Spatially latent variable for station contributions to GP
104 |   { 
105 |     matrix[NSTAT,NSTAT] cov_1as;
106 | 
107 |     for(i in 1:NSTAT) {
108 |       for(j in i:NSTAT) {
109 |         real d_s;
110 |         real c_1as;
111 |   
112 |         d_s = distance(X_s[i],X_s[j]);
113 |         
114 |         c_1as = (omega_1as^2  * exp(-d_s/ell_1as));
115 |   
116 |         cov_1as[i,j] = c_1as;
117 |         cov_1as[j,i] = c_1as;
118 |       }
119 |       cov_1as[i,i] += delta; //small diagonal offset
120 |     }
121 |     dc_1as ~ multi_normal(rep_vector(0.,NSTAT), cov_1as);
122 |   }
123 |   
124 |   //Mean non-ergodic including dB: event terms use the event id, station terms the station id
125 |   rec_nerg_dB = rec_mu + dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat] + dB[eq];
126 |   
127 |   Y ~ normal(rec_nerg_dB,phi_0);
128 | }
129 | 
130 | 


--------------------------------------------------------------------------------
/Examples/example1/regression_inla_postprocessing.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sun Mar 27 12:20:36 2022
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | 
 11 | #load packages
 12 | import sys
 13 | import pathlib
 14 | import glob
 15 | import re           #regular expression package
 16 | import pickle
 17 | from joblib import cpu_count
 18 | #arithmetic libraries
 19 | import numpy as np
 20 | #statistics libraries
 21 | import pandas as pd
 22 | #plot libraries
 23 | import matplotlib as mpl
 24 | import matplotlib.pyplot as plt
 25 | from matplotlib.ticker import  AutoLocator as plt_autotick
 26 | 
 27 | # Define Problem
 28 | # ---------------------------
 29 | #data filename
 30 | fname_data = 'data/examp_obs.csv'
 31 | #inla regression filename
 32 | fname_inla_reg = 'data/inla_regression/inla_regression.csv'
 33 | 
 34 | #output directory
 35 | dir_out = 'data/inla_regression/'
 36 | 
 37 | # Read Data
 38 | # ---------------------------
 39 | #observation data
 40 | df_data = pd.read_csv(fname_data, index_col=0)
 41 | #inla regression results
 42 | df_reg_summary = pd.read_csv(fname_inla_reg, index_col=0)
 43 | 
 44 | # Summary figures
 45 | # ---------------------------
 46 | #color bar (mean)
 47 | cbar_levs_mean  = np.linspace(-2, 2, 101).tolist()    
 48 | cbar_ticks_mean = np.arange(-2, 2.01, 0.8).tolist()    
 49 | #color bar (sigma)
 50 | cbar_levs_sig  = np.linspace(0.0, 0.5, 101).tolist()    
 51 | cbar_ticks_sig = np.arange(0, 0.501, 0.1).tolist()    
 52 | 
 53 | # scatter comparison 
 54 | fname_fig = 'inla_gp_scatter'
 55 | #create figure
 56 | fig, ax = plt.subplots(figsize = (10,10))
 57 | #obsevations scatter
 58 | hl = ax.plot(df_data.tot, df_reg_summary.tot_mean, 'o')
 59 | ax.axline((0,0), slope=1, color="black", linestyle="--")
 60 | #figure properties
 61 | ax.grid(which='both')
 62 | #tick size
 63 | ax.tick_params(axis='x', labelsize=32)
 64 | ax.tick_params(axis='y', labelsize=32)
 65 | #figure limits
 66 | ax.set_xticks([-2,-1,0,1,2])
 67 | ax.set_yticks([-2,-1,0,1,2])
 68 | ax.set_xlim([-2.0, 2.0])
 69 | ax.set_ylim([-2.0, 2.0])
 70 | #labels
 71 | ax.set_xlabel('Data',      fontsize=35)
 72 | ax.set_ylabel('Estimated', fontsize=35)
 73 | #save figure
 74 | fig.tight_layout()
 75 | fig.savefig( dir_out + fname_fig + '.png' )
 76 | 
 77 | #field mean
 78 | fname_fig = 'inla_gp_field_mean'
 79 | #create figure
 80 | fig, ax = plt.subplots(figsize = (10,11))
 81 | #obsevations map
 82 | hl = ax.scatter(df_reg_summary.X, df_reg_summary.Y, c=df_reg_summary.tot_mean, marker='D', vmin=-2, vmax=2, s=100)
 83 | #figure properties
 84 | ax.grid(which='both')
 85 | #color bar
 86 | cbar = fig.colorbar(hl, orientation="horizontal", pad=0.15, boundaries=cbar_levs_mean, ticks=cbar_ticks_mean)
 87 | #tick size
 88 | ax.tick_params(axis='x', labelsize=30)
 89 | ax.tick_params(axis='y', labelsize=30)
 90 | #labels
 91 | ax.set_xlabel(r'$t_1$', fontsize=35)
 92 | ax.set_ylabel(r'$t_2$', fontsize=35)
 93 | #figure limits
 94 | ax.set_xlim([0, 100])
 95 | ax.set_ylim([0, 100])
 96 | #update colorbar 
 97 | cbar.ax.tick_params(tick1On=1, labelsize=30)
 98 | cbar.set_label(r'$\mu(c_0 + c_1(\vec{t}))$', size=35)
 99 | #save figure
100 | fig.tight_layout()
101 | fig.savefig( dir_out + fname_fig + '.png' )
102 | 
103 | #field std
104 | fname_fig = 'inla_gp_field_std'
105 | #create figure
106 | fig, ax = plt.subplots(figsize = (10,11))
107 | #obsevations map
108 | hl = ax.scatter(df_reg_summary.X, df_reg_summary.Y, c=df_reg_summary.tot_sig, marker='D', vmin=0, vmax=0.5, s=100, cmap='Oranges')
109 | #figure properties
110 | ax.grid(which='both')
111 | #color bar
112 | cbar = fig.colorbar(hl, orientation="horizontal", pad=0.15, boundaries=cbar_levs_sig, ticks=cbar_ticks_sig)
113 | #tick size
114 | ax.tick_params(axis='x', labelsize=30)
115 | ax.tick_params(axis='y', labelsize=30)
116 | #labels
117 | ax.set_xlabel(r'$t_1$', fontsize=35)
118 | ax.set_ylabel(r'$t_2$', fontsize=35)
119 | #figure limits
120 | ax.set_xlim([0, 100])
121 | ax.set_ylim([0, 100])
122 | #update colorbar 
123 | cbar.ax.tick_params(tick1On=1, labelsize=30)
124 | cbar.set_label(r'$\psi(c_0 + c_1(\vec{t}))$', size=35)
125 | #save figure
126 | fig.tight_layout()
127 | fig.savefig( dir_out + fname_fig + '.png' )
128 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_pystan_model3_corr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model3_corr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest2CANorth_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Validation/regression/ds3/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest2CANorth_corr_cells_chol_eff'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg=0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only North records of NGAWest2
 91 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 92 |                                                  df_flatfile.sreg==1),:]
 93 |     
 94 |     #output file name and directory
 95 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 96 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 97 | 
 98 |     #run stan model
 99 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
100 |             out_fname, out_dir, res_name, 
101 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
102 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 |     
120 | 
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_pystan_model3_uncorr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model3_uncorr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 41 | 
 42 | #output info
 43 | #main output filename
 44 | out_fname_main = 'NGAWest2CANorth_syndata'
 45 | #main output directory
 46 | out_dir_main   = '../../../../Data/Validation/regression/ds3/'
 47 | #output sub-directory
 48 | out_dir_sub    = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol_eff'
 49 | 
 50 | #stan parameters
 51 | runstan_flag = True
 52 | # pystan_ver = 2
 53 | pystan_ver = 3
 54 | res_name = 'tot'
 55 | n_iter = 1000
 56 | n_chains = 4
 57 | adapt_delta   = 0.8
 58 | max_treedepth = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg=0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only North records of NGAWest2
 91 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 92 |                                                  df_flatfile.sreg==1),:]
 93 |     
 94 |     #output file name and directory
 95 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 96 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 97 | 
 98 |     #run stan model
 99 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
100 |             out_fname, out_dir, res_name, 
101 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
102 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |     
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_pystan_model1_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model1_unbounded_hyp import RunStan
 19 | 
 20 | # Define variables
 21 | # ---------------------------
 22 | #filename suffix
 23 | # synds_suffix = '_small_corr_len' 
 24 | # synds_suffix = '_large_corr_len'
 25 | 
 26 | #synthetic datasets directory
 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 29 | 
 30 | # dataset info 
 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_id = np.arange(1,6)
 34 | 
 35 | #stan model
 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'
 40 | 
 41 | #output info
 42 | #main output filename
 43 | out_fname_main = 'NGAWest2CANorth_syndata'
 44 | #main output directory
 45 | out_dir_main   = '../../../../Data/Verification/regression/ds1/'
 46 | #output sub-directory
 47 | #pystan2
 48 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth'
 49 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_chol'
 50 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_chol_eff'
 51 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_chol_eff2'
 52 | #pystan3
 53 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth'
 54 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_chol'
 55 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_chol_eff'
 56 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_chol_eff2'
 57 | 
 58 | #stan parameters
 59 | runstan_flag = True
 60 | # pystan_ver = 2
 61 | pystan_ver = 3
 62 | res_name = 'tot'
 63 | n_iter   = 1000
 64 | n_chains = 4
 65 | adapt_delta   = 0.8
 66 | max_treedepth = 10
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | # Run stan regression
 75 | # ---------------------------
 76 | #create datafame with computation time
 77 | df_run_info = list()
 78 | 
 79 | #iterate over all synthetic datasets
 80 | for d_id in ds_id:
 81 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 82 |     #run time start
 83 |     run_t_strt = time.time()        
 84 |     #input flatfile
 85 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 86 |     #load flatfile
 87 |     df_flatfile = pd.read_csv(ds_fname)
 88 |     #keep only North records of NGAWest2
 89 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 90 |                                                  df_flatfile.sreg==1),:]
 91 |     
 92 |     #output file name and directory
 93 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 94 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 95 | 
 96 |     #run stan model
 97 |     RunStan(df_flatfile, sm_fname, out_fname, out_dir, res_name, 
 98 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
 99 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
100 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
101 |     
102 |     #run time end
103 |     run_t_end = time.time()
104 | 
105 |     #compute run time
106 |     run_tm = (run_t_end - run_t_strt)/60
107 |   
108 |     #log run time
109 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
110 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
111 |                            
112 |     #write out run info
113 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
114 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
115 | 
116 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_uncorr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model3_uncorr_cells_unbounded_hyp import RunStan
 19 | # from regression_cmdstan_model3_uncorr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 41 | sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 42 | 
 43 | #output info
 44 | #main output filename
 45 | out_fname_main = 'NGAWest3CA_syndata'
 46 | #main output directory
 47 | out_dir_main   = '../../../../Data/Verification/regression/ds3/'
 48 | #output sub-directory
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff_sp'
 51 | 
 52 | #stan parameters
 53 | res_name = 'tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg= 0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     
 91 |     #output file name and directory
 92 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 93 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 94 | 
 95 |     #run stan model
 96 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 97 |             out_fname, out_dir, res_name,
 98 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
 99 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
100 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
101 |             stan_parallel=flag_parallel)
102 |        
103 |     #run time end
104 |     run_t_end = time.time()
105 | 
106 |     #compute run time
107 |     run_tm = (run_t_end - run_t_strt)/60
108 |   
109 |     #log run time
110 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
111 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
112 |                            
113 |     #write out run info
114 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
115 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
116 |     
117 | 
118 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_corr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model3_corr_cells_unbounded_hyp import RunStan
 19 | # from regression_cmdstan_model3_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 42 | 
 43 | #output info
 44 | #main output filename
 45 | out_fname_main = 'NGAWest3CA_syndata'
 46 | #main output directory
 47 | out_dir_main   = '../../../../Data/Verification/regression/ds3/'
 48 | #output sub-directory
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells_chol_eff_sp'
 51 | #stan parameters
 52 | res_name = 'tot'
 53 | n_iter_warmup   = 500
 54 | n_iter_sampling = 500
 55 | n_chains        = 4
 56 | adapt_delta     = 0.8
 57 | max_treedepth   = 10
 58 | #ergodic coefficients
 59 | c_2_erg=-2.0
 60 | c_3_erg=-0.6
 61 | c_a_erg= 0.0
 62 | #parallel options
 63 | # flag_parallel = True
 64 | flag_parallel = False
 65 | 
 66 | #output sub-dir with corr with suffix info
 67 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 68 | 
 69 | #load cell dataframes
 70 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 71 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 72 | df_cellinfo = pd.read_csv(cellinfo_fname)
 73 | df_celldist = pd.read_csv(celldist_fname)
 74 | 
 75 | # Run stan regression
 76 | # ---------------------------
 77 | #create datafame with computation time
 78 | df_run_info = list()
 79 | 
 80 | #iterate over all synthetic datasets
 81 | for d_id in ds_id:
 82 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 83 |     #run time start
 84 |     run_t_strt = time.time()        
 85 |     #input flatfile
 86 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 87 |     #load flatfile
 88 |     df_flatfile = pd.read_csv(ds_fname)
 89 |     #keep only NGAWest2 records
 90 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 91 |     
 92 |     #output file name and directory
 93 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 94 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 95 | 
 96 |     #run stan model
 97 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 98 |             out_fname, out_dir, res_name, 
 99 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
100 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
101 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
102 |             stan_parallel=flag_parallel)
103 |        
104 |     #run time end
105 |     run_t_end = time.time()
106 | 
107 |     #compute run time
108 |     run_tm = (run_t_end - run_t_strt)/60
109 |   
110 |     #log run time
111 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
112 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
113 |                            
114 |     #write out run info
115 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
116 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
117 |     
118 | 
119 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_corr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model3_corr_cells_unbounded_hyp import RunStan
 19 | # from regression_cmdstan_model3_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 42 | 
 43 | #output info
 44 | #main output filename
 45 | out_fname_main = 'NGAWest2CA_syndata'
 46 | #main output directory
 47 | out_dir_main   = '../../../../Data/Verification/regression/ds3/'
 48 | #output sub-directory
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells_chol_eff_sp'
 51 | 
 52 | #stan parameters
 53 | res_name = 'tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg= 0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only NGAWest2 records
 91 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 92 |     
 93 |     #output file name and directory
 94 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 95 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 96 | 
 97 |     #run stan model
 98 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 99 |             out_fname, out_dir, res_name, 
100 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
101 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
102 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
103 |             stan_parallel=flag_parallel)
104 |        
105 |     #run time end
106 |     run_t_end = time.time()
107 | 
108 |     #compute run time
109 |     run_tm = (run_t_end - run_t_strt)/60
110 |   
111 |     #log run time
112 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
113 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
114 |                            
115 |     #write out run info
116 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
117 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
118 |     
119 | 
120 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_uncorr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model3_uncorr_cells_unbounded_hyp import RunStan
 19 | # from regression_cmdstan_model3_uncorr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 42 | 
 43 | #output info
 44 | #main output filename
 45 | out_fname_main = 'NGAWest2CA_syndata'
 46 | #main output directory
 47 | out_dir_main   = '../../../../Data/Verification/regression/ds3/'
 48 | #output sub-directory
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff_sp'
 51 | 
 52 | #stan parameters
 53 | res_name = 'tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg= 0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #output sub-dir with corr with suffix info
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #create datafame with computation time
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only NGAWest2 records
 91 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 92 |     
 93 |     #output file name and directory
 94 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 95 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 96 | 
 97 |     #run stan model
 98 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
 99 |             out_fname, out_dir, res_name,
100 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
101 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
102 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
103 |             stan_parallel=flag_parallel)
104 |        
105 |     #run time end
106 |     run_t_end = time.time()
107 | 
108 |     #compute run time
109 |     run_tm = (run_t_end - run_t_strt)/60
110 |   
111 |     #log run time
112 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
113 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
114 |                            
115 |     #write out run info
116 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
117 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
118 |     
119 | 
120 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_corr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | 
 45 | #output info
 46 | #main output filename
 47 | out_fname_main = 'NGAWest3CA_syndata'
 48 | #main output directory
 49 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 50 | #output sub-directory
 51 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_corr_cells'
 52 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_corr_cells_chol'
 53 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff'
 54 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff2'
 55 | 
 56 | #stan parameters
 57 | runstan_flag = True
 58 | # pystan_ver = 2
 59 | pystan_ver = 3
 60 | res_name = 'tot'
 61 | n_iter = 1000
 62 | n_chains = 4
 63 | adapt_delta   = 0.8
 64 | max_treedepth = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #create datafame with computation time
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()    
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     
 95 |     #output file name and directory
 96 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 97 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 98 | 
 99 |     #run stan model
100 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
101 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
102 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 | 
120 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_uncorr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_uncorr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | 
 45 | #output info
 46 | #main output filename
 47 | out_fname_main = 'NGAWest3CA_syndata'
 48 | #main output directory
 49 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 50 | #output sub-directory
 51 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_uncorr_cells'
 52 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_uncorr_cells_chol'
 53 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_uncorr_cells_chol_eff'
 54 | # out_dir_sub    = 'PYSTAN_NGAWest3CA_uncorr_cells_chol_eff2'
 55 | 
 56 | #stan parameters
 57 | runstan_flag = True
 58 | # pystan_ver = 2
 59 | pystan_ver = 3
 60 | res_name = 'tot'
 61 | n_iter = 1000
 62 | n_chains = 4
 63 | adapt_delta   = 0.8
 64 | max_treedepth = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #append the correlation-length suffix to the output sub-directory name
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #list of dataframes with the computation time of each run
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()    
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     
 95 |     #output file name and directory
 96 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 97 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 98 | 
 99 |     #run stan model
100 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
101 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
102 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
105 |          
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 | 
120 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_corr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions (uncomment exactly one RunStan import; RunStan is called below)
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model3_corr_cells_unbounded_hyp import RunStan
 19 | # from regression_cmdstan_model3_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 42 | 
 43 | #output info
 44 | #main output filename
 45 | out_fname_main = 'NGAWest2CANorth_syndata'
 46 | #main output directory
 47 | out_dir_main   = '../../../../Data/Verification/regression/ds3/'
 48 | #output sub-directory
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff_sp'
 51 | 
 52 | #stan parameters
 53 | res_name = 'tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg= 0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #append the correlation-length suffix to the output sub-directory name
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #list of dataframes with the computation time of each run
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only North records of NGAWest2
 91 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 92 |                                                  df_flatfile.sreg==1),:]
 93 |     
 94 |     #output file name and directory
 95 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 96 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 97 | 
 98 |     #run stan model
 99 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
100 |             out_fname, out_dir, res_name, 
101 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
102 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             stan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 |     
120 | 
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_uncorr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions (uncomment exactly one RunStan import; RunStan is called below)
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model3_uncorr_cells_unbounded_hyp import RunStan
 19 | # from regression_cmdstan_model3_uncorr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 42 | 
 43 | #output info
 44 | #main output filename
 45 | out_fname_main = 'NGAWest2CANorth_syndata'
 46 | #main output directory
 47 | out_dir_main   = '../../../../Data/Verification/regression/ds3/'
 48 | #output sub-directory
 49 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff'
 50 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff_sp'
 51 | 
 52 | #stan parameters
 53 | res_name = 'tot'
 54 | n_iter_warmup   = 500
 55 | n_iter_sampling = 500
 56 | n_chains        = 4
 57 | adapt_delta     = 0.8
 58 | max_treedepth   = 10
 59 | #ergodic coefficients
 60 | c_2_erg=-2.0
 61 | c_3_erg=-0.6
 62 | c_a_erg= 0.0
 63 | #parallel options
 64 | # flag_parallel = True
 65 | flag_parallel = False
 66 | 
 67 | #append the correlation-length suffix to the output sub-directory name
 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 69 | 
 70 | #load cell dataframes
 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 73 | df_cellinfo = pd.read_csv(cellinfo_fname)
 74 | df_celldist = pd.read_csv(celldist_fname)
 75 | 
 76 | # Run stan regression
 77 | # ---------------------------
 78 | #list of dataframes with the computation time of each run
 79 | df_run_info = list()
 80 | 
 81 | #iterate over all synthetic datasets
 82 | for d_id in ds_id:
 83 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 84 |     #run time start
 85 |     run_t_strt = time.time()        
 86 |     #input flatfile
 87 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 88 |     #load flatfile
 89 |     df_flatfile = pd.read_csv(ds_fname)
 90 |     #keep only North records of NGAWest2
 91 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 92 |                                                  df_flatfile.sreg==1),:]
 93 |     
 94 |     #output file name and directory
 95 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 96 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 97 | 
 98 |     #run stan model
 99 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
100 |             out_fname, out_dir, res_name,
101 |             c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 
102 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             stan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 |     
120 | 
121 | 


--------------------------------------------------------------------------------
/Analyses/Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan:
--------------------------------------------------------------------------------
  1 | /*********************************************
  2 | Stan program to obtain VCM parameters
  3 | lower dimensions is used (event terms/station terms)
  4 | 
  5 | This model explicitly estimates the latent event terms and station terms.
  6 | This model includes a spatially varying earthquake term, a spatially 
  7 | varying station term, a spatially independent station term, and the 
  8 | between and within event residuals. 
  9 | The spatially varying terms are modeled as the Cholesky factor of the
 10 | kernel (covariance) matrix multiplied with standard normal variates.
 11 |  ********************************************/
 12 | 
 13 | data {
 14 |   int N;      // number of records
 15 |   int NEQ;    // number of earthquakes
 16 |   int NSTAT;  // number of stations
 17 |   
 18 |   //event and station ID
 19 |   int<lower=1,upper=NEQ> eq[N];     // event id (in numerical order from 1 to last)
 20 |   int<lower=1,upper=NSTAT> stat[N]; // station id (in numerical order from 1 to last)
 21 | 
 22 |   //observations
 23 |   vector[N] Y; // median predictions for each record with anelasic attenuation taken out
 24 | 
 25 |   //mean ground motion
 26 |   vector[N] rec_mu; 
 27 | 
 28 |   //Earthquake, Station coordinates
 29 |   vector[2] X_e[NEQ];   // event coordinates for each earthquake
 30 |   vector[2] X_s[NSTAT]; // station coordinates for each station
 31 | }
 32 | 
 33 | transformed data {
 34 |   real delta = 1e-9;
 35 | }
 36 | 
 37 | parameters {
 38 |   //Aleatory Variability Terms
 39 |   real<lower=0> phi_0;  // phi_0 - remaining aleatory variability of within-event residuals
 40 |   real<lower=0> tau_0;  // tau_0 - remaining aleatory variability of between-event residuals
 41 |   
 42 |   //Epistemic Uncertainty Terms
 43 |   real<lower=0.0>  ell_1e;
 44 |   real<lower=0.0>  omega_1e;
 45 |   real<lower=0.0>  ell_1as;
 46 |   real<lower=0.0>  omega_1as;
 47 |   real<lower=0.0>  omega_1bs;
 48 |  
 49 |   //spatially correlated coefficients
 50 |   real dc_0;             //constant shift
 51 |   vector[NSTAT] dc_1bs;  //zero correlation station term
 52 | 
 53 |   //standardized normal variables for spatially correlated coefficients
 54 |   vector[NEQ]   z_1e;   //spatially varying eq coeff
 55 |   vector[NSTAT] z_1as;  //spatially varying stat coeff
 56 |   
 57 |   //between event terms
 58 |   vector[NEQ]   dB;
 59 | }
 60 | 
 61 | transformed parameters{
 62 |   //Spatially correlated coefficients
 63 |   vector[NEQ]   dc_1e;   //spatially varying eq coeff
 64 |   vector[NSTAT] dc_1as;  //spatially varying stat coeff
 65 | 
 66 |   //Spatially latent variable for event contributions to GP
 67 |   {
 68 |     matrix[NEQ,NEQ] COV_1e;
 69 |     matrix[NEQ,NEQ] L_1e;
 70 |     for(i in 1:NEQ) {
 71 |       for(j in i:NEQ) {
 72 |         real d_e = distance(X_e[i],X_e[j]);
 73 |         real C_1e = (omega_1e^2 * exp(-d_e/ell_1e));
 74 |         COV_1e[i,j] = C_1e;
 75 |         COV_1e[j,i] = C_1e;
 76 |       }
 77 |       COV_1e[i,i] += delta;
 78 |     }
 79 |     L_1e = cholesky_decompose(COV_1e);
 80 |     dc_1e = L_1e * z_1e;
 81 |   }
 82 | 
 83 | 
 84 |   //Spatially latent variable for station contributions to GP
 85 |   { 
 86 |     matrix[NSTAT,NSTAT] COV_1as;
 87 |     matrix[NSTAT,NSTAT] L_1as;
 88 |     for(i in 1:NSTAT) {
 89 |       for(j in i:NSTAT) {
 90 |         real d_s = distance(X_s[i],X_s[j]);
 91 |         real C_1as = (omega_1as^2  * exp(-d_s/ell_1as));
 92 |         COV_1as[i,j] = C_1as;
 93 |         COV_1as[j,i] = C_1as;
 94 |       }
 95 |       COV_1as[i,i] += delta;
 96 |     }
 97 |     L_1as = cholesky_decompose(COV_1as);
 98 |     dc_1as = L_1as * z_1as;
 99 |   }
100 | 
101 | }
102 | 
103 | model {
104 |   //non-ergodic mean
105 |   vector[N] rec_nerg_dB;
106 |   
107 |   //Aleatory Variability Terms
108 |   phi_0 ~ lognormal(-1.20,0.3);
109 |   tau_0 ~ lognormal(-1,0.3);
110 |   //between-event terms
111 |   dB ~ normal(0,tau_0);
112 |   
113 |   //non-ergodic hyper-parameters
114 |   ell_1e  ~ inv_gamma(2.,50);
115 |   ell_1as ~ inv_gamma(2.,50);
116 |   omega_1e  ~ exponential(5);
117 |   omega_1as ~ exponential(5);
118 |   omega_1bs ~ exponential(5);
119 |   
120 |   //constant shift
121 |   dc_0 ~ normal(0.,0.1);
122 |   
123 |   //standardized event contributions to GP
124 |   z_1e ~ std_normal();
125 | 
126 |   //standardized station contributions to GP
127 |   z_1as ~ std_normal();
128 |   
129 |   //station contributions with zero correlation length
130 |   dc_1bs ~ normal(0,omega_1bs);
131 | 
132 |   //Mean non-ergodic including dB
133 |   rec_nerg_dB = rec_mu + dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat] + dB[eq];
134 |   
135 |   Y ~ normal(rec_nerg_dB,phi_0);
136 | }
137 | 
138 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_corr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model2_corr_cells_unbounded_hyp import RunStan
 19 | from regression_cmdstan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest3CA_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 51 | #output sub-directory
 52 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells'
 53 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells_chol'
 54 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells_chol_efficient'
 55 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells_chol_efficient2'
 56 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_corr_cells_chol_efficient_sp'
 57 | 
 58 | #stan parameters
 59 | res_name = 'tot'
 60 | n_iter_warmup   = 500
 61 | n_iter_sampling = 500
 62 | n_chains        = 4
 63 | adapt_delta     = 0.8
 64 | max_treedepth   = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #append the correlation-length suffix to the output sub-directory name
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #list of dataframes with the computation time of each run
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()        
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     
 95 |     #output file name and directory
 96 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 97 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 98 | 
 99 |     #run stan model
100 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
101 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
102 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             stan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 |     
120 | 
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_uncorr_cells_NGAWest3CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model2_uncorr_cells_unbounded_hyp import RunStan
 19 | from regression_cmdstan_model2_uncorr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest3CA_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 51 | #output sub-directory
 52 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells'
 53 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol'
 54 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff'
 55 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff2'
 56 | # out_dir_sub    = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff_sp'
 57 | 
 58 | #stan parameters
 59 | res_name = 'tot'
 60 | n_iter_warmup   = 500
 61 | n_iter_sampling = 500
 62 | n_chains        = 4
 63 | adapt_delta     = 0.8
 64 | max_treedepth   = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #append the correlation-length suffix to the output sub-directory name
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #list of dataframes with the computation time of each run
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()        
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     
 95 |     #output file name and directory
 96 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 97 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
 98 | 
 99 |     #run stan model
100 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
101 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
102 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
103 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
104 |             stan_parallel=flag_parallel)
105 |        
106 |     #run time end
107 |     run_t_end = time.time()
108 | 
109 |     #compute run time
110 |     run_tm = (run_t_end - run_t_strt)/60
111 |   
112 |     #log run time
113 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
114 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
115 |                            
116 |     #write out run info
117 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
118 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
119 |     
120 | 
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_corr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model2_corr_cells_unbounded_hyp import RunStan
 19 | from regression_cmdstan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest2CA_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Validation/regression/ds2/'
 51 | #output sub-directory
 52 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells'
 53 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells_chol'
 54 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells_chol_efficient'
 55 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells_chol_efficient2'
 56 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_corr_cells_chol_efficient_sp'
 57 | 
 58 | #stan parameters
 59 | res_name = 'tot'
 60 | n_iter_warmup   = 500
 61 | n_iter_sampling = 500
 62 | n_chains        = 4
 63 | adapt_delta     = 0.8
 64 | max_treedepth   = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #list of dataframes with the computation time of each run
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()        
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     #keep only NGAWest2 records
 95 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 96 |     
 97 |     #output file name and directory
 98 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 99 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
100 | 
101 |     #run stan model
102 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
103 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
104 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
105 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
106 |             stan_parallel=flag_parallel)
107 |        
108 |     #run time end
109 |     run_t_end = time.time()
110 | 
111 |     #compute run time
112 |     run_tm = (run_t_end - run_t_strt)/60
113 |   
114 |     #log run time
115 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
116 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
117 |                            
118 |     #write out run info
119 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
120 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
121 |     
122 | 
123 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_uncorr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model2_uncorr_cells_unbounded_hyp import RunStan
 19 | from regression_cmdstan_model2_uncorr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest2CA_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 51 | #output sub-directory
 52 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells'
 53 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol'
 54 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff'
 55 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff2'
 56 | # out_dir_sub    = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff_sp'
 57 | 
 58 | #stan parameters
 59 | res_name = 'tot'
 60 | n_iter_warmup   = 500
 61 | n_iter_sampling = 500
 62 | n_chains        = 4
 63 | adapt_delta     = 0.8
 64 | max_treedepth   = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #create datafame with computation time
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()        
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     #keep only NGAWest2 records
 95 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
 96 |         
 97 |     #output file name and directory
 98 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
 99 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
100 | 
101 |     #run stan model
102 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
103 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
104 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
105 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
106 |             stan_parallel=flag_parallel)
107 |        
108 |     #run time end
109 |     run_t_end = time.time()
110 | 
111 |     #compute run time
112 |     run_tm = (run_t_end - run_t_strt)/60
113 |   
114 |     #log run time
115 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
116 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
117 |                            
118 |     #write out run info
119 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
120 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
121 |     
122 | 
123 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_inla_model3_uncorr_cells_NGAWest2CANorth.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
  3 | # and calculates the non-ergodic terms
  4 | ##################################################################################
  5 | 
  6 | #user functions
  7 | source('../../../R_lib/regression/inla/regression_inla_model3_uncorr_cells_unbounded_hyp.R')
  8 | 
  9 | # Define variables
 10 | # ---------------------------
 11 | #main directory, all data paths below are relative to it
 12 | main_dir <- '../../../../'                                           #local machine
 13 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
 14 | 
 15 | #suffix of the synthetic dataset (uncomment one option, the name is used below)
 16 | # synds_suffix <- '_small_corr_len' 
 17 | # synds_suffix <- '_large_corr_len'
 18 | # synds_suffix <- '_small_corr_len' 
 19 | 
 20 | #directory of the synthetic datasets, the suffix is part of the folder name
 21 | ds_dir <- 'Data/Verification/synthetic_datasets/ds3'
 22 | ds_dir <- paste0(ds_dir, synds_suffix)
 23 | 
 24 | #dataset info
 25 | # ds_main_data_fname     <- 'CatalogNGAWest3CA_synthetic_data'
 26 | # ds_main_cellinfo_fname <- 'CatalogNGAWest3CA_cellinfo'
 27 | # ds_main_cellmat_fname  <- 'CatalogNGAWest3CA_distancematrix'
 28 | ds_main_data_fname     <- 'CatalogNGAWest3CALite_synthetic_data'
 29 | ds_main_cellinfo_fname <- 'CatalogNGAWest3CALite_cellinfo'
 30 | ds_main_cellmat_fname  <- 'CatalogNGAWest3CALite_distancematrix'
 31 | ds_id <- 1:5  #dataset ids 1 to 5
 32 | 
 33 | #output info
 34 | #main output filename
 35 | out_fname_main <- 'NGAWest2CANorth_syndata'
 36 | #main output directory
 37 | out_dir_main   <- 'Data/Verification/regression/ds3'
 38 | #output sub-directory (uncomment one option, the dataset suffix is appended below)
 39 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells'
 40 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells_fine'
 41 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells_medium'
 42 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells_coarse'
 43 | 
 44 | #inla parameters (forwarded to RunINLA)
 45 | runinla_flag <- TRUE
 46 | # runinla_flag <- FALSE
 47 | res_name     <- 'tot'
 48 | 
 49 | #mesh coarseness (uncomment one set, the three values are passed to RunINLA)
 50 | # #fine
 51 | # mesh_edge_max     <- 5
 52 | # mesh_inner_offset <- 15
 53 | # mesh_outer_offset <- 15
 54 | # #medium
 55 | # mesh_edge_max     <- 15
 56 | # mesh_inner_offset <- 15
 57 | # mesh_outer_offset <- 50
 58 | # #coarse
 59 | # mesh_edge_max     <- 50
 60 | # mesh_inner_offset <- 50
 61 | # mesh_outer_offset <- 150
 62 | 
 63 | #ergodic coefficients
 64 | c_2_erg <- -2.0
 65 | c_3_erg <- -0.6
 66 | c_a_erg <-  0.0 #anelastic attenuation
 67 | 
 68 | #append the dataset suffix to the output sub-directory
 69 | out_dir_sub <- paste0(out_dir_sub, synds_suffix)
 70 | 
 71 | # Run inla regression
 72 | # ---------------------------
 73 | #create dataframe with computation time, one row is added per dataset
 74 | df_run_info <- data.frame()
 75 | 
 76 | #iterate over all synthetic datasets
 77 | for (d_id in ds_id){
 78 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
 79 |   #run time start
 80 |   run_t_strt <- Sys.time()
 81 |   #input file names
 82 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
 83 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
 84 |   cellinfo_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellinfo_fname))
 85 |   cellmat_fname  <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellmat_fname))
 86 | 
 87 |   #load files
 88 |   df_flatfile  <- read.csv(flatfile_fname)
 89 |   df_cellinfo  <- read.csv(cellinfo_fname)
 90 |   df_cellmat   <- read.csv(cellmat_fname)
 91 |   #keep only North records of NGAWest2 (dsid equal to 0 and sreg equal to 1)
 92 |   df_flatfile <- subset(df_flatfile, dsid==0 & sreg==1)
 93 | 
 94 | 
 95 |   #output file name and directory of this dataset
 96 |   out_fname <- sprintf('%s%s_Y%i',      out_fname_main, synds_suffix, d_id)
 97 |   out_dir   <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
 98 | 
 99 |   #run INLA model
100 |   RunINLA(df_flatfile, df_cellinfo, df_cellmat, out_fname, out_dir, res_name=res_name,
101 |           c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
102 |           runinla_flag=runinla_flag,
103 |           mesh_edge_max=mesh_edge_max,
104 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset)
105 | 
106 |   #run time end
107 |   run_t_end <- Sys.time()
108 | 
109 |   #compute run time in minutes (fixed units, a plain difference picks its units automatically)
110 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
111 | 
112 |   #log run time
113 |   df_r_i <- data.frame(computer_name=Sys.info()["nodename"], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
114 |   df_run_info <- rbind(df_run_info, df_r_i)
115 | 
116 |   #write out run info, the file is rewritten after every dataset with all runs so far
117 |   row.names(df_run_info) <- NULL
118 |   run_info_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
119 |   write.csv(df_run_info, run_info_fname, row.names=FALSE)
120 | }
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_uncorr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_uncorr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | 
 45 | #output info
 46 | #main output filename
 47 | out_fname_main = 'NGAWest2CA_syndata'
 48 | #main output directory
 49 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 50 | #output sub-directory
 51 | #pystan 2
 52 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_uncorr_cells'
 53 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_uncorr_cells_chol'
 54 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_uncorr_cells_chol_eff'
 55 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_uncorr_cells_chol_eff2'
 56 | #pystan 3
 57 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_uncorr_cells'
 58 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_uncorr_cells_chol'
 59 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_uncorr_cells_chol_eff'
 60 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_uncorr_cells_chol_eff2'
 61 | 
 62 | #stan parameters
 63 | runstan_flag = True
 64 | # pystan_ver = 2
 65 | pystan_ver = 3
 66 | res_name = 'tot'
 67 | n_iter = 1000
 68 | n_chains = 4
 69 | adapt_delta   = 0.8
 70 | max_treedepth = 10
 71 | #ergodic coefficients
 72 | c_a_erg=0.0
 73 | #parallel options
 74 | # flag_parallel = True
 75 | flag_parallel = False
 76 | 
 77 | #output sub-dir with corr with suffix info
 78 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 79 | 
 80 | #load cell dataframes
 81 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 82 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 83 | df_cellinfo = pd.read_csv(cellinfo_fname)
 84 | df_celldist = pd.read_csv(celldist_fname)
 85 | 
 86 | # Run stan regression
 87 | # ---------------------------
 88 | #create datafame with computation time
 89 | df_run_info = list()
 90 | 
 91 | #iterate over all synthetic datasets
 92 | for d_id in ds_id:
 93 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 94 |     #run time start
 95 |     run_t_strt = time.time()        
 96 |     #input flatfile
 97 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 98 |     #load flatfile
 99 |     df_flatfile = pd.read_csv(ds_fname)
100 |     #keep only NGAWest2 records
101 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
102 |     
103 |     #output file name and directory
104 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
105 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
106 | 
107 |     #run stan model
108 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
109 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
110 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
111 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
112 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
113 |        
114 |     #run time end
115 |     run_t_end = time.time()
116 | 
117 |     #compute run time
118 |     run_tm = (run_t_end - run_t_strt)/60
119 |     
120 |     #log run time
121 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
122 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
123 |                            
124 |     #write out run info
125 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
126 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
127 | 
128 | 
129 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_corr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model2_corr_cells_unbounded_hyp import RunStan
 19 | from regression_cmdstan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest2CANorth_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 51 | #output sub-directory
 52 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells'
 53 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol'
 54 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff'
 55 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff2'
 56 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff_sp'
 57 | 
 58 | #stan parameters
 59 | res_name = 'tot'
 60 | n_iter_warmup   = 500
 61 | n_iter_sampling = 500
 62 | n_chains        = 4
 63 | adapt_delta     = 0.8
 64 | max_treedepth   = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #create datafame with computation time
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()        
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     #keep only North records of NGAWest2
 95 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 96 |                                                  df_flatfile.sreg==1),:]
 97 |     
 98 |     #output file name and directory
 99 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
100 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
101 | 
102 |     #run stan model
103 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
104 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
105 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
106 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
107 |             stan_parallel=flag_parallel)
108 |        
109 |     #run time end
110 |     run_t_end = time.time()
111 | 
112 |     #compute run time
113 |     run_tm = (run_t_end - run_t_strt)/60
114 |   
115 |     #log run time
116 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
117 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
118 |                            
119 |     #write out run info
120 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
121 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
122 |     
123 | 
124 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_uncorr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Dec 29 15:16:15 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
 18 | # from regression_cmdstan_model2_uncorr_cells_unbounded_hyp import RunStan
 19 | from regression_cmdstan_model2_uncorr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model 
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest2CANorth_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 51 | #output sub-directory
 52 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells'
 53 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol'
 54 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff'
 55 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff2'
 56 | # out_dir_sub    = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff_sp'
 57 | 
 58 | #stan parameters
 59 | res_name = 'tot'
 60 | n_iter_warmup   = 500
 61 | n_iter_sampling = 500
 62 | n_chains        = 4
 63 | adapt_delta     = 0.8
 64 | max_treedepth   = 10
 65 | #ergodic coefficients
 66 | c_a_erg=0.0
 67 | #parallel options
 68 | # flag_parallel = True
 69 | flag_parallel = False
 70 | 
 71 | #output sub-dir with corr with suffix info
 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 73 | 
 74 | #load cell dataframes
 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 77 | df_cellinfo = pd.read_csv(cellinfo_fname)
 78 | df_celldist = pd.read_csv(celldist_fname)
 79 | 
 80 | # Run stan regression
 81 | # ---------------------------
 82 | #create datafame with computation time
 83 | df_run_info = list()
 84 | 
 85 | #iterate over all synthetic datasets
 86 | for d_id in ds_id:
 87 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 88 |     #run time start
 89 |     run_t_strt = time.time()        
 90 |     #input flatfile
 91 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 92 |     #load flatfile
 93 |     df_flatfile = pd.read_csv(ds_fname)
 94 |     #keep only North records of NGAWest2
 95 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
 96 |                                                  df_flatfile.sreg==1),:]
 97 |     
 98 |     #output file name and directory
 99 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
100 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
101 | 
102 |     #run stan model
103 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
104 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
105 |             n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
106 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
107 |             stan_parallel=flag_parallel)
108 |        
109 |     #run time end
110 |     run_t_end = time.time()
111 | 
112 |     #compute run time
113 |     run_tm = (run_t_end - run_t_strt)/60
114 |   
115 |     #log run time
116 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
117 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
118 |                            
119 |     #write out run info
120 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
121 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
122 |     
123 | 
124 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_uncorr_cells_NGAWest2CANorth.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_uncorr_cells_unbounded_hyp import RunStan
 19 | 
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | 
 45 | #output info
 46 | #main output filename
 47 | out_fname_main = 'NGAWest2CANorth_syndata'
 48 | #main output directory
 49 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 50 | #output sub-directory
 51 | #pystan2
 52 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_uncorr_cells'
 53 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol'
 54 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol_eff'
 55 | # out_dir_sub    = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol_eff2'
 56 | #pystan3
 57 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_uncorr_cells'
 58 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_uncorr_cells_chol'
 59 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_uncorr_cells_chol_eff'
 60 | # out_dir_sub    = 'PYSTAN3_NGAWest2CANorth_uncorr_cells_chol_eff2'
 61 | 
 62 | #stan parameters
 63 | runstan_flag = True
 64 | # pystan_ver = 2
 65 | pystan_ver = 3
 66 | res_name = 'tot'
 67 | n_iter = 1000
 68 | n_chains = 4
 69 | adapt_delta   = 0.8
 70 | max_treedepth = 10
 71 | #ergodic coefficients
 72 | c_a_erg=0.0
 73 | #parallel options
 74 | # flag_parallel = True
 75 | flag_parallel = False
 76 | 
 77 | #output sub-dir with corr with suffix info
 78 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 79 | 
 80 | #load cell dataframes
 81 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 82 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 83 | df_cellinfo = pd.read_csv(cellinfo_fname)
 84 | df_celldist = pd.read_csv(celldist_fname)
 85 | 
 86 | # Run stan regression
 87 | # ---------------------------
 88 | #create datafame with computation time
 89 | df_run_info = list()
 90 | 
 91 | #iterate over all synthetic datasets
 92 | for d_id in ds_id:
 93 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 94 |     #run time start
 95 |     run_t_strt = time.time()   
 96 |     #input flatfile
 97 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
 98 |     #load flatfile
 99 |     df_flatfile = pd.read_csv(ds_fname)
100 |     #keep only North records of NGAWest2
101 |     df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
102 |                                                  df_flatfile.sreg==1),:]
103 |     
104 |     #output file name and directory
105 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
106 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
107 | 
108 |     #run stan model
109 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
110 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
111 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
112 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
113 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
114 |        
115 |     #run time end
116 |     run_t_end = time.time()
117 | 
118 |     #compute run time
119 |     run_tm = (run_t_end - run_t_strt)/60
120 |   
121 |     #log run time
122 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
123 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
124 |                            
125 |     #write out run info
126 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
127 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
128 | 
129 | 
130 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_inla_model1_NGAWest2CA.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
  3 | # and calculates the non-ergodic terms
  4 | ##################################################################################
  5 | # user sets this file's directory as working directory.
  6 | # user installs INLA by running the following two lines in the console:
  7 | # options(timeout=600)
  8 | # install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
  9 | 
 10 | #user functions
 11 | source('../../../R_lib/regression/inla/regression_inla_model1_unbounded_hyp.R')
 12 | 
 13 | # Define variables
 14 | # ---------------------------
 15 | #main directory
 16 | main_dir <- '../../../../'                                           #local machine
 17 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
 18 | # main_dir <- '/Users/elnaz-seylabi/Dropbox/NonErgModeling-local/' 
 19 | 
 20 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 21 | # synds_suffix <- '_small_corr_len' 
 22 | # synds_suffix <- '_large_corr_len'
 23 | 
 24 | #synthetic datasets directory
 25 | ds_dir <- 'Data/Verification/synthetic_datasets/ds1'
 26 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix) 
 27 | 
 28 | # dataset info 
 29 | # ds_main_data_fname     <- 'CatalogNGAWest3CA_synthetic_data'
 30 | ds_main_data_fname     <- 'CatalogNGAWest3CALite_synthetic_data'
 31 | ds_id <- seq(1,5)
 32 | 
 33 | #output info
 34 | #main output filename
 35 | out_fname_main <- 'NGAWest2CA_syndata'
 36 | #main output directory
 37 | out_dir_main   <- 'Data/Verification/regression/ds1'
 38 | #output sub-directory (uncomment exactly one option; out_dir_sub is required below)
 39 | # out_dir_sub    <- 'INLA_NGAWest2CA'
 40 | #matern kernel function (nu=2)
 41 | # out_dir_sub    <- 'INLA_NGAWest2CA_fine'
 42 | # out_dir_sub    <- 'INLA_NGAWest2CA_medium'
 43 | # out_dir_sub    <- 'INLA_NGAWest2CA_coarse'
 44 | # out_dir_sub    <- 'INLA_NGAWest2CA_medium_full'
 45 | #exponential kernel function
 46 | # out_dir_sub    <- 'INLA_NGAWest2CA_fine_nexp'
 47 | # out_dir_sub    <- 'INLA_NGAWest2CA_medium_nexp'
 48 | # out_dir_sub    <- 'INLA_NGAWest2CA_coarse_nexp'
 49 | 
 50 | #inla parameters
 51 | runinla_flag <- TRUE # TRUE or FALSE
 52 | # alpha        <- 2   #matern kernel function nu=2
 53 | alpha        <- 3/2 #negative exponential kernel function
 54 | res_name     <- 'tot'
 55 | 
 56 | #mesh coarseness (uncomment exactly one set; all three values are passed to RunINLA)
 57 | # #fine
 58 | # mesh_edge_max     <- 5
 59 | # mesh_inner_offset <- 15
 60 | # mesh_outer_offset <- 15
 61 | # #medium
 62 | # mesh_edge_max     <- 15
 63 | # mesh_inner_offset <- 15
 64 | # mesh_outer_offset <- 50
 65 | # #coarse
 66 | # mesh_edge_max     <- 50
 67 | # mesh_inner_offset <- 50
 68 | # mesh_outer_offset <- 150
 69 | 
 70 | #approximation options
 71 | # if flag_gp_approx=TRUE uses int.strategy="eb" and strategy="gaussian"
 72 | # int.strategy="eb" corresponds to one integration point, and 
 73 | # strategy="gaussian" approximates posteriors as gaussian distributions
 74 | flag_gp_approx <- FALSE # TRUE or FALSE
 75 | 
 76 | #append the synthetic-dataset suffix to the output sub-directory name
 77 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
 78 | 
 79 | # Run inla regression
 80 | # ---------------------------
 81 | #data frame collecting one row of run information (host, output name, dataset id, run time) per dataset
 82 | df_run_info <- data.frame()
 83 | 
 84 | #iterate over all synthetic datasets
 85 | for (d_id in ds_id){
 86 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
 87 |   #run time start
 88 |   run_t_strt <- Sys.time()
 89 |   #input file names (analysis_fname is not referenced again in this script)
 90 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
 91 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
 92 | 
 93 |   #load files
 94 |   df_flatfile  <- read.csv(flatfile_fname)
 95 |   #keep only NGAWest2 records
 96 |   df_flatfile <- subset(df_flatfile, dsid==0)
 97 |   
 98 |   #output file name and directory
 99 |   out_fname <- sprintf('%s%s_Y%i',      out_fname_main, synds_suffix, d_id)
100 |   out_dir   <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
101 |   
102 |   #run INLA model
103 |   RunINLA(df_flatfile, out_fname, out_dir, res_name=res_name, 
104 |           alpha=alpha,
105 |           mesh_edge_max=mesh_edge_max, 
106 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
107 |           flag_gp_approx=flag_gp_approx,
108 |           runinla_flag=runinla_flag)
109 |   
110 |   #run time end
111 |   run_t_end <- Sys.time()
112 | 
113 |   #compute run time in minutes; a plain subtraction picks secs/mins/hours automatically, so rows would not be comparable
114 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
115 |   
116 |   #log run time
117 |   df_r_i <- data.frame(computer_name=Sys.info()["nodename"], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
118 |   df_run_info <- rbind(df_run_info, df_r_i)
119 | 
120 |   #write out run info (file is rewritten after every dataset with all rows collected so far)
121 |   row.names(df_run_info) <- NULL
122 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
123 |   write.csv(df_run_info, out_fname, row.names=FALSE)
124 | }
125 | 
126 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_inla_model2_uncorr_cells_NGAWest2CANorth.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
  3 | # and calculates the non-ergodic terms
  4 | ##################################################################################
  5 | # user sets this file's directory as working directory.
  6 | # user installs INLA by running the following two lines in the console:
  7 | # options(timeout=600)
  8 | # install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
  9 | 
 10 | #user functions
 11 | source('../../../R_lib/regression/inla/regression_inla_model2_uncorr_cells_unbounded_hyp.R')
 12 | 
 13 | # Define variables
 14 | # ---------------------------
 15 | #main directory
 16 | main_dir <- '../../../../'                                           #local machine
 17 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
 18 | # main_dir <- '/Users/elnaz-seylabi/Dropbox/NonErgModeling-local/' 
 19 | 
 20 | #output filename suffix
 21 | # synds_suffix <- '_small_corr_len' 
 22 | synds_suffix <- '_large_corr_len'
 23 | 
 24 | #synthetic datasets directory
 25 | ds_dir <- 'Data/Verification/synthetic_datasets/ds2'
 26 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix) 
 27 | 
 28 | # dataset info 
 29 | # ds_main_data_fname     <- 'CatalogNGAWest3CA_synthetic_data'
 30 | # ds_main_cellinfo_fname <- 'CatalogNGAWest3CA_cellinfo'
 31 | # ds_main_cellmat_fname  <- 'CatalogNGAWest3CA_distancematrix'
 32 | ds_main_data_fname        <- 'CatalogNGAWest3CALite_synthetic_data'
 33 | ds_main_cellinfo_fname    <- 'CatalogNGAWest3CALite_cellinfo'
 34 | ds_main_cellmat_fname     <- 'CatalogNGAWest3CALite_distancematrix'
 35 | ds_id <- seq(1,5)
 36 | 
 37 | #output info
 38 | #main output filename
 39 | out_fname_main <- 'NGAWest2CANorth_syndata'
 40 | #main output directory
 41 | out_dir_main   <- 'Data/Verification/regression/ds2'
 42 | #output sub-directory
 43 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells'
 44 | out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells_fine'
 45 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells_medium'
 46 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_uncorr_cells_coarse'
 47 | 
 48 | #inla parameters
 49 | runinla_flag <- TRUE # TRUE or FALSE
 50 | res_name     <- 'tot'
 51 | 
 52 | #mesh coarseness
 53 | #fine
 54 | mesh_edge_max     <- 5
 55 | mesh_inner_offset <- 15
 56 | mesh_outer_offset <- 15
 57 | # #medium
 58 | # mesh_edge_max     <- 15
 59 | # mesh_inner_offset <- 15
 60 | # mesh_outer_offset <- 50
 61 | # #coarse
 62 | # mesh_edge_max     <- 50
 63 | # mesh_inner_offset <- 50
 64 | # mesh_outer_offset <- 150
 65 | 
 66 | #ergodic coefficients
 67 | c_a_erg <- 0.0 #anelastic attenuation
 68 | 
 69 | #append the synthetic-dataset suffix to the output sub-directory name
 70 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
 71 | 
 72 | # Run inla regression
 73 | # ---------------------------
 74 | #data frame collecting one row of run information (host, output name, dataset id, run time) per dataset
 75 | df_run_info <- data.frame()
 76 | 
 77 | #iterate over all synthetic datasets
 78 | for (d_id in ds_id){
 79 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
 80 |   #run time start
 81 |   run_t_strt <- Sys.time()
 82 |   #input file names (analysis_fname is not referenced again in this script)
 83 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
 84 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
 85 |   cellinfo_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellinfo_fname))
 86 |   cellmat_fname  <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellmat_fname))
 87 | 
 88 |   #load files
 89 |   df_flatfile  <- read.csv(flatfile_fname)
 90 |   df_cellinfo  <- read.csv(cellinfo_fname)
 91 |   df_cellmat   <- read.csv(cellmat_fname)
 92 |   #keep only North records of NGAWest2 (dsid==0 and sreg==1)
 93 |   df_flatfile <- subset(df_flatfile, dsid==0 & sreg==1)
 94 | 
 95 |   #output file name and directory
 96 |   out_fname <- sprintf('%s%s_Y%i',      out_fname_main, synds_suffix, d_id)
 97 |   out_dir   <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
 98 |   
 99 |   #run INLA model
100 |   RunINLA(df_flatfile, df_cellinfo, df_cellmat, out_fname, out_dir, res_name=res_name, 
101 |           c_a_erg=c_a_erg,
102 |           mesh_edge_max=mesh_edge_max, 
103 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
104 |           runinla_flag=runinla_flag)
105 |   
106 |   #run time end
107 |   run_t_end <- Sys.time()
108 | 
109 |   #compute run time in minutes; a plain subtraction picks secs/mins/hours automatically, so rows would not be comparable
110 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
111 |   
112 |   #log run time
113 |   df_r_i <- data.frame(computer_name=Sys.info()["nodename"], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
114 |   df_run_info <- rbind(df_run_info, df_r_i)
115 | 
116 |   #write out run info (file is rewritten after every dataset with all rows collected so far)
117 |   row.names(df_run_info) <- NULL
118 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
119 |   write.csv(df_run_info, out_fname, row.names=FALSE)
120 | }
121 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_inla_model1_NGAWest2CANorth.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
  3 | # and calculates the non-ergodic terms
  4 | ##################################################################################
  5 | # user sets this file's directory as working directory.
  6 | # user installs INLA by running the following two lines in the console:
  7 | # options(timeout=600)
  8 | # install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
  9 | 
 10 | #user functions
 11 | source('../../../R_lib/regression/inla/regression_inla_model1_unbounded_hyp.R')
 12 | 
 13 | # Define variables
 14 | # ---------------------------
 15 | #main directory
 16 | main_dir <- '../../../../'                                           #local machine
 17 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
 18 | # main_dir <- '/Users/elnaz-seylabi/Dropbox/NonErgModeling-local/' 
 19 | 
 20 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 21 | # synds_suffix <- '_small_corr_len' 
 22 | # synds_suffix <- '_large_corr_len'
 23 | 
 24 | #synthetic datasets directory
 25 | ds_dir <- 'Data/Verification/synthetic_datasets/ds1'
 26 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix) 
 27 | 
 28 | # dataset info 
 29 | # ds_main_data_fname     <- 'CatalogNGAWest3CA_synthetic_data'
 30 | ds_main_data_fname     <- 'CatalogNGAWest3CALite_synthetic_data'
 31 | ds_id <- seq(1,5)
 32 | 
 33 | #output info
 34 | #main output filename
 35 | out_fname_main <- 'NGAWest2CANorth_syndata'
 36 | #main output directory
 37 | out_dir_main   <- 'Data/Verification/regression/ds1'
 38 | #output sub-directory (uncomment exactly one option; out_dir_sub is required below)
 39 | # out_dir_sub    <- 'INLA_NGAWest2CANorth'
 40 | #matern kernel function (nu=2)
 41 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_fine'
 42 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_medium'
 43 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_coarse'
 44 | #exponential kernel function
 45 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_fine_nexp'
 46 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_medium_nexp'
 47 | # out_dir_sub    <- 'INLA_NGAWest2CANorth_coarse_nexp'
 48 | 
 49 | #inla parameters
 50 | runinla_flag <- TRUE # TRUE or FALSE
 51 | # alpha        <- 2   #matern kernel function nu=2
 52 | alpha        <- 3/2 #negative exponential kernel function
 53 | res_name     <- 'tot'
 54 | 
 55 | 
 56 | #mesh coarseness (uncomment exactly one set; all three values are passed to RunINLA)
 57 | # #fine
 58 | # mesh_edge_max     <- 5
 59 | # mesh_inner_offset <- 15
 60 | # mesh_outer_offset <- 15
 61 | # #medium
 62 | # mesh_edge_max     <- 15
 63 | # mesh_inner_offset <- 15
 64 | # mesh_outer_offset <- 50
 65 | # #coarse
 66 | # mesh_edge_max     <- 50
 67 | # mesh_inner_offset <- 50
 68 | # mesh_outer_offset <- 150
 69 | 
 70 | #approximation options
 71 | # if flag_gp_approx=TRUE uses int.strategy="eb" and strategy="gaussian"
 72 | # int.strategy="eb" corresponds to one integration point, and 
 73 | # strategy="gaussian" approximates posteriors as gaussian distributions
 74 | flag_gp_approx <- TRUE # TRUE or FALSE
 75 | 
 76 | #append the synthetic-dataset suffix to the output sub-directory name
 77 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
 78 | 
 79 | # Run inla regression
 80 | # ---------------------------
 81 | #data frame collecting one row of run information (host, output name, dataset id, run time) per dataset
 82 | df_run_info <- data.frame()
 83 | 
 84 | #iterate over all synthetic datasets
 85 | for (d_id in ds_id){
 86 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
 87 |   #run time start
 88 |   run_t_strt <- Sys.time()
 89 |   #input file names (analysis_fname is not referenced again in this script)
 90 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
 91 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
 92 | 
 93 |   #load files
 94 |   df_flatfile  <- read.csv(flatfile_fname)
 95 |   #keep only North records of NGAWest2 (dsid==0 and sreg==1)
 96 |   df_flatfile <- subset(df_flatfile, dsid==0 & sreg==1)
 97 |   
 98 |   #output file name and directory
 99 |   out_fname <- sprintf('%s%s_Y%i',      out_fname_main, synds_suffix, d_id)
100 |   out_dir   <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
101 |   
102 |   #run INLA model
103 |   RunINLA(df_flatfile, out_fname, out_dir, res_name=res_name, 
104 |           alpha=alpha,
105 |           mesh_edge_max=mesh_edge_max, 
106 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
107 |           flag_gp_approx=flag_gp_approx,
108 |           runinla_flag=runinla_flag)
109 |   
110 |   #run time end
111 |   run_t_end <- Sys.time()
112 | 
113 |   #compute run time in minutes; a plain subtraction picks secs/mins/hours automatically, so rows would not be comparable
114 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
115 |   
116 |   #log run time
117 |   df_r_i <- data.frame(computer_name=Sys.info()["nodename"], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
118 |   df_run_info <- rbind(df_run_info, df_r_i)
119 | 
120 |   #write out run info (file is rewritten after every dataset with all rows collected so far)
121 |   row.names(df_run_info) <- NULL
122 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
123 |   write.csv(df_run_info, out_fname, row.names=FALSE)
124 | }
125 |   
126 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Jul 14 14:17:52 2021
  5 | 
  6 | @author: glavrent
  7 | """
  8 | # Working directory and Packages
  9 | # ---------------------------
 10 | #load libraries
 11 | import os
 12 | import sys
 13 | import numpy as np
 14 | import pandas as pd
 15 | import time
 16 | #user functions
 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
 18 | from regression_pystan_model2_corr_cells_unbounded_hyp import RunStan
 19 | # from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan
 20 | 
 21 | # Define variables
 22 | # ---------------------------
 23 | #filename suffix (uncomment exactly one option; synds_suffix is required below)
 24 | # synds_suffix = '_small_corr_len' 
 25 | # synds_suffix = '_large_corr_len'
 26 | 
 27 | #synthetic datasets directory
 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix)
 30 | 
 31 | # dataset info 
 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
 34 | ds_id = np.arange(1,6)
 35 | #cell specific anelastic attenuation
 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
 38 | 
 39 | #stan model (uncomment exactly one option; sm_fname is passed to RunStan)
 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan'
 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan'
 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan'
 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan'
 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient2.stan'
 45 | 
 46 | #output info
 47 | #main output filename
 48 | out_fname_main = 'NGAWest2CA_syndata'
 49 | #main output directory
 50 | out_dir_main   = '../../../../Data/Verification/regression/ds2/'
 51 | #output sub-directory (uncomment exactly one option; out_dir_sub is required below)
 52 | #python 2
 53 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_corr_cells'
 54 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_corr_cells_chol'
 55 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff'
 56 | # out_dir_sub    = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff2'
 57 | #python 3
 58 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_corr_cells'
 59 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_corr_cells_chol'
 60 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff'
 61 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff2'
 62 | # out_dir_sub    = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff_sp'
 63 | 
 64 | #stan parameters
 65 | runstan_flag = True
 66 | pystan_ver = 2
 67 | # pystan_ver = 3
 68 | res_name = 'tot'
 69 | n_iter = 1000
 70 | n_chains = 4
 71 | adapt_delta   = 0.8 #0.9
 72 | max_treedepth = 10
 73 | #ergodic coefficients
 74 | c_a_erg=0.0
 75 | #parallel options
 76 | # flag_parallel = True
 77 | flag_parallel = False
 78 | 
 79 | #append the synthetic-dataset suffix to the output sub-directory name
 80 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix)
 81 | 
 82 | #load cell dataframes
 83 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
 84 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
 85 | df_cellinfo = pd.read_csv(cellinfo_fname)
 86 | df_celldist = pd.read_csv(celldist_fname)
 87 | 
 88 | # Run stan regression
 89 | # ---------------------------
 90 | #create datafame with computation time
 91 | df_run_info = list()
 92 | 
 93 | #iterate over all synthetic datasets
 94 | for d_id in ds_id:
 95 |     print('Synthetic dataset %i fo %i'%(d_id, len(ds_id)))
 96 |     #run time start
 97 |     run_t_strt = time.time()        
 98 |     #input flatfile
 99 |     ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
100 |     #load flatfile
101 |     df_flatfile = pd.read_csv(ds_fname)
102 |     #keep only NGAWest2 records
103 |     df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]
104 |     
105 |     #output file name and directory
106 |     out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
107 |     out_dir   = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)
108 | 
109 |     #run stan model
110 |     RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 
111 |             out_fname, out_dir, res_name, c_a_erg=c_a_erg, 
112 |             runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
113 |             adapt_delta=adapt_delta, max_treedepth=max_treedepth,
114 |             pystan_ver=pystan_ver, pystan_parallel=flag_parallel)
115 |        
116 |     #run time end
117 |     run_t_end = time.time()
118 | 
119 |     #compute run time
120 |     run_tm = (run_t_end - run_t_strt)/60
121 |   
122 |     #log run time
123 |     df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub,
124 |                                      'ds_id':d_id,'run_time':run_tm}, index=[d_id]))
125 |                            
126 |     #write out run info
127 |     out_fname   = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
128 |     pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
129 |     
130 | 
131 | 


--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds3/main_inla_model3_uncorr_cells_NGAWest3CA.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
  3 | # and calculates the non-ergodic terms
  4 | ##################################################################################
  5 | 
  6 | #user functions
  7 | source('../../../R_lib/regression/inla/regression_inla_model3_uncorr_cells_unbounded_hyp.R')
  8 | 
  9 | # Define variables
 10 | # ---------------------------
 11 | #main directory
 12 | main_dir <- '../../../../'                                           #local machine
 13 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
 14 | 
 15 | #output filename suffix (uncomment exactly one option; synds_suffix is required below)
 16 | # synds_suffix <- '_small_corr_len' 
 17 | # synds_suffix <- '_large_corr_len'
 18 | 
 19 | #synthetic datasets directory
 20 | ds_dir <- 'Data/Verification/synthetic_datasets/ds3'
 21 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix) 
 22 | 
 23 | # dataset info 
 24 | # ds_main_data_fname     <- 'CatalogNGAWest3CA_synthetic_data'
 25 | # ds_main_cellinfo_fname <- 'CatalogNGAWest3CA_cellinfo'
 26 | # ds_main_cellmat_fname  <- 'CatalogNGAWest3CA_distancematrix'
 27 | ds_main_data_fname        <- 'CatalogNGAWest3CALite_synthetic_data'
 28 | ds_main_cellinfo_fname    <- 'CatalogNGAWest3CALite_cellinfo'
 29 | ds_main_cellmat_fname     <- 'CatalogNGAWest3CALite_distancematrix'
 30 | ds_id <- seq(1,5)
 31 | 
 32 | #output info
 33 | #main output filename
 34 | out_fname_main <- 'NGAWest2CA_syndata'
 35 | #main output directory
 36 | out_dir_main   <- 'Data/Verification/regression/ds3'
 37 | #output sub-directory (uncomment exactly one option; out_dir_sub is required below)
 38 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells'
 39 | #matern kernel function (nu=2)
 40 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells_fine'
 41 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells_medium'
 42 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells_coarse'
 43 | #exponential kernel function
 44 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells_fine_nerg'
 45 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells_medium_nerg'
 46 | # out_dir_sub    <- 'INLA_NGAWest3CA_uncorr_cells_coarse_nerg'
 47 | 
 48 | #inla parameters
 49 | runinla_flag <- TRUE
 50 | alpha        <- 2   #matern kernel function nu=2
 51 | # alpha        <- 3/2 #negative exponential kernel function
 52 | res_name     <- 'tot'
 53 | num_threads   <- 8
 54 | 
 55 | #mesh coarseness (uncomment exactly one set; all three values are passed to RunINLA)
 56 | # #fine
 57 | # mesh_edge_max     <- 5
 58 | # mesh_inner_offset <- 15
 59 | # mesh_outer_offset <- 15
 60 | # #medium
 61 | # mesh_edge_max     <- 15
 62 | # mesh_inner_offset <- 15
 63 | # mesh_outer_offset <- 50
 64 | # #coarse
 65 | # mesh_edge_max     <- 50
 66 | # mesh_inner_offset <- 50
 67 | # mesh_outer_offset <- 150
 68 | 
 69 | #ergodic coefficients
 70 | c_2_erg <- -2.0
 71 | c_3_erg <- -0.6
 72 | c_a_erg <-  0.0 #anelastic attenuation
 73 | 
 74 | #append the synthetic-dataset suffix to the output sub-directory name
 75 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
 76 | 
 77 | # Run inla regression
 78 | # ---------------------------
 79 | #data frame collecting one row of run information (host, output name, dataset id, run time) per dataset
 80 | df_run_info <- data.frame()
 81 | 
 82 | #iterate over all synthetic datasets
 83 | for (d_id in ds_id){
 84 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
 85 |   #run time start
 86 |   run_t_strt <- Sys.time()
 87 |   #input file names (analysis_fname is not referenced again in this script)
 88 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
 89 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
 90 |   cellinfo_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellinfo_fname))
 91 |   cellmat_fname  <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellmat_fname))
 92 | 
 93 |   #load files
 94 |   df_flatfile  <- read.csv(flatfile_fname)
 95 |   df_cellinfo  <- read.csv(cellinfo_fname)
 96 |   df_cellmat   <- read.csv(cellmat_fname)
 97 |   
 98 |   #output file name and directory
 99 |   out_fname <- sprintf('%s%s_Y%i',      out_fname_main, synds_suffix, d_id)
100 |   out_dir   <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
101 |   
102 |   #run INLA model
103 |   RunINLA(df_flatfile, df_cellinfo, df_cellmat, out_fname, out_dir, res_name=res_name, 
104 |           c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
105 |           alpha=alpha,
106 |           mesh_edge_max=mesh_edge_max, 
107 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
108 |           n_threads=num_threads,
109 |           runinla_flag=runinla_flag)
110 |   
111 |   #run time end
112 |   run_t_end <- Sys.time()
113 | 
114 |   #compute run time in minutes; a plain subtraction picks secs/mins/hours automatically, so rows would not be comparable
115 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
116 |   
117 |   #log run time
118 |   df_r_i <- data.frame(computer_name=Sys.info()["nodename"], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
119 |   df_run_info <- rbind(df_run_info, df_r_i)
120 | 
121 |   #write out run info (file is rewritten after every dataset with all rows collected so far)
122 |   row.names(df_run_info) <- NULL
123 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
124 |   write.csv(df_run_info, out_fname, row.names=FALSE)
125 | }
126 | 


--------------------------------------------------------------------------------