├── Analyses ├── Python_lib │ ├── __init__.py │ ├── QGIS │ │ ├── __init__.py │ │ └── pylib_QGIS.py │ ├── catalog │ │ ├── __init__.py │ │ └── pylib_catalog.py │ ├── plotting │ │ └── __init__.py │ ├── regression │ │ ├── __init__.py │ │ └── pylib_stats.py │ └── ground_motions │ │ ├── __init__.py │ │ └── pylib_Willis15CA_Vs30.py ├── README.md ├── Code_Verification │ ├── synthetic_datasets │ │ ├── read_me.txt │ │ └── create_synthetic_ds1.stan │ ├── preprocessing │ │ ├── PlotUsableMagRrupCatalog.py │ │ └── ComputeUsableMagRrupCatalog.R │ └── regression │ │ ├── ds1 │ │ ├── comparison_inla_model1_time.py │ │ ├── main_cmdstan_model1_NGAWest3CA.py │ │ ├── main_pystan_model1_NGAWest3CA.py │ │ ├── main_cmdstan_model1_NGAWest2CA.py │ │ ├── main_cmdstan_model1_NGAWest2CANorth.py │ │ ├── main_pystan_model1_NGAWest2CA.py │ │ ├── main_pystan_model1_NGAWest2CANorth.py │ │ ├── main_inla_model1_NGAWest2CA.R │ │ └── main_inla_model1_NGAWest2CANorth.R │ │ ├── ds2 │ │ ├── main_pystan_model2_corr_cells_NGAWest3CA_sparse.py │ │ ├── main_pystan_model2_corr_cells_NGAWest2CA_sparse.py │ │ ├── main_pystan_model2_corr_cells_NGAWest2CANorth_sparse.py │ │ ├── main_pystan_model2_corr_cells_NGAWest3CA.py │ │ ├── main_pystan_model2_uncorr_cells_NGAWest3CA.py │ │ ├── main_cmdstan_model2_corr_cells_NGAWest3CA.py │ │ ├── main_cmdstan_model2_uncorr_cells_NGAWest3CA.py │ │ ├── main_cmdstan_model2_corr_cells_NGAWest2CA.py │ │ ├── main_cmdstan_model2_uncorr_cells_NGAWest2CA.py │ │ ├── main_pystan_model2_uncorr_cells_NGAWest2CA.py │ │ ├── main_cmdstan_model2_corr_cells_NGAWest2CANorth.py │ │ ├── main_cmdstan_model2_uncorr_cells_NGAWest2CANorth.py │ │ ├── main_pystan_model2_uncorr_cells_NGAWest2CANorth.py │ │ ├── main_inla_model2_uncorr_cells_NGAWest2CANorth.R │ │ └── main_pystan_model2_corr_cells_NGAWest2CA.py │ │ └── ds3 │ │ ├── main_pystan_model3_corr_cells_NGAWest3CA.py │ │ ├── main_pystan_model3_uncorr_cells_NGAWest3CA.py │ │ ├── main_pystan_model3_corr_cells_NGAWest2CA.py │ │ ├── 
main_pystan_model3_uncorr_cells_NGAWest2CA.py │ │ ├── main_pystan_model3_corr_cells_NGAWest2CANorth.py │ │ ├── main_pystan_model3_uncorr_cells_NGAWest2CANorth.py │ │ ├── main_cmdstan_model3_uncorr_cells_NGAWest3CA.py │ │ ├── main_cmdstan_model3_corr_cells_NGAWest3CA.py │ │ ├── main_cmdstan_model3_corr_cells_NGAWest2CA.py │ │ ├── main_cmdstan_model3_uncorr_cells_NGAWest2CA.py │ │ ├── main_cmdstan_model3_corr_cells_NGAWest2CANorth.py │ │ ├── main_cmdstan_model3_uncorr_cells_NGAWest2CANorth.py │ │ ├── main_inla_model3_uncorr_cells_NGAWest2CANorth.R │ │ └── main_inla_model3_uncorr_cells_NGAWest3CA.R ├── R_lib │ └── auxiliary_functions.R ├── Prediction │ └── create_scen_dataframe.py ├── Regression │ └── README.md └── Stan_lib │ ├── regression_stan_model1_unbounded_hyp.stan │ └── regression_stan_model1_unbounded_hyp_chol.stan ├── .gitignore ├── requirements.txt ├── Examples ├── example1 │ ├── regression_stan_model.stan │ └── regression_inla_postprocessing.py └── example2 │ ├── create_reg_dataset.py │ └── regression_inla.R └── README.md /Analyses/Python_lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Analyses/Python_lib/QGIS/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Analyses/Python_lib/catalog/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Analyses/Python_lib/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Analyses/Python_lib/regression/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /Analyses/Python_lib/ground_motions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files and folders to ignore 2 | 3 | # R history 4 | .Rhistory 5 | 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Required python packages 2 | # for MyBinder 3 | #-------------------------- 4 | numpy 5 | scipy 6 | pandas 7 | matplotlib 8 | ipywidgets 9 | sklearn 10 | -------------------------------------------------------------------------------- /Analyses/README.md: -------------------------------------------------------------------------------- 1 | # Analyses Directory Description 2 | 3 | * ``Data_Preparation``: folder containing the Jupyter notebooks to prepare the input files for the NGMM regression 4 | * ``Regression``: folder containing the Jupyter notebooks for the NGMM regression using INLA, CMDSTAN, and PYSTAN 5 | * ``Prediction``: folder containing Jupyter notebooks and examples to make predictions using NGMMs 6 | * ``Code_Verification``: folder containing the codes used in the verification exercise 7 | * ``Python_lib``: folder containing the Python scripts 8 | * ``R_lib``: folder containing the R scripts 9 | * ``Stan_lib``: folder containing the STAN regression files for the NGMM regression 10 | * [Launch the Prior Distribution Visualization Tool](https://mybinder.org/v2/gh/NHR3-UCLA/ngmm_tools/bae8b8d09783d0916822ca2e138277b00d0ca6b5?urlpath=lab%2Ftree%2FAnalyses%2Fprior_distributions.ipynb) 11 | 12 | --------------------------------------------------------------------------------
/Analyses/Code_Verification/synthetic_datasets/read_me.txt: -------------------------------------------------------------------------------- 1 | File name Description 2 | --------------- --------------- 3 | create_synthetic_ds1.py Creates synthetic datasets with: one spatially varying earthquake constant, one spatially varying site 4 | constant, and one spatially independent site constant 5 | create_synthetic_ds2.py Creates synthetic datasets with: one spatially varying earthquake constant, one spatially varying site 6 | constant, one spatially independent site constant, and cell specific anelastic attenuation (spatially 7 | varying and independent component) 8 | create_synthetic_ds3.py Creates synthetic datasets with: one spatially varying earthquake constant, one spatially varying site 9 | constant, one spatially independent site constant, a spatially varying geometrical spreading term that is 10 | a function of the earthquake coordinates, a spatially varying Vs30 scaling term that is a function of the 11 | site coordinates and cell specific anelastic attenuation (spatially varying and independent component) 12 | -------------------------------------------------------------------------------- /Examples/example1/regression_stan_model.stan: -------------------------------------------------------------------------------- 1 | /********************************************* 2 | Stan program for toy example 3 | 4 | ********************************************/ 5 | 6 | data { 7 | int N; // number of observations 8 | int NG; // number of grid points 9 | 10 | //grid IDs 11 | int gid[N]; // grid id 12 | 13 | //observations 14 | vector[N] Y; 15 | 16 | //coordinates 17 | vector[2] X_g[NG]; 18 | } 19 | 20 | transformed data { 21 | real delta = 1e-9; 22 | } 23 | 24 | parameters { 25 | //aleatory std 26 | real sigma; 27 | //kernel hyper-paramters 28 | real ell; 29 | real omega; 30 | 31 | //model coefficient 32 | real c_0; 33 | //standardized normal variables for spatially 
correlated coefficient 34 | vector[NG] z_1; 35 | } 36 | 37 | transformed parameters { 38 | //spatially correlated coefficient 39 | vector[NG] c_1; 40 | 41 | { 42 | matrix[NG,NG] COV_1; 43 | matrix[NG,NG] L_1; 44 | for(i in 1:NG) { 45 | for(j in i:NG) { 46 | real C_1 = (omega^2 * exp(-distance(X_g[i],X_g[j])/ell)); 47 | COV_1[i,j] = C_1; 48 | COV_1[j,i] = C_1; 49 | } 50 | COV_1[i,i] += delta; 51 | } 52 | L_1 = cholesky_decompose(COV_1); 53 | c_1 = L_1 * z_1; 54 | } 55 | } 56 | 57 | 58 | model { 59 | //hyper-parameters 60 | ell ~ inv_gamma(2.,50); 61 | omega ~ exponential(5); 62 | sigma ~ lognormal(-1,0.3); 63 | 64 | //constant shift 65 | c_0 ~ normal(0.,0.1); 66 | //standardized normal variables for spatially correlated coefficient 67 | z_1 ~ std_normal(); 68 | 69 | //likelihood 70 | Y ~ normal(c_0 + c_1[gid], sigma); 71 | } 72 | 73 | -------------------------------------------------------------------------------- /Analyses/Python_lib/ground_motions/pylib_Willis15CA_Vs30.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Feb 2 19:01:47 2021 5 | 6 | @author: glavrent 7 | """ 8 | #load variables 9 | import pathlib 10 | import numpy as np 11 | import rasterio 12 | 13 | 14 | class Willis15Vs30CA: 15 | 16 | def __init__(self, fname_vs30map_med=None, fname_vs30map_sig=None): 17 | #file path 18 | root = pathlib.Path(__file__).parent 19 | #vs30 data filenames 20 | fname_vs30map_med = '/mnt/halcloud_nfs/glavrent/Research/Other_projects/VS30_CA/data/California_vs30_Wills15_hybrid_7p5c.tif' if fname_vs30map_med is None else fname_vs30map_med 21 | fname_vs30map_sig = '/mnt/halcloud_nfs/glavrent/Research/Other_projects/VS30_CA/data/California_vs30_Wills15_hybrid_7p5c_sd.tif' if fname_vs30map_sig is None else fname_vs30map_sig 22 | #load vs30 data 23 | # self.vs30map_med = rasterio.open(root / 'data/California_vs30_Wills15_hybrid_7p5c.tif') 24 | # self.vs30map_sig = 
rasterio.open(root / 'data/California_vs30_Wills15_hybrid_7p5c_sd.tif') 25 | self.vs30map_med = rasterio.open( fname_vs30map_med ) 26 | self.vs30map_sig = rasterio.open( fname_vs30map_sig ) 27 | 28 | 29 | def lookup(self, lonlats): 30 | return ( 31 | np.fromiter(self.vs30map_med.sample(lonlats, 1), np.float), 32 | np.fromiter(self.vs30map_sig.sample(lonlats, 1), np.float) 33 | ) 34 | 35 | def test_lookup(self): 36 | medians, stds = list(self.lookup([(-122.258, 37.875), (-122.295, 37.895)])) 37 | 38 | np.testing.assert_allclose(medians, [733.4, 351.9], rtol=0.01) 39 | np.testing.assert_allclose(stds, [0.432, 0.219], rtol=0.01) 40 | 41 | 42 | -------------------------------------------------------------------------------- /Analyses/Python_lib/regression/pylib_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Mar 15 13:56:13 2022 5 | 6 | @author: glavrent 7 | 8 | Other python statistics functions 9 | """ 10 | 11 | #imprort libraries 12 | import numpy as np 13 | 14 | def CalcRMS(samp_q, samp_p): 15 | ''' 16 | Compute root mean square error between observation samples (samp_p) and 17 | model samples (samp_p) 18 | 19 | Parameters 20 | ---------- 21 | samp_q : np.array() 22 | Model Samples. 23 | samp_p : np.array() 24 | Data Samples. 25 | 26 | Returns 27 | ------- 28 | real 29 | root mean square error 30 | ''' 31 | 32 | #errors 33 | e = samp_q - samp_p 34 | 35 | return np.sqrt(np.mean(e**2)) 36 | 37 | 38 | def CalcLKDivergece(samp_q, samp_p): 39 | ''' 40 | Compute Kullback–Leibler divergence of observation samples (samp_p) based 41 | on model samples (samp_p) 42 | 43 | Parameters 44 | ---------- 45 | samp_q : np.array() 46 | Model Samples. 47 | samp_p : np.array() 48 | Data Samples. 49 | 50 | Returns 51 | ------- 52 | real 53 | Kullback–Leibler divergence. 
54 | ''' 55 | 56 | #create histogram bins 57 | _, hist_bins = np.histogram(np.concatenate([samp_p,samp_q])) 58 | 59 | #count of p and q distribution 60 | p, _ = np.histogram(samp_p, bins=hist_bins) 61 | q, _ = np.histogram(samp_q, bins=hist_bins) 62 | 63 | #remove bins empty in any dist, otherwise kl= +/- inf 64 | i_empty_bins = np.logical_or(p==0, q==0) 65 | p = p[~i_empty_bins] 66 | q = q[~i_empty_bins] 67 | 68 | #normalize to compute probabilites 69 | p = p/p.sum() 70 | q = q/q.sum() 71 | 72 | return sum(p[i] * np.log2(p[i]/q[i]) for i in range(len(p))) 73 | -------------------------------------------------------------------------------- /Analyses/R_lib/auxiliary_functions.R: -------------------------------------------------------------------------------- 1 | ################################################# 2 | # This script contains various auxiliary 3 | # functions for R 4 | # 5 | ################################################# 6 | 7 | #libraries 8 | library(sp) 9 | library(rgdal) 10 | 11 | #Latlon to utm 12 | LongLatToUTM<-function(lat,lon,zone){ 13 | #' Convert Lat Lon to UTM coordinates 14 | #' 15 | #' Input: 16 | #' lat: array with latitude degrees 17 | #' lon: array longitude degrees 18 | #' zone: UTM zone 19 | #' 20 | #' Output: 21 | #' xy_utm: data.frame with id, Xutm, Yutm 22 | 23 | xy <- data.frame(ID = 1:length(lon), X = lon, Y = lat) 24 | coordinates(xy) <- c("X", "Y") 25 | proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example 26 | xy_utm <- spTransform(xy, CRS(paste("+proj=utm +zone=",zone," +datum=WGS84",sep=''))) 27 | return(as.data.frame(xy_utm)) 28 | } 29 | 30 | #Unique elements 31 | UniqueIdxInv <- function(data_array){ 32 | #' Unique elements, indices and inverse of data_array 33 | #' 34 | #' Input: 35 | #' data_array: input array 36 | #' 37 | #' Output: 38 | #' unq: unique data 39 | #' idx: indices of unique data 40 | #' inv: inverse indices for creating original array 41 | 42 | #number of data 43 | n_data 
<-length(data_array) 44 | 45 | #create data data-frame 46 | df_data <- data.frame(data=data_array) 47 | #get data-frame with unique data 48 | df_data_unq <- unique(df_data) 49 | data_unq <- df_data_unq$data 50 | 51 | #get indices of unique data values 52 | data_unq_idx <- strtoi(row.names(df_data_unq)) 53 | 54 | #get inverse indices 55 | data_unq_inv <- array(0,n_data) 56 | for (k in 1:length(data_unq)){ 57 | #return k for element equal to data_unq[k] else 0 58 | data_unq_inv <- data_unq_inv + ifelse(data_array %in% data_unq[k],k,0) 59 | } 60 | 61 | #return output 62 | return(list(unq=data_unq, idx=data_unq_idx, inv=data_unq_inv)) 63 | } 64 | -------------------------------------------------------------------------------- /Examples/example2/create_reg_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Mar 26 16:01:54 2022 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | import os 11 | import sys 12 | import pathlib 13 | #load packages 14 | import numpy as np 15 | import pandas as pd 16 | #plottign libraries 17 | import matplotlib as mpl 18 | from matplotlib import pyplot as plt 19 | import matplotlib.ticker as mticker 20 | 21 | # Define Problem 22 | # --------------------------- 23 | #number of samples 24 | n_samp = 1000 25 | 26 | #coefficients 27 | c0 = -0.2 28 | c1 = 0.6 29 | sig = 0.7 30 | 31 | #output directory 32 | dir_out = 'data/' 33 | 34 | # Create Dataset 35 | # --------------------------- 36 | #covariates 37 | x1 = np.random.randn(n_samp ) 38 | #noise 39 | eps = sig *np.random.randn(n_samp ) 40 | #response 41 | mu_y = c0 + c1 * x1 42 | y = mu_y + eps 43 | 44 | #model response 45 | model_x1 = np.linspace(-5,5) 46 | model_y = c0 + c1 * model_x1 47 | 48 | #regression data frame 49 | df_data = pd.DataFrame({'x1':x1, 'mu_y':mu_y, 'y':y}) 50 | 51 | # Save Dataset 52 | # 
--------------------------- 53 | pathlib.Path(dir_out).mkdir(parents=True, exist_ok=True) 54 | df_data.to_csv( dir_out + 'regression_dataset.csv', index=False ) 55 | 56 | # Summary Figures 57 | # --------------------------- 58 | #figure title 59 | fname_fig = 'fig_dataset' 60 | #create figure 61 | fig, ax = plt.subplots(figsize = (10,10)) 62 | #obsevations 63 | hl1 = ax.plot(df_data.x1, df_data.y, 'o') 64 | #plot response 65 | hl2 = ax.plot(model_x1, model_y, linewidth=3, color='black') 66 | #figure properties 67 | ax.grid(which='both') 68 | #tick size 69 | ax.tick_params(axis='x', labelsize=30) 70 | ax.tick_params(axis='y', labelsize=30) 71 | #labels 72 | ax.set_xlabel(r'$x_1$', fontsize=35) 73 | ax.set_ylabel(r'$y$', fontsize=35) 74 | #figure limits 75 | ax.set_xlim([-4, 4]) 76 | ax.set_ylim([-4, 4]) 77 | #save figure 78 | fig.tight_layout() 79 | fig.savefig( dir_out + fname_fig + '.png' ) 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/preprocessing/PlotUsableMagRrupCatalog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Oct 4 16:32:37 2021 5 | 6 | @author: glavrent 7 | """ 8 | # %% Required Packages 9 | # ====================================== 10 | #load libraries 11 | import os 12 | import pathlib 13 | #arithmetic libraries 14 | import numpy as np 15 | import pandas as pd 16 | #plotting libraries 17 | from matplotlib import pyplot as plt 18 | import matplotlib.ticker as mticker 19 | 20 | # %% Define variables 21 | # ====================================== 22 | #input file names 23 | fname_flatfile_NGA2 = '../../../Raw_files/nga_w2/Updated_NGA_West2_Flatfile_RotD50_d050_public_version.xlsx' 24 | fname_mag_rrup_lim = '../../../Data/Verification/preprocessing/flatfiles/usable_mag_rrup/usable_Mag_Rrup_coeffs.csv' 25 | 26 | #output directoy 27 | dir_fig = 
'../../../Data/Verification/preprocessing/flatfiles/usable_mag_rrup/' 28 | 29 | # %% Load Data 30 | # ====================================== 31 | #NGAWest2 32 | df_flatfile_NGA2 = pd.read_excel(fname_flatfile_NGA2) 33 | #M/R limit 34 | df_m_r_lim = pd.read_csv(fname_mag_rrup_lim,index_col=0) 35 | 36 | #remove rec with unavailable data 37 | df_flatfile_NGA2 = df_flatfile_NGA2.loc[df_flatfile_NGA2.EQID>0,:] 38 | df_flatfile_NGA2 = df_flatfile_NGA2.loc[df_flatfile_NGA2['ClstD (km)']>0,:] 39 | 40 | #mag and distance arrays 41 | mag_array = df_flatfile_NGA2['Earthquake Magnitude'] 42 | rrup_array = df_flatfile_NGA2['ClstD (km)'] 43 | 44 | #compute limit 45 | rrup_lim1 = np.arange(0,1001) 46 | mag_lim1 = (df_m_r_lim.loc['b0','coefficients'] + 47 | df_m_r_lim.loc['b1','coefficients'] * rrup_lim1 + 48 | df_m_r_lim.loc['b2','coefficients'] * rrup_lim1**2) 49 | rrup_lim2 = df_m_r_lim.loc['max_rrup','coefficients'] 50 | 51 | # %% Process Data 52 | # ====================================== 53 | if not os.path.isdir(dir_fig): pathlib.Path(dir_fig).mkdir(parents=True, exist_ok=True) 54 | 55 | # create figures 56 | # ---- ---- ---- ---- ---- 57 | # Mag-Dist distribution 58 | fname_fig = 'M-R_limits' 59 | #create figure 60 | fig, ax = plt.subplots(figsize = (10,9)) 61 | pl1 = ax.scatter(rrup_array, mag_array, label='NGAWest2 CA') 62 | pl2 = ax.plot(rrup_lim1, mag_lim1, linewidth=2, color='black') 63 | pl3 = ax.vlines(rrup_lim2, ymin=0, ymax=10, linewidth=2, color='black', linestyle='--') 64 | #edit figure properties 65 | ax.set_xlabel(r'Distance ($km$)', fontsize=30) 66 | ax.set_ylabel(r'Magnitude', fontsize=30) 67 | ax.grid(which='both') 68 | # ax.set_xscale('log') 69 | ax.set_xlim([0, 1000]) 70 | ax.set_ylim([2, 8]) 71 | ax.tick_params(axis='x', labelsize=25) 72 | ax.tick_params(axis='y', labelsize=25) 73 | # ax.legend(fontsize=25, loc='upper left') 74 | ax.xaxis.set_tick_params(which='major', size=10, width=2, direction='in', top='on') 75 | 
ax.xaxis.set_tick_params(which='minor', size=7, width=2, direction='in', top='on') 76 | ax.yaxis.set_tick_params(which='major', size=10, width=2, direction='in', right='on') 77 | ax.yaxis.set_tick_params(which='minor', size=7, width=2, direction='in', right='on') 78 | fig.tight_layout() 79 | #save figure 80 | fig.savefig( dir_fig + fname_fig + '.png' ) 81 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/preprocessing/ComputeUsableMagRrupCatalog.R: -------------------------------------------------------------------------------- 1 | # This script computes the usable distance range as a function of magntitude 2 | # based on NGAWest2 3 | ################################################################################## 4 | 5 | #libraries 6 | library(tidyverse) 7 | library(readxl) 8 | 9 | # Define variables 10 | # --------------------------- 11 | #input file names 12 | fname_flatfile_NGA2 <- '../../../Raw_files/nga_w2/Updated_NGA_West2_Flatfile_RotD50_d050_public_version.xlsx' 13 | 14 | #output directory 15 | out_dir <- '../../../Data/Verification/preprocessing/flatfiles/usable_mag_rrup' 16 | dir.create(out_dir, showWarnings = FALSE) 17 | 18 | #flag determine M/R limit 19 | # flag_reg <- TRUE 20 | flag_reg <- FALSE 21 | 22 | # Load Data 23 | # --------------------------- 24 | #NGAWest2 25 | df_flatfile_NGA2 <- read_excel(fname_flatfile_NGA2) 26 | 27 | #remove rec with unavailable data 28 | df_flatfile_NGA2 <- df_flatfile_NGA2[df_flatfile_NGA2$EQID>0,] 29 | df_flatfile_NGA2 <- df_flatfile_NGA2[df_flatfile_NGA2['ClstD (km)']>0,] 30 | 31 | #mag and distance arrays 32 | mag_array <- pull(df_flatfile_NGA2, 'Earthquake Magnitude') 33 | rrup_array <- pull(df_flatfile_NGA2, 'ClstD (km)') 34 | 35 | # Process Data 36 | # --------------------------- 37 | #compute mag/R usable range 38 | if (flag_reg){ 39 | # plot M/R distribution 40 | plot(rrup_array,mag_array,pch=19,xlim=c(1,1000),ylim=c(1,8)) 41 | grid() 42 | 
#estimate m-r coefficients 43 | clc <- locator(n=7) 44 | clcd <- data.frame(clc$x,clc$y,clc$x^2) 45 | names(clcd) <- c("X","Y","X2") 46 | outrg <- lm(Y~X + X2, data = clcd) 47 | coeffs_m_r <- as.data.frame( coefficients(outrg) ) 48 | rownames(coeffs_m_r) <- c('b0','b1','b2') 49 | colnames(coeffs_m_r) <- 'coefficients' 50 | #mag distance 51 | coeffs_m_r['max_rrup','coefficients'] <- 400 52 | } else { 53 | # #option 1 54 | # coeffs_m_r <- data.frame(coefficients=c(1.515945, -0.0008673127, 2.725194e-05), row.names = c('b0','b1','b2') ) 55 | # #option 2 56 | # coeffs_m_r <- data.frame(coefficients=c(1.238563, 0.0002829483, 2.65235e-05), row.names = c('b0','b1','b2') ) 57 | # #option 3 58 | # coeffs_m_r <- data.frame(coefficients=c(1.731417, 0.003432009, 1.273215e-05), row.names = c('b0','b1','b2') 59 | # read from file 60 | coeffs_m_r <- read.csv(file.path(out_dir, 'usable_Mag_Rrup_coeffs.csv'), row.names=1) 61 | # plot M/R distribution 62 | png(file=file.path(out_dir, 'usable_Mag_Rrup_range.png'), width=500, height=500) 63 | plot(rrup_array,mag_array,pch=19,xlim=c(1,1000),ylim=c(1,8)) 64 | grid() 65 | } 66 | 67 | 68 | #plot M/R limits 69 | line_mag_rrup <- data.frame(seq(1,1000,20),coeffs_m_r['b0','coefficients'] + 70 | coeffs_m_r['b1','coefficients'] *seq(1,1000,20) + 71 | coeffs_m_r['b2','coefficients'] *seq(1,1000,20)^2) 72 | lines(line_mag_rrup[,1],line_mag_rrup[,2],col=2) 73 | abline(v = coeffs_m_r['max_rrup','coefficients'],col=2,lty=2) 74 | if (!flag_reg) dev.off() 75 | 76 | # Output 77 | # --------------------------- 78 | #save coefficients 79 | write.csv(coeffs_m_r, file=file.path(out_dir, 'usable_Mag_Rrup_coeffs.csv')) 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds1/comparison_inla_model1_time.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Mar 15 
22:38:50 2022 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load variables 11 | import os 12 | import sys 13 | import pathlib 14 | #arithmetic libraries 15 | import numpy as np 16 | #statistics libraries 17 | import pandas as pd 18 | #plot libraries 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | from matplotlib.ticker import AutoLocator as plt_autotick 22 | 23 | # Define variables 24 | # --------------------------- 25 | #mesh info 26 | mesh_info = ['coarse', 'medium', 'fine'] 27 | 28 | #dataset name 29 | dataset_name = ['NGAWest2CANorth', 'NGAWest2CA', 'NGAWest3CA'] 30 | 31 | #correlation info 32 | # 1: Small Correlation Lengths 33 | # 2: Large Correlation Lenghts 34 | corr_id = 1 35 | 36 | #correlation name 37 | if corr_id == 1: 38 | synds_name = 'small corr len' 39 | synds_suffix = '_small_corr_len' 40 | elif corr_id == 2: 41 | synds_name = 'large corr len' 42 | synds_suffix = '_large_corr_len' 43 | 44 | #directories regressions 45 | dir_reg = '../../../../Data/Verification/regression/ds1/' 46 | 47 | #directory output 48 | dir_out = '../../../../Data/Verification/regression/ds1/comparisons/' 49 | 50 | # Load Data 51 | # --------------------------- 52 | #initialize dataframe 53 | df_runinfo_all = {}; 54 | 55 | #iterate over different analyses 56 | for j1, m_i in enumerate(mesh_info): 57 | for j2, d_n in enumerate(dataset_name): 58 | key_runinfo = '%s_%s'%(m_i, d_n) 59 | fname_runinfo = '%s/INLA_%s_%s%s/run_info.csv'%(dir_reg, d_n, m_i, synds_suffix) 60 | #store calc time 61 | df_runinfo_all[key_runinfo] = pd.read_csv(fname_runinfo) 62 | 63 | 64 | 65 | # Comparison Figures 66 | # --------------------------- 67 | pathlib.Path(dir_out).mkdir(parents=True, exist_ok=True) 68 | 69 | #line style (iterate with mesh info) 70 | line_style = [':','--','-'] 71 | #color map (iterate with dataset) 72 | c_map = plt.get_cmap('Dark2') 73 | 74 | #run time figure 75 | fig_fname = 
'run_time_inla' 76 | #create figure axes 77 | fig, ax = plt.subplots(figsize = (20,10)) 78 | #iterate over different analyses 79 | for j2, d_n in enumerate(dataset_name): 80 | for j1, (m_i, l_s) in enumerate(zip(mesh_info, line_style)): 81 | key_runinfo = '%s_%s'%(m_i, d_n) 82 | # 83 | ds_id = df_runinfo_all[key_runinfo].ds_id 84 | ds_name = ['Y%i'%d_i for d_i in ds_id] 85 | # 86 | run_time = df_runinfo_all[key_runinfo].run_time 87 | 88 | ax.plot(ds_id, run_time, linestyle=l_s, marker='o', linewidth=2, markersize=10, color=c_map(j2), label='%s - %s'%(d_n, m_i)) 89 | 90 | #figure properties 91 | ax.set_ylim([0, max(0.50, max(ax.get_ylim()))]) 92 | ax.set_xlabel('synthetic dataset', fontsize=30) 93 | ax.set_ylabel('Run Time (min)', fontsize=30) 94 | ax.grid(which='both') 95 | ax.set_xticks(ds_id) 96 | ax.set_xticklabels(labels=ds_name) 97 | ax.tick_params(axis='x', labelsize=25) 98 | ax.tick_params(axis='y', labelsize=25) 99 | #legend 100 | ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=25) 101 | #save figure 102 | fig.tight_layout() 103 | fig.savefig( dir_out + fig_fname + '.png' ) 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /Analyses/Prediction/create_scen_dataframe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Aug 20 17:26:17 2022 5 | 6 | @author: glavrent 7 | """ 8 | 9 | #load variables 10 | import os 11 | import sys 12 | import pathlib 13 | #arithmetic libraries 14 | import numpy as np 15 | #statistics libraries 16 | import pandas as pd 17 | #geographic libraries 18 | import pyproj 19 | import geopy.distance 20 | 21 | #user libraries 22 | sys.path.insert(0,'../Python_lib/ground_motions') 23 | from pylib_gmm_eas import BA18 24 | ba18 = BA18() 25 | 26 | # Define Problem 27 | # --------------------------- 28 | #structural period 29 | freq = 5.0119 30 | 31 | #earthquake 
scenario 32 | mag = 7.0 33 | vs30 = 400 34 | sof = 'SS' 35 | dip = 90 36 | z_tor = 0 37 | #color bar limits 38 | cbar_lim = [np.log(1e-8),np.log(.06)] 39 | 40 | #earthquake coordinates 41 | scen_eq_latlon = [34.2, -116.9] 42 | #utm zone 43 | utm_zone = '11S' 44 | 45 | #grid 46 | grid_X_dxdy = [10, 10] 47 | 48 | #scenario filename 49 | fname_scen_predict = '../../Data/Prediction/scen_predict.csv' 50 | 51 | # UTM projection 52 | # --------------------------- 53 | # projection system 54 | utmProj = pyproj.Proj("+proj=utm +zone="+utm_zone+", +ellps=WGS84 +datum=WGS84 +units=m +no_defs") 55 | 56 | #grid limits in UTM 57 | grid_X_win = np.array([[-140, 3500], [780, 4700]]) 58 | 59 | #create coordinate grid 60 | grid_x_edge = np.arange(grid_X_win[0,0],grid_X_win[1,0],grid_X_dxdy[0]) 61 | grid_y_edge = np.arange(grid_X_win[0,1],grid_X_win[1,1],grid_X_dxdy[0]) 62 | grid_x, grid_y = np.meshgrid(grid_x_edge, grid_y_edge) 63 | #create coordinate array with all grid nodes 64 | grid_X = np.vstack([grid_x.T.flatten(), grid_y.T.flatten()]).T 65 | #compute lat/lon coordinate array 66 | grid_latlon = np.fliplr(np.array([utmProj(g_x*1000, g_y*1000, inverse=True) for g_x, g_y in 67 | zip(grid_X[:,0], grid_X[:,1])])) 68 | n_gpt = len(grid_X) 69 | 70 | #earthquake UTM coordinates 71 | scen_eq_X = np.array(utmProj(scen_eq_latlon[1], scen_eq_latlon[0])) / 1000 72 | 73 | #create earthquake and site ids 74 | eqid_array = np.full(n_gpt, -1) 75 | site_array = -1*(1+np.arange(n_gpt)) 76 | 77 | # Compute Ergodic Base Scaling 78 | # --------------------------- 79 | #compute distances 80 | scen_dist_array = np.linalg.norm(grid_X - scen_eq_X, axis=1) 81 | scen_dist_array = np.sqrt(scen_dist_array**2 + z_tor**2) 82 | 83 | #scenarios of interest 84 | scen_eas_nerg_scl = np.full(n_gpt, np.nan) 85 | scen_eas_nerg_aleat = np.full(n_gpt, np.nan) 86 | for k, d in enumerate(scen_dist_array): 87 | fnorm = 1 if sof == 'SS' else 0 88 | #median and aleatory 89 | scen_eas_nerg_scl[k], _, scen_eas_nerg_aleat[k] 
= ba18.EasF(freq, mag, rrup=d, vs30=vs30, ztor=z_tor, fnorm=fnorm, flag_keep_b7 = False) 90 | 91 | 92 | # Summarize Scenario Dataframe 93 | # --------------------------- 94 | df_scen_prdct = pd.DataFrame({'eqid':eqid_array, 'ssn':site_array, 95 | 'eqLat':np.full(n_gpt,scen_eq_latlon[0]), 'eqLon':np.full(n_gpt,scen_eq_latlon[0]), 96 | 'staLat':grid_latlon[:,0], 'staLon':grid_latlon[:,1], 97 | 'eqX':np.full(n_gpt,scen_eq_X[0]), 'eqY':np.full(n_gpt,scen_eq_X[1]), 'eqZ':np.full(n_gpt,-z_tor), 98 | 'staX':grid_X[:,0], 'staY':grid_X[:,1], 99 | 'erg_base':scen_eas_nerg_scl}) 100 | 101 | #save prediction scenarios 102 | df_scen_prdct.to_csv(fname_scen_predict ) 103 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds1/main_cmdstan_model1_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | from regression_cmdstan_model1_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | 35 | #stan model 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan' 37 | # sm_fname = 
'../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan' 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan' 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan' 40 | 41 | #output info 42 | #main output filename 43 | out_fname_main = 'NGAWest3CA_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds1/' 46 | #output sub-directory 47 | # out_dir_sub = 'CMDSTAN_NGAWest3CA' 48 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_chol' 49 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_chol_eff' 50 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_chol_eff2' 51 | 52 | #stan parameters 53 | res_name='tot' 54 | n_iter_warmup = 500 55 | n_iter_sampling = 500 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #parallel options 60 | stan_parallel=False 61 | 62 | #output sub-dir with corr with suffix info 63 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 64 | 65 | # Run stan regression 66 | # --------------------------- 67 | #create datafame with computation time 68 | df_run_info = list() 69 | 70 | #iterate over all synthetic datasets 71 | for d_id in ds_id: 72 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 73 | #run time start 74 | run_t_strt = time.time() 75 | #input flatfile 76 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 77 | #load flatfile 78 | df_flatfile = pd.read_csv(ds_fname) 79 | 80 | #output file name and directory 81 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 82 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 83 | 84 | #run stan model 85 | RunStan(df_flatfile, sm_fname, 86 | out_fname, out_dir, res_name, 87 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 88 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 89 | stan_parallel=stan_parallel) 90 | 91 | #run time end 92 | run_t_end = time.time() 93 | 94 | #compute run time 95 | 
run_tm = (run_t_end - run_t_strt)/60 96 | 97 | #log run time 98 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 99 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 100 | 101 | #write out run info 102 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 103 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 104 | 105 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds1/main_pystan_model1_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model1_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | 35 | #stan model 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan' 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan' 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan' 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan' 40 | 41 | 
#output info 42 | #main output filename 43 | out_fname_main = 'NGAWest3CA_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds1/' 46 | #output sub-directory 47 | # out_dir_sub = 'PYSTAN_NGAWest3CA' 48 | # out_dir_sub = 'PYSTAN_NGAWest3CA_chol' 49 | # out_dir_sub = 'PYSTAN_NGAWest3CA_chol_eff' 50 | # out_dir_sub = 'PYSTAN_NGAWest3CA_chol_eff2' 51 | 52 | #stan parameters 53 | runstan_flag = True 54 | # pystan_ver = 2 55 | pystan_ver = 3 56 | res_name = 'tot' 57 | n_iter = 1000 58 | n_chains = 4 59 | adapt_delta = 0.8 60 | max_treedepth = 10 61 | #parallel options 62 | # flag_parallel = True 63 | flag_parallel = False 64 | 65 | #output sub-dir with corr with suffix info 66 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 67 | 68 | # Run stan regression 69 | # --------------------------- 70 | #create datafame with computation time 71 | df_run_info = list() 72 | 73 | #iterate over all synthetic datasets 74 | for d_id in ds_id: 75 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 76 | #run time start 77 | run_t_strt = time.time() 78 | #input flatfile 79 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 80 | #load flatfile 81 | df_flatfile = pd.read_csv(ds_fname) 82 | 83 | #output file name and directory 84 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 85 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 86 | 87 | #run stan model 88 | RunStan(df_flatfile, sm_fname, out_fname, out_dir, res_name, 89 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 90 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 91 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 92 | 93 | #run time end 94 | run_t_end = time.time() 95 | 96 | #compute run time 97 | run_tm = (run_t_end - run_t_strt)/60 98 | 99 | #log run time 100 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 101 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 
102 | 103 | #write out run info 104 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 105 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 106 | 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Non-ergodic Methodology and Modeling Tools 2 | 3 | This repository contains software tools for developing Nonergodic Ground Motion Models (NGMMs) based on the varying coefficient (Landwehr et al., 2016) and cell-specific anelastic attenuation approach (Dawood and Rodriguez‐Marek, 2013). 4 | Developed tools are available for R using the statistical package (R-INLA, https://www.r-inla.org/) and in Python using the CMDSTAN and PYSTAN interface packages for the Bayesian software (Stan, https://mc-stan.org/). Documentation and detailed instructions on the use of the developed tools are provided in [Lavrentiadis et al., 2022a GIRS report](https://www.risksciences.ucla.edu/girs-reports/2022/04). A general introduction and considerations for the development of NGMMs are presented in Lavrentiadis et al., 2022b. 5 | 6 | ## Home Page 7 | The project's home page with links to the various project deliverables is: https://www.risksciences.ucla.edu/nhr3/ngmm 8 | The project's data are accessible through [DesignSafe](https://www.designsafe-ci.org/data/browser/public/designsafe.storage.published/PRJ-5771) 9 | 10 | ## Folder Structure 11 | The main folder ``Analyses`` contains all the regression, prediction, hazard implementation, testing, and library scripts. 12 | Within the ``Analyses`` folder, ``Data_Preparation`` includes preprocessing scripts to prepare the ground-motion data for the NGMM regression. ``Regression`` contains the Jupyter notebooks for running the NGMM regressions using Stan and INLA. 
``Predictions`` includes the scripts for the conditional predictions for new scenarios based on the regression results. ``Code_Verification`` contains the codes associated with the verification exercise. 13 | Lastly, folders ``Python_lib``, ``R_lib``, and ``Stan_lib`` contain various scripts invoked in the main functions. 14 | 15 | The main folder ``Data`` mirrors the structure of the ``Analyses`` folder and contains all the input and output files. 16 | 17 | The ``Raw_files`` folder includes the files used to construct the synthetic datasets for the verification exercise. 18 | 19 | . 20 | |--Analyses 21 | | |--Data_Preparation 22 | | |--Regression 23 | | |--Predictions 24 | | |--Code_Verification 25 | | |--Python_lib 26 | | |--R_lib 27 | | |--Stan_lib 28 | | 29 | |--Data 30 | | |--Regression 31 | | |--Predictions 32 | | |--Code_Verification 33 | | 34 | |--Raw_files 35 | 36 | An example regression dataset, as well as the synthetic datasets and raw metadata, can be downloaded from [DesignSafe](https://www.designsafe-ci.org/data/browser/public/designsafe.storage.published/PRJ-5771). 37 | 38 | ## Acknowledgments 39 | Financial support by the California Department of Transportation and Pacific Gas & Electric Company is greatly appreciated. 40 | 41 | ## References 42 | Dawood, H. M., & Rodriguez‐Marek, A. (2013). A method for including path effects in ground‐motion prediction equations: An example using the Mw 9.0 Tohoku earthquake aftershocks. Bulletin of the Seismological Society of America, 103(2B), 1360-1372. 43 | 44 | Landwehr, N., Kuehn, N. M., Scheffer, T., & Abrahamson, N. (2016). A nonergodic ground‐motion model for California with spatially varying coefficients. Bulletin of the Seismological Society of America, 106(6), 2574-2583. 45 | 46 | Lavrentiadis, G., Kuehn, N. M., Bozorgnia, Y., Seylabi, E., Meng, X., Goulet, C., & Kottke, A. (2022a). Non‐ergodic Methodology and Modeling Tools. 
Natural Hazards Risk and Resiliency Research Center: The Garrick Institute for the Risk Sciences, University of California, Los Angeles. 47 | 48 | Lavrentiadis, G., Abrahamson, N. A., Kuehn, N. M., Bozorgnia, Y., Goulet, C. A., Babič, A., ... & Walling, M. (2022b). Overview and Introduction to Development of Non-Ergodic Earthquake Ground-Motion Models. Bulletin of Earthquake Engineering. 49 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds1/main_cmdstan_model1_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | from regression_cmdstan_model1_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | 35 | #stan model 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan' 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan' 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan' 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan' 40 | 41 | 
#output info 42 | #main output filename 43 | out_fname_main = 'NGAWest2CA_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds1/' 46 | #output sub-directory 47 | # out_dir_sub = 'CMDSTAN_NGAWest2CA' 48 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_chol' 49 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_chol_eff' 50 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_chol_eff2' 51 | 52 | #stan parameters 53 | res_name='tot' 54 | n_iter_warmup = 500 55 | n_iter_sampling = 500 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #parallel options 60 | stan_parallel=False 61 | 62 | #output sub-dir with corr with suffix info 63 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 64 | 65 | # Run stan regression 66 | # --------------------------- 67 | #create datafame with computation time 68 | df_run_info = list() 69 | 70 | #iterate over all synthetic datasets 71 | for d_id in ds_id: 72 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 73 | #run time start 74 | run_t_strt = time.time() 75 | #input flatfile 76 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 77 | #load flatfile 78 | df_flatfile = pd.read_csv(ds_fname) 79 | #keep only NGAWest2 records 80 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 81 | 82 | #output file name and directory 83 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 84 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 85 | 86 | #run stan model 87 | RunStan(df_flatfile, sm_fname, 88 | out_fname, out_dir, res_name, 89 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 90 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 91 | stan_parallel=stan_parallel) 92 | 93 | #run time end 94 | run_t_end = time.time() 95 | 96 | #compute run time 97 | run_tm = (run_t_end - run_t_strt)/60 98 | 99 | #log run time 100 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 101 | 
'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 102 | 103 | #write out run info 104 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 105 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 106 | 107 | -------------------------------------------------------------------------------- /Analyses/Regression/README.md: -------------------------------------------------------------------------------- 1 | # Non-ergodic Ground Motion Model Types: 2 | 3 | * Type-1: Three non-ergodic terms: 4 | 5 | $$ 6 | f_{nerg}(M,R_{rup},V_{S30},..., \vec{t_{E}}, \vec{t_{S}}) = f_{erg}(M,R_{rup},V_{S30},...) + \delta c_{1,E}(\vec{t_{E}}) + \delta c_{1a,S}(\vec{t_{S}}) + \delta c_{1b,S}(\vec{t_{S}}) 7 | $$ 8 | 9 |         10 | a spatially varying earthquake constant ( $\delta c_{1,E}$ ), a spatially varying site constant ( $\delta c_{1a,S}$ ), and a spatially independent site
11 |         12 | constant ( $\delta c_{1b,S}$ ). 13 | 14 | * Type-2: Four non-ergodic terms: 15 | 16 | $$ 17 | \begin{aligned} 18 | f_{nerg}(M,R_{rup},V_{S30},..., \vec{t_{E}}, \vec{t_{S}}) =& \left( f_{erg}(M,V_{S30},...) - c_{a~erg}~R_{rup} \right) + \delta c_{1,E}(\vec{t_{E}}) + \delta c_{1a,S}(\vec{t_{S}}) + \delta c_{1b,S}(\vec{t_{S}}) + \\ 19 | & \mathbf{c}_{ca,P} \cdot \Delta R 20 | \end{aligned} 21 | $$ 22 | 23 |         24 | a spatially varying earthquake constant ( $\delta c_{1,E}$ ), a spatially varying site constant ( $\delta c_{1a,S}$ ), a spatially independent site
25 |         26 | constant ( $\delta c_{1b,S}$ ), and cell-specific anelastic attenuation ( $\mathbf{c}_{ca,P} $). 27 | 28 | * Type-3: Six non-ergodic terms: 29 | 30 | $$ 31 | \begin{aligned} 32 | f_{nerg}(M,R_{rup},V_{S30},..., \vec{t_{E}}, \vec{t_{S}}) =& \left( f_{erg}(M,R_{rup},V_{S30},...) - \left( c_2 ~ f_{gs}(M,R_{rup}) + c_3 ~ f_{V_{S30}}(V_{S30}) + c_{a~erg} ~ R_{rup} \right) \right) + \\ 33 | & \delta c_{1,E}(\vec{t_{E}}) + \delta c_{1a,S}(\vec{t_{S}}) + \delta c_{1b,S}(\vec{t_{S}}) + \\ 34 | & c_{2,P}(\vec{t_{E}}) f_{gs}(M,R_{rup}) + c_{3,S}(\vec{t_{S}}) f_{V_{S30}}(V_{S30}) + \mathbf{c}_{ca,P} \cdot \Delta R 35 | \end{aligned} 36 | $$ 37 | 38 |         39 | a spatially varying earthquake constant ( $\delta c_{1,E}$ ), a spatially varying site constant ( $\delta c_{1a,S}$ ), a spatially independent site
40 |         41 | constant ( $\delta c_{1b,S}$ ), a spatially varying geometrical spreading coefficient ( $c_{2,P}$ ), a spatially varying $V_{S30}$ scaling ( $c_{3,S}$ ),
42 |         43 | and cell-specific anelastic attenuation ( $\mathbf{c}_{ca,P} $). 44 | 45 | # File Descriptions 46 | 47 | ### INLA 48 | * Type-1 NGMM: ``nonerg_gmm_regression_type1_inla.ipynb`` 49 | * Type-2 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type2_uncorrcells_inla.ipynb`` 50 | * Type-3 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type3_uncorrcells_inla.ipynb`` 51 | 52 | ### CMDSTAN 53 | * Type-1 NGMM: ``nonerg_gmm_regression_type1_cmdstan.ipynb`` 54 | * Type-2 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type2_corrcells_cmdstan.ipynb`` 55 | * Type-2 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type2_uncorrcells_cmdstan.ipynb`` 56 | * Type-3 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type3_corrcells_cmdstan.ipynb`` 57 | * Type-3 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type3_uncorrcells_cmdstan.ipynb`` 58 | 59 | ### PYSTAN 60 | * Type-1 NGMM: ``nonerg_gmm_regression_type1_pystan.ipynb`` 61 | * Type-2 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type2_corrcells_pystan.ipynb`` 62 | * Type-2 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type2_uncorrcells_pystan.ipynb`` 63 | * Type-3 NGMM with spatially correlated anelastic attenuation cells: ``nonerg_gmm_regression_type3_corrcells_pystan.ipynb`` 64 | * Type-3 NGMM with spatially uncorrelated anelastic attenuation cells: ``nonerg_gmm_regression_type3_uncorrcells_pystan.ipynb`` 65 | -------------------------------------------------------------------------------- /Examples/example2/regression_inla.R: -------------------------------------------------------------------------------- 1 | #libraries 2 | library(stringr) 3 | library(assertthat) 4 | library(pracma) 5 | # Bayesian regression 6 | 
library(INLA)
library(inlabru)
library(posterior)
#plotting packages
library(ggplot2)
library(maps)

# Define Problem
# ---------------------------
#data filename
fname_data <- 'data/regression_dataset.csv'
#output directory
dir_out <- 'data/inla_regression/'

# Read Data
# ---------------------------
df_data <- read.csv(fname_data)

# Preprocess Data
# ---------------------------
n_data <- nrow(df_data)

# Run INLA, fit model
# ---------------------------
#prior of fixed effects (intercept and slope)
prior.fixed <- list(mean.intercept = 0, prec.intercept = 1,
                    mean = 0, prec = 1)
#prior of likelihood precision (log-scale)
#NOTE(review): prior.prec already wraps the prior in a `prec` entry and is
# nested under a second `prec` in control.family below -- confirm that INLA
# picks up this log-gamma prior instead of falling back to its default
prior.prec <- list(prec = list(prior = "loggamma", param = c(4.0, 0.5)))

#run regression: linear model y ~ x1 with Gaussian noise
fit_inla <- inla(y ~ x1, data = df_data, family="gaussian",
                 control.fixed = prior.fixed,
                 control.family = list(hyper = list(prec = prior.prec)),
                 control.inla = list(int.strategy='eb', strategy="gaussian"),
                 verbose=TRUE)


# Post-processing
# ---------------------------
#compute posterior distributions
#marginal posteriors of the intercept (c0) and the slope (c1)
df_post_c0  <- as.data.frame( fit_inla$marginals.fixed$`(Intercept)` )
df_post_c1  <- as.data.frame( fit_inla$marginals.fixed$x1 )
#map the log-precision marginal to the standard deviation: sigma = exp(-log(prec)/2)
df_post_sig <- as.data.frame(inla.tmarginal(function(x) exp(-x/2), fit_inla$internal.marginals.hyperpar[['Log precision for the Gaussian observations']]))

# Plotting
# ---------------------------
#line plot of a marginal posterior (columns x, y) with the common figure theme
plot_posterior <- function(df_post, x_label, y_label, x_lim, y_lim) {
  ggplot() + geom_line(data=df_post, aes(x=x, y=y)) + theme_bw() +
    labs(x=x_label, y=y_label) + xlim(x_lim[1], x_lim[2]) + ylim(y_lim[1], y_lim[2]) +
    theme(plot.title=element_text(size=20), axis.title=element_text(size=20),
          axis.text.y=element_text(size=20), axis.text.x=element_text(size=20),
          legend.key.size = unit(1, 'cm'), legend.text=element_text(size=20))
}

#x-axis label names the plotted parameter (previously "sigma" on all three figures)
pl_post_c0  <- plot_posterior(df_post_c0,  "c0",    "posterior(c0)",    c(-.25,-0.1), c(0, 30))
pl_post_c1  <- plot_posterior(df_post_c1,  "c1",    "posterior(c1)",    c(0.5,0.8),   c(0, 20))
pl_post_sig <- plot_posterior(df_post_sig, "sigma", "posterior(sigma)", c(0.6,0.8),   c(0, 30))


# Save Data
# ---------------------------
#create output directories (including missing parent folders)
dir.create(dir_out, showWarnings = FALSE, recursive = TRUE)

#write out regression results
write.csv(df_post_c0,  file=file.path(dir_out, 'inla_c0_posterior.csv'),    row.names = FALSE )
write.csv(df_post_c1,  file=file.path(dir_out, 'inla_c1_posterior.csv'),    row.names = FALSE )
write.csv(df_post_sig, file=file.path(dir_out, 'inla_sigma_posterior.csv'), row.names = FALSE )

#save figures
#--- --- --- --- --- ---
#posterior distributions
ggsave(file.path(dir_out,'inla_c0_posterior.png'),  plot=pl_post_c0,  device='png')
ggsave(file.path(dir_out,'inla_c1_posterior.png'),  plot=pl_post_c1,  device='png')
ggsave(file.path(dir_out,'inla_sig_posterior.png'), plot=pl_post_sig, device='png')



92 | -------------------------------------------------------------------------------- /Analyses/Python_lib/catalog/pylib_catalog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jul 20 10:39:12 2021 5 | 6 | @author: glavrent 7 | """ 8 | 9 | #load libraries 10 | #arithmetic libraries 11 | import numpy as np 12 | 
13 | def IndexAvgColumns(df_data, col_idx, col2avg): 14 | ''' 15 | Average columns based on index column 16 | 17 | Parameters 18 | ---------- 19 | df_data : pd.dataframe 20 | Data data-frame. 21 | col_idx : str 22 | Name of index column. 23 | col2avg : list 24 | List of column names to be averaged. 25 | 26 | Returns 27 | ------- 28 | df_data : pd.dataframe 29 | Data data-frame. 30 | 31 | ''' 32 | 33 | #unique ids 34 | idx_array, inv_array = np.unique(df_data[col_idx], return_inverse=True) 35 | #iterate over columns 36 | for col in col2avg: 37 | #compute average values for all unique indices 38 | avg_vals = np.array([np.nanmean(df_data.loc[df_data[col_idx] == idx,col]) for idx in idx_array]) 39 | df_data.loc[:,col] = avg_vals[inv_array] 40 | 41 | return df_data 42 | 43 | def ColocatePt(df_flatfile, col_idx, col_coor, thres_dist=0.01, return_df_pt=False): 44 | ''' 45 | Colocate points (assign same ID) based on threshold distance. 46 | 47 | Parameters 48 | ---------- 49 | df_flatfile : pd.DataFrame 50 | Catalog flatfile. 51 | col_idx : str 52 | Name of index column. 53 | col_coor : list of str 54 | List of coordinate name columns. 55 | thres_dist : real, optional 56 | Value of threshold distance. The default is 0.01. 57 | return_df_pt : bool, optional 58 | Option for returning point data frame. The default is False. 59 | 60 | Returns 61 | ------- 62 | df_flatfile : pd.DataFrame 63 | Catalog flatfile with updated index column. 64 | df_pt: pd.DataFrame 65 | Point data frame with updated index column. 
66 | ''' 67 | 68 | #dataframe with unique points 69 | _, pt_idx, pt_inv = np.unique(df_flatfile[col_idx], axis=0, return_index=True, return_inverse=True) 70 | df_pt = df_flatfile.loc[:,[col_idx] + col_coor].iloc[pt_idx,:] 71 | 72 | #find and merge collocated points 73 | for _, pt in df_pt.iterrows(): 74 | #distance between points 75 | dist2pt = np.linalg.norm((df_pt[col_coor] - pt[col_coor]).astype(float), axis=1) 76 | #indices of collocated points 77 | i_pt_coll = dist2pt < thres_dist 78 | #assign new id for collocated points 79 | df_pt.loc[i_pt_coll,col_idx] = pt[col_idx].astype(int) 80 | 81 | #update pt info to main catalog 82 | df_flatfile.loc[:,col_idx] = df_pt[col_idx].values[pt_inv] 83 | 84 | if not return_df_pt: 85 | return df_flatfile 86 | else: 87 | return df_flatfile, df_pt 88 | 89 | def UsableSta(mag_array, dist_array, df_coeffs): 90 | ''' 91 | Find records that meet the mag-distance limits 92 | 93 | Parameters 94 | ---------- 95 | mag_array : np.array 96 | Magnitude array. 97 | dist_array : np.array 98 | Distance array. 99 | df_coeffs : pd.DataFrame 100 | Coefficients dataframe. 101 | 102 | Returns 103 | ------- 104 | rec_lim : np.array 105 | logical array with True for records that meet M/R limits. 
106 | 107 | ''' 108 | 109 | #rrup limit 110 | rrup_lim = dist_array <= df_coeffs.loc['max_rrup','coefficients'] 111 | 112 | #mag limit 113 | mag_min = (df_coeffs.loc['b1','coefficients'] + 114 | df_coeffs.loc['b1','coefficients'] * dist_array + 115 | df_coeffs.loc['b2','coefficients'] * dist_array**2) 116 | mag_lim = mag_array >= mag_min 117 | 118 | #find records that meet both conditions 119 | rec_lim = np.logical_and(rrup_lim, mag_lim) 120 | 121 | return rec_lim 122 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest3CA_sparse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | #cell specific anelastic attenuation 35 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 36 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 37 | 38 | #stan model 39 | sm_fname = 
'../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 40 | 41 | #output info 42 | #main output filename 43 | out_fname_main = 'NGAWest3CA_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 46 | #output sub-directory 47 | out_dir_sub = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff_sp' 48 | 49 | #stan parameters 50 | runstan_flag = True 51 | pystan_ver = 2 52 | # pystan_ver = 3 53 | res_name = 'tot' 54 | n_iter = 1000 55 | n_chains = 4 56 | adapt_delta = 0.8 57 | max_treedepth = 10 58 | #ergodic coefficients 59 | c_a_erg=0.0 60 | #parallel options 61 | # flag_parallel = True 62 | flag_parallel = False 63 | 64 | #output sub-dir with corr with suffix info 65 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 66 | 67 | #load cell dataframes 68 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 69 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 70 | df_cellinfo = pd.read_csv(cellinfo_fname) 71 | df_celldist = pd.read_csv(celldist_fname) 72 | 73 | # Run stan regression 74 | # --------------------------- 75 | #create datafame with computation time 76 | df_run_info = list() 77 | 78 | #iterate over all synthetic datasets 79 | for d_id in ds_id: 80 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 81 | #run time start 82 | run_t_strt = time.time() 83 | #input flatfile 84 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 85 | #load flatfile 86 | df_flatfile = pd.read_csv(ds_fname) 87 | 88 | #output file name and directory 89 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 90 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 91 | 92 | #run stan model 93 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 94 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 95 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 96 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 97 | pystan_ver=pystan_ver, 
pystan_parallel=flag_parallel) 98 | 99 | #run time end 100 | run_t_end = time.time() 101 | 102 | #compute run time 103 | run_tm = (run_t_end - run_t_strt)/60 104 | 105 | #log run time 106 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 107 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 108 | 109 | #write out run info 110 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 111 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 112 | 113 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/synthetic_datasets/create_synthetic_ds1.stan: -------------------------------------------------------------------------------- 1 | /********************************************* 2 | Stan program to create a synthetic data-set 3 | with a zero correlation length station term, 4 | an earthquake and station spatially varying 5 | terms and between and within event aleatory 6 | terms 7 | ********************************************/ 8 | 9 | data { 10 | int N; // number of records 11 | int NEQ; // number of earthquakes 12 | int NSTAT; // number of stations 13 | 14 | //event and station ID 15 | int eq[N]; // event id (in numerical order from 1 to last) 16 | int stat[N]; // station id (in numerical order from 1 to last) 17 | 18 | //earthquake and station coordinates 19 | vector[2] X_e[NEQ]; 20 | vector[2] X_s[NSTAT]; 21 | 22 | //assumed hyper-parameters 23 | //earthquake and site constants 24 | real omega_0; 25 | real omega_1e; 26 | real omega_1as; 27 | real omega_1bs; 28 | real ell_1e; 29 | real ell_1as; 30 | //aleatory terms 31 | real tau_0; 32 | real phi_0; 33 | 34 | //mean of ergodic GMM 35 | vector[N] mu_gmm; 36 | } 37 | 38 | transformed data { 39 | real delta = 1e-9; 40 | 41 | //priors means 42 | real dc_0_mu = 0.; 43 | vector[NEQ] dc_1e_mu = rep_vector(0.,NEQ); 44 | vector[NSTAT] dc_1as_mu = rep_vector(0.,NSTAT); 45 | vector[NSTAT] dc_1bs_mu = 
rep_vector(0.,NSTAT); 46 | } 47 | 48 | parameters {} 49 | 50 | model {} 51 | 52 | generated quantities { 53 | //coefficient samples 54 | real dc_0; 55 | vector[NEQ] dc_1e; //spatially varing terms 56 | vector[NSTAT] dc_1as; 57 | vector[NSTAT] dc_1bs; 58 | //samples of aleatory terms 59 | vector[NEQ] dB; 60 | vector[N] dW; 61 | //gm samples 62 | vector[N] Y_var_ceoff; 63 | vector[N] Y_nerg_med; 64 | vector[N] Y_aleat; 65 | vector[N] Y_tot; 66 | 67 | //latent variable for constant shift 68 | { 69 | dc_0 = normal_rng(dc_0_mu,omega_0); 70 | } 71 | 72 | //generate latent variable for spatially varying earthquake term 73 | { 74 | matrix[NEQ,NEQ] cov_1e; 75 | 76 | for(i in 1:NEQ) { 77 | for(j in i:NEQ) { 78 | real d_e; 79 | real c_1e; 80 | 81 | d_e = distance(X_e[i],X_e[j]); 82 | 83 | c_1e = (omega_1e^2 * exp(-d_e/ell_1e)); 84 | 85 | cov_1e[i,j] = c_1e; 86 | cov_1e[j,i] = c_1e; 87 | } 88 | cov_1e[i,i] += delta; 89 | } 90 | dc_1e = multi_normal_rng(dc_1e_mu, cov_1e); 91 | } 92 | 93 | //generate latent variable for spatially varying station term 94 | { 95 | matrix[NSTAT,NSTAT] cov_1as; 96 | 97 | for(i in 1:NSTAT) { 98 | for(j in i:NSTAT) { 99 | real d_s; 100 | real c_1as; 101 | 102 | d_s = distance(X_s[i],X_s[j]); 103 | 104 | c_1as = (omega_1as^2 * exp(-d_s/ell_1as)); 105 | 106 | cov_1as[i,j] = c_1as; 107 | cov_1as[j,i] = c_1as; 108 | } 109 | cov_1as[i,i] += delta; 110 | } 111 | dc_1as = multi_normal_rng(dc_1as_mu, cov_1as); 112 | } 113 | 114 | //generate latent variable for independent varying station term 115 | { 116 | for(i in 1:NSTAT) { 117 | dc_1bs[i] = normal_rng(dc_1bs_mu[i], omega_1bs); 118 | } 119 | } 120 | 121 | //generate aleatory terms 122 | { 123 | for(i in 1:N) { 124 | dW[i] = normal_rng(0., phi_0); 125 | } 126 | for(i in 1:NEQ) { 127 | dB[i] = normal_rng(0., tau_0); 128 | } 129 | } 130 | 131 | //generate gm random samples 132 | //add contributions of spatially varying terms 133 | { 134 | Y_var_ceoff = dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat]; 135 | } 
136 | 137 | //median ground motion 138 | Y_nerg_med = mu_gmm + Y_var_ceoff; 139 | //aleatory variability 140 | Y_aleat = dW + dB[eq]; 141 | //total gm 142 | Y_tot = Y_nerg_med + Y_aleat; 143 | } 144 | 145 | 146 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds1/main_cmdstan_model1_NGAWest2CANorth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | from regression_cmdstan_model1_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | 35 | #stan model 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan' 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan' 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan' 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan' 40 | 41 | #output info 42 | #main output filename 43 | out_fname_main = 'NGAWest2CANorth_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds1/' 
46 | #output sub-directory 47 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth' 48 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_chol' 49 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_chol_eff' 50 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_chol_eff2' 51 | 52 | #stan parameters 53 | res_name='tot' 54 | n_iter_warmup = 500 55 | n_iter_sampling = 500 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #parallel options 60 | stan_parallel=False 61 | 62 | #output sub-dir with corr with suffix info 63 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 64 | 65 | # Run stan regression 66 | # --------------------------- 67 | #create datafame with computation time 68 | df_run_info = list() 69 | 70 | #iterate over all synthetic datasets 71 | for d_id in ds_id: 72 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 73 | #run time start 74 | run_t_strt = time.time() 75 | #input flatfile 76 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 77 | #load flatfile 78 | df_flatfile = pd.read_csv(ds_fname) 79 | #keep only North records of NGAWest2 80 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 81 | df_flatfile.sreg==1),:] 82 | 83 | #output file name and directory 84 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 85 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 86 | 87 | #run stan model 88 | RunStan(df_flatfile, sm_fname, 89 | out_fname, out_dir, res_name, 90 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 91 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 92 | stan_parallel=stan_parallel) 93 | 94 | #run time end 95 | run_t_end = time.time() 96 | 97 | #compute run time 98 | run_tm = (run_t_end - run_t_strt)/60 99 | 100 | #log run time 101 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 102 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 103 | 104 | #write out run info 105 | out_fname = 
'%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 106 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 107 | 108 | -------------------------------------------------------------------------------- /Analyses/Python_lib/QGIS/pylib_QGIS.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue May 19 11:04:00 2020 5 | 6 | @author: glavrent 7 | """ 8 | 9 | #load libraries 10 | 11 | #load GIS 12 | from qgis.core import QgsVectorLayer, QgsPointXY 13 | from qgis.core import QgsField, QgsFeature, QgsGeometry, QgsVectorFileWriter, QgsFeatureSink 14 | from qgis.PyQt.QtCore import QVariant 15 | 16 | def EQLayer(eq_data): 17 | ''' 18 | Create earthquake source layer for QGIS 19 | 20 | Parameters 21 | ---------- 22 | eq_data : pd.dataframe 23 | Dataframe for rupture points with fields: 24 | eqid, region, mag, SOF, Ztor, eqLat, eqLon 25 | 26 | Returns 27 | ------- 28 | eq_layer : TYPE 29 | QGIS layer with earthquake sources. 
30 | ''' 31 | 32 | #create qgis layer for earthquake sources 33 | eq_layer = QgsVectorLayer("Point", "eq_pts", "memory") 34 | eq_pr = eq_layer.dataProvider() 35 | eq_pr.addAttributes([QgsField("eqid", QVariant.Int), 36 | QgsField("region", QVariant.Int), 37 | QgsField("mag", QVariant.Double), 38 | QgsField("SOF", QVariant.Int), 39 | QgsField("Ztor", QVariant.Double), 40 | QgsField("eqLat", QVariant.Double), 41 | QgsField("eqLon", QVariant.Double)]) 42 | 43 | #iterate over earthquakes, add on layer 44 | eq_layer.startEditing() 45 | for eq in eq_data.iterrows(): 46 | #earthquake info 47 | eq_info = eq[1][['eqid','region','mag','SOF','Ztor']].tolist() 48 | eq_latlon = eq[1][['eqLat','eqLon']].tolist() 49 | #define feature, earthquake 50 | eq_f = QgsFeature() 51 | eq_f.setGeometry(QgsGeometry.fromPointXY(QgsPointXY(eq_latlon[1],eq_latlon[0]))) 52 | eq_f.setAttributes(eq_info + eq_latlon) 53 | #add earthquake in layer 54 | eq_pr.addFeatures([eq_f]) 55 | #commit changes 56 | eq_layer.commitChanges() 57 | #update displacement layer 58 | eq_layer.updateExtents() 59 | 60 | return eq_layer 61 | 62 | def STALayer(sta_data): 63 | ''' 64 | Create station layer for QGIS 65 | 66 | Parameters 67 | ---------- 68 | sta_data : pd.dataframe 69 | Dataframe for rupture points with fields: 70 | 'ssn','region','Vs30','Z1.0','StaLat','StaLon' 71 | eqid','region','mag','SOF','eqLat','eqLon' 72 | 73 | Returns 74 | ------- 75 | sta_layer : TYPE 76 | QGIS layer with station points. 
77 | ''' 78 | 79 | #create qgis layer for station locations 80 | sta_layer = QgsVectorLayer("Point", "sta_pts", "memory") 81 | sta_pr = sta_layer.dataProvider() 82 | sta_pr.addAttributes([QgsField("ssn", QVariant.Int), 83 | QgsField("region", QVariant.Int), 84 | QgsField("Vs30", QVariant.Double), 85 | QgsField("Z1.0", QVariant.Double), 86 | QgsField("staLat", QVariant.Double), 87 | QgsField("staLon", QVariant.Double)]) 88 | 89 | #iterate over station, add on layer 90 | sta_layer.startEditing() 91 | for sta in sta_data.iterrows(): 92 | #earthquake info 93 | sta_info = sta[1][['ssn','region','Vs30','Z1.0']].tolist() 94 | sta_latlon = sta[1][['staLat','staLon']].tolist() 95 | #define feature, earthquake 96 | sta_f = QgsFeature() 97 | sta_f.setGeometry(QgsGeometry.fromPointXY(QgsPointXY(sta_latlon[1],sta_latlon[0]))) 98 | sta_f.setAttributes(sta_info + sta_latlon) 99 | #add earthquake in layer 100 | sta_pr.addFeatures([sta_f]) 101 | #commit changes 102 | sta_layer.commitChanges() 103 | #update displacement layer 104 | sta_layer.updateExtents() 105 | 106 | return sta_layer 107 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_pystan_model3_corr_cells_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model3_corr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 
26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan' 41 | 42 | #output info 43 | #main output filename 44 | out_fname_main = 'NGAWest3CA_syndata' 45 | #main output directory 46 | out_dir_main = '../../../../Data/Validation/regression/ds3/' 47 | #output sub-directory 48 | out_dir_sub = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff' 49 | 50 | #stan parameters 51 | runstan_flag = True 52 | # pystan_ver = 2 53 | pystan_ver = 3 54 | res_name = 'tot' 55 | n_iter = 1000 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg=0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | df_cellinfo = pd.read_csv(cellinfo_fname) 74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, 
synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | 91 | #output file name and directory 92 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 93 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 94 | 95 | #run stan model 96 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 97 | out_fname, out_dir, res_name, 98 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 99 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 100 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 101 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 102 | 103 | #run time end 104 | run_t_end = time.time() 105 | 106 | #compute run time 107 | run_tm = (run_t_end - run_t_strt)/60 108 | 109 | #log run time 110 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 111 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 112 | 113 | #write out run info 114 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 115 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 116 | 117 | 118 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_pystan_model3_uncorr_cells_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model3_uncorr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # 
synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan' 41 | 42 | #output info 43 | #main output filename 44 | out_fname_main = 'NGAWest3CA_syndata' 45 | #main output directory 46 | out_dir_main = '../../../../Data/Validation/regression/ds3/' 47 | #output sub-directory 48 | out_dir_sub = 'PYSTAN_NGAWest3CA_uncorr_cells_chol_eff' 49 | 50 | #stan parameters 51 | runstan_flag = True 52 | # pystan_ver = 2 53 | pystan_ver = 3 54 | res_name = 'tot' 55 | n_iter = 1000 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg=0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | df_cellinfo = pd.read_csv(cellinfo_fname) 74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = 
'%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | 91 | #output file name and directory 92 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 93 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 94 | 95 | #run stan model 96 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 97 | out_fname, out_dir, res_name, 98 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 99 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 100 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 101 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 102 | 103 | #run time end 104 | run_t_end = time.time() 105 | 106 | #compute run time 107 | run_tm = (run_t_end - run_t_strt)/60 108 | 109 | #log run time 110 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 111 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 112 | 113 | #write out run info 114 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 115 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 116 | 117 | 118 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds1/main_pystan_model1_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model1_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 
24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | # ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | 35 | #stan model 36 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan' 37 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan' 38 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan' 39 | # sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan' 40 | 41 | #output info 42 | #main output filename 43 | out_fname_main = 'NGAWest2CA_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds1/' 46 | #output sub-directory 47 | #python 2 48 | # out_dir_sub = 'PYSTAN_NGAWest2CA' 49 | # out_dir_sub = 'PYSTAN_NGAWest2CA_chol' 50 | # out_dir_sub = 'PYSTAN_NGAWest2CA_chol_eff' 51 | # out_dir_sub = 'PYSTAN_NGAWest2CA_chol_eff2' 52 | #python 3 53 | # out_dir_sub = 'PYSTAN3_NGAWest2CA' 54 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_chol' 55 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_chol_eff' 56 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_chol_eff2' 57 | 58 | #stan parameters 59 | runstan_flag = True 60 | #pystan_ver = 2 61 | pystan_ver = 3 62 | res_name = 'tot' 63 | n_iter = 1000 64 | n_chains = 4 65 | adapt_delta = 0.8 66 | max_treedepth = 10 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | # Run stan regression 75 | # --------------------------- 76 | #create datafame with computation time 77 | df_run_info = list() 78 | 79 | #iterate over all synthetic datasets 80 | for d_id in ds_id: 81 | print('Synthetic dataset %i fo 
%i'%(d_id, len(ds_id))) 82 | #run time start 83 | run_t_strt = time.time() 84 | #input flatfile 85 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 86 | #load flatfile 87 | df_flatfile = pd.read_csv(ds_fname) 88 | #keep only NGAWest2 records 89 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 90 | 91 | #output file name and directory 92 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 93 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 94 | 95 | #run stan model 96 | RunStan(df_flatfile, sm_fname, out_fname, out_dir, res_name, 97 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 98 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 99 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 100 | 101 | #run time end 102 | run_t_end = time.time() 103 | 104 | #compute run time 105 | run_tm = (run_t_end - run_t_strt)/60 106 | 107 | #log run time 108 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 109 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 110 | 111 | #write out run info 112 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 113 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 114 | 115 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_pystan_model3_corr_cells_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model3_corr_cells_unbounded_hyp import 
RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan' 41 | 42 | #output info 43 | #main output filename 44 | out_fname_main = 'NGAWest2CA_syndata' 45 | #main output directory 46 | out_dir_main = '../../../../Data/Validation/regression/ds3/' 47 | #output sub-directory 48 | out_dir_sub = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff' 49 | 50 | #stan parameters 51 | runstan_flag = True 52 | # pystan_ver = 2 53 | pystan_ver = 3 54 | res_name = 'tot' 55 | n_iter = 1000 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg=0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | df_cellinfo = pd.read_csv(cellinfo_fname) 74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | 
print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | #keep only NGAWest2 records 91 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 92 | 93 | #output file name and directory 94 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 95 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 96 | 97 | #run stan model 98 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 99 | out_fname, out_dir, res_name, 100 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 101 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 102 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 103 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 104 | 105 | #run time end 106 | run_t_end = time.time() 107 | 108 | #compute run time 109 | run_tm = (run_t_end - run_t_strt)/60 110 | 111 | #log run time 112 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 113 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 114 | 115 | #write out run info 116 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 117 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 118 | 119 | 120 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest2CA_sparse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 
| sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan 19 | 20 | # Define variables 21 | # --------------------------- 22 | #filename suffix 23 | # synds_suffix = '_small_corr_len' 24 | # synds_suffix = '_large_corr_len' 25 | 26 | #synthetic datasets directory 27 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 28 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 29 | 30 | # dataset info 31 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 32 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 33 | ds_id = np.arange(1,6) 34 | #cell specific anelastic attenuation 35 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 36 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 37 | 38 | #stan model 39 | sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 40 | 41 | #output info 42 | #main output filename 43 | out_fname_main = 'NGAWest2CA_syndata' 44 | #main output directory 45 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 46 | #output sub-directory 47 | # out_dir_sub = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff_sp' 48 | out_dir_sub = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff_sp' 49 | 50 | #stan parameters 51 | runstan_flag = True 52 | # pystan_ver = 2 53 | pystan_ver = 3 54 | res_name = 'tot' 55 | n_iter = 1000 56 | n_chains = 4 57 | adapt_delta = 0.8 #0.9 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_a_erg=0.0 61 | #parallel options 62 | # flag_parallel = True 63 | flag_parallel = False 64 | 65 | #output sub-dir with corr with suffix info 66 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 67 | 68 | #load cell dataframes 69 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 70 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 71 | df_cellinfo = pd.read_csv(cellinfo_fname) 72 | df_celldist = pd.read_csv(celldist_fname) 73 | 74 | # Run stan regression 75 | # 
--------------------------- 76 | #create datafame with computation time 77 | df_run_info = list() 78 | 79 | #iterate over all synthetic datasets 80 | for d_id in ds_id: 81 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 82 | #run time start 83 | run_t_strt = time.time() 84 | #input flatfile 85 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 86 | #load flatfile 87 | df_flatfile = pd.read_csv(ds_fname) 88 | #keep only NGAWest2 records 89 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 90 | 91 | #output file name and directory 92 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 93 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 94 | 95 | #run stan model 96 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 97 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 98 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 99 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 100 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 101 | 102 | #run time end 103 | run_t_end = time.time() 104 | 105 | #compute run time 106 | run_tm = (run_t_end - run_t_strt)/60 107 | 108 | #log run time 109 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 110 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 111 | 112 | #write out run info 113 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 114 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 115 | 116 | 117 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_pystan_model3_uncorr_cells_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load 
libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model3_uncorr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan' 41 | 42 | #output info 43 | #main output filename 44 | out_fname_main = 'NGAWest2CA_syndata' 45 | #main output directory 46 | out_dir_main = '../../../../Data/Validation/regression/ds3/' 47 | #output sub-directory 48 | out_dir_sub = 'PYSTAN_NGAWest2CA_uncorr_cells_chol_eff' 49 | 50 | #stan parameters 51 | runstan_flag = True 52 | # pystan_ver = 2 53 | pystan_ver = 3 54 | res_name = 'tot' 55 | n_iter = 1000 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg=0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | df_cellinfo = pd.read_csv(cellinfo_fname) 
74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | #keep only NGAWest2 records 91 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 92 | 93 | #output file name and directory 94 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 95 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 96 | 97 | #run stan model 98 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 99 | out_fname, out_dir, res_name, 100 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 101 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 102 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 103 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 104 | 105 | #run time end 106 | run_t_end = time.time() 107 | 108 | #compute run time 109 | run_tm = (run_t_end - run_t_strt)/60 110 | 111 | #log run time 112 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 113 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 114 | 115 | #write out run info 116 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 117 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 118 | 119 | 120 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest2CANorth_sparse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed 
Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 41 | 42 | #output info 43 | #main output filename 44 | out_fname_main = 'NGAWest2CANorth_syndata' 45 | #main output directory 46 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 47 | #output sub-directory 48 | out_dir_sub = 'PYSTAN_NGAWest2CANorth_corr_cells_chol_eff_sp' 49 | 50 | #stan parameters 51 | runstan_flag = True 52 | # pystan_ver = 2 53 | pystan_ver = 3 54 | res_name = 'tot' 55 | n_iter = 1000 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_a_erg=0.0 61 | #parallel options 62 | # flag_parallel = True 63 | flag_parallel = False 64 | 65 | #output sub-dir with corr with suffix info 66 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 67 | 68 | #load cell dataframes 69 | cellinfo_fname = '%s%s.csv'%(ds_dir, 
ds_fname_cellinfo) 70 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 71 | df_cellinfo = pd.read_csv(cellinfo_fname) 72 | df_celldist = pd.read_csv(celldist_fname) 73 | 74 | # Run stan regression 75 | # --------------------------- 76 | #create datafame with computation time 77 | df_run_info = list() 78 | 79 | #iterate over all synthetic datasets 80 | for d_id in ds_id: 81 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 82 | #run time start 83 | run_t_strt = time.time() 84 | #input flatfile 85 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 86 | #load flatfile 87 | df_flatfile = pd.read_csv(ds_fname) 88 | #keep only North records of NGAWest2 89 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 90 | df_flatfile.sreg==1),:] 91 | 92 | #output file name and directory 93 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 94 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 95 | 96 | #run stan model 97 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 98 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 99 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 100 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 101 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 102 | 103 | #run time end 104 | run_t_end = time.time() 105 | 106 | #compute run time 107 | run_tm = (run_t_end - run_t_strt)/60 108 | 109 | #log run time 110 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 111 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 112 | 113 | #write out run info 114 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 115 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 116 | 117 | -------------------------------------------------------------------------------- /Analyses/Stan_lib/regression_stan_model1_unbounded_hyp.stan: 
/*********************************************
Stan program to obtain VCM parameters
lower dimensions is used (event terms/station terms)

This model explicitly estimates the latent event terms and station terms.
This model includes a spatially varying earthquake term, a spatially
varying station term, a spatially independent station term, and the
between and within event residuals.
The spatially varying terms are sampled directly from zero-mean
multivariate normal distributions whose covariance is built from the
(negative exponential) kernel function.
 ********************************************/

data {
  int N;      // number of records
  int NEQ;    // number of earthquakes
  int NSTAT;  // number of stations

  //event and station ID
  int eq[N];    // event id (in numerical order from 1 to last)
  int stat[N];  // station id (in numerical order from 1 to last)

  //observations
  vector[N] Y;  // median predictions for each record with anelastic attenuation taken out

  //mean ground motion
  vector[N] rec_mu;

  //Earthquake, Station coordinates
  vector[2] X_e[NEQ];    // coordinates of each event
  vector[2] X_s[NSTAT];  // coordinates of each station
}

transformed data {
  //jitter added to the covariance diagonals for numerical stability
  real delta = 1e-9;
}

parameters {
  //Aleatory Variability Terms
  //(all scales/lengths are bounded below by zero: their priors only have
  // positive support and they are used as standard deviations/length scales)
  real<lower=0> phi_0;  // phi_0 - remaining aleatory variability of within-event residuals
  real<lower=0> tau_0;  // tau_0 - remaining aleatory variability of between-event residuals

  //Epistemic Uncertainty Terms
  real<lower=0> ell_1e;     // correlation length, event term
  real<lower=0> omega_1e;   // scale, event term
  real<lower=0> ell_1as;    // correlation length, correlated station term
  real<lower=0> omega_1as;  // scale, correlated station term
  real<lower=0> omega_1bs;  // scale, zero-correlation station term

  //spatially correlated coefficients
  real dc_0;             //constant shift
  vector[NEQ] dc_1e;     //spatially varying eq coeff
  vector[NSTAT] dc_1as;  //spatially varying stat coeff
  vector[NSTAT] dc_1bs;  //zero correlation station term

  //between event terms
  vector[NEQ] dB;
}

model {
  //non-ergodic mean
  vector[N] rec_nerg_dB;

  //Aleatory Variability Terms
  phi_0 ~ lognormal(-1.20,0.3);
  tau_0 ~ lognormal(-1,0.3);
  //Station and earthquake parameters
  dB ~ normal(0,tau_0);

  //non-ergodic hyper-parameters
  ell_1e ~ inv_gamma(2.,50);
  ell_1as ~ inv_gamma(2.,50);
  omega_1e ~ exponential(5);
  omega_1as ~ exponential(5);
  omega_1bs ~ exponential(5);

  //constant shift
  dc_0 ~ normal(0.,0.1);

  //station contributions with zero correlation length
  dc_1bs ~ normal(0,omega_1bs);

  //spatially latent variable for event contributions to GP
  {
    matrix[NEQ,NEQ] cov_1e;

    //fill the upper triangle and mirror it to keep the matrix symmetric
    for(i in 1:NEQ) {
      for(j in i:NEQ) {
        real d_e;
        real c_1e;

        d_e = distance(X_e[i],X_e[j]);

        c_1e = (omega_1e^2 * exp(-d_e/ell_1e));

        cov_1e[i,j] = c_1e;
        cov_1e[j,i] = c_1e;
      }
      cov_1e[i,i] += delta;
    }
    dc_1e ~ multi_normal(rep_vector(0.,NEQ), cov_1e);
  }

  //Spatially latent variable for station contributions to GP
  {
    matrix[NSTAT,NSTAT] cov_1as;

    //fill the upper triangle and mirror it to keep the matrix symmetric
    for(i in 1:NSTAT) {
      for(j in i:NSTAT) {
        real d_s;
        real c_1as;

        d_s = distance(X_s[i],X_s[j]);

        c_1as = (omega_1as^2 * exp(-d_s/ell_1as));

        cov_1as[i,j] = c_1as;
        cov_1as[j,i] = c_1as;
      }
      cov_1as[i,i] += delta;
    }
    dc_1as ~ multi_normal(rep_vector(0.,NSTAT), cov_1as);
  }

  //Mean non-ergodic including dB
  //event-based terms (dc_1e, dB) are indexed by event id,
  //station-based terms (dc_1as, dc_1bs) by station id
  rec_nerg_dB = rec_mu + dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat] + dB[eq];

  Y ~ normal(rec_nerg_dB,phi_0);
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 27 12:20:36 2022

@author: glavrent
"""
# Working directory and Packages
# ---------------------------

#load packages
import sys
import pathlib
import glob
import re           #regular expression package
import pickle
from joblib import cpu_count
#arithmetic libraries
import numpy as np
#statistics libraries
import pandas as pd
#plot libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoLocator as plt_autotick

# Define Problem
# ---------------------------
#observations file
fname_data     = 'data/examp_obs.csv'
#file with the inla regression summary
fname_inla_reg = 'data/inla_regression/inla_regression.csv'

#directory where the figures are written
dir_out = 'data/inla_regression/'

# Read Data
# ---------------------------
#observations (first column is the index)
df_data        = pd.read_csv(fname_data,     index_col=0)
#regression summary (first column is the index)
df_reg_summary = pd.read_csv(fname_inla_reg, index_col=0)

# Summary figures
# ---------------------------
#color bar levels and ticks for the mean field
cbar_levs_mean  = np.linspace(-2, 2, 101).tolist()
cbar_ticks_mean = np.arange(-2, 2.01, 0.8).tolist()
#color bar levels and ticks for the standard-deviation field
cbar_levs_sig   = np.linspace(0.0, 0.5, 101).tolist()
cbar_ticks_sig  = np.arange(0, 0.501, 0.1).tolist()

# data versus estimate
fname_fig = 'inla_gp_scatter'
fig, ax = plt.subplots(figsize=(10, 10))
#observed values against the estimated mean, plus the one-to-one line
hl = ax.plot(df_data.tot, df_reg_summary.tot_mean, 'o')
ax.axline((0,0), slope=1, color="black", linestyle="--")
ax.grid(which='both')
#tick label size on both axes
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=32)
#ticks and limits
ax.set_xticks([-2,-1,0,1,2])
ax.set_yticks([-2,-1,0,1,2])
ax.set_xlim([-2.0, 2.0])
ax.set_ylim([-2.0, 2.0])
#axis labels
ax.set_xlabel('Data',      fontsize=35)
ax.set_ylabel('Estimated', fontsize=35)
#write figure
fig.tight_layout()
fig.savefig( dir_out + fname_fig + '.png' )

# spatial maps of the field mean and of the field standard deviation
#the two maps only differ in the plotted column, the color options,
#the color bar discretization, and the color bar label
field_maps = [
    {'fname':      'inla_gp_field_mean',
     'column':     'tot_mean',
     'scatter_kw': {'vmin': -2, 'vmax': 2},
     'levels':     cbar_levs_mean,
     'ticks':      cbar_ticks_mean,
     'label':      r'$\mu(c_0 + c_1(\vec{t}))$'},
    {'fname':      'inla_gp_field_std',
     'column':     'tot_sig',
     'scatter_kw': {'vmin': 0, 'vmax': 0.5, 'cmap': 'Oranges'},
     'levels':     cbar_levs_sig,
     'ticks':      cbar_ticks_sig,
     'label':      r'$\psi(c_0 + c_1(\vec{t}))$'},
]

for fmap in field_maps:
    fname_fig = fmap['fname']
    fig, ax = plt.subplots(figsize=(10, 11))
    #map of the selected summary column (looked up only when it is plotted)
    hl = ax.scatter(df_reg_summary.X, df_reg_summary.Y,
                    c=getattr(df_reg_summary, fmap['column']),
                    marker='D', s=100, **fmap['scatter_kw'])
    ax.grid(which='both')
    #horizontal color bar below the map
    cbar = fig.colorbar(hl, orientation="horizontal", pad=0.15,
                        boundaries=fmap['levels'], ticks=fmap['ticks'])
    #tick label size on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, labelsize=30)
    #axis labels
    ax.set_xlabel(r'$t_1$', fontsize=35)
    ax.set_ylabel(r'$t_2$', fontsize=35)
    #map extent
    ax.set_xlim([0, 100])
    ax.set_ylim([0, 100])
    #color bar ticks and label
    cbar.ax.tick_params(tick1On=1, labelsize=30)
    cbar.set_label(fmap['label'], size=35)
    #write figure
    fig.tight_layout()
    fig.savefig( dir_out + fname_fig + '.png' )
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 14 14:17:52 2021

@author: glavrent
"""
# Driver script: runs the model3 (correlated cells) Stan regression with
# pystan on every synthetic dataset listed in ds_id, using only the records
# with dsid==0 and sreg==1, and logs the run time of each regression.

# Working directory and Packages
# ---------------------------
#load libraries
import os
import sys
import platform
import numpy as np
import pandas as pd
import time
#user functions
sys.path.insert(0,'../../../Python_lib/regression/pystan/')
from regression_pystan_model3_corr_cells_unbounded_hyp import RunStan


# Define variables
# ---------------------------
#filename suffix of the synthetic datasets
#(exactly one option must be active; it is used in every input/output path)
synds_suffix = '_small_corr_len'
# synds_suffix = '_large_corr_len'

#synthetic datasets directory
ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
ds_dir = r'%s%s/'%(ds_dir, synds_suffix)

# dataset info
#ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
ds_id = np.arange(1,6)
#cell specific anelastic attenuation
ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'

#stan model
sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'

#output info
#main output filename
out_fname_main = 'NGAWest2CANorth_syndata'
#main output directory
out_dir_main = '../../../../Data/Verification/regression/ds3/'
#output sub-directory
out_dir_sub = 'PYSTAN_NGAWest2CANorth_corr_cells_chol_eff'

#stan parameters
runstan_flag = True
# pystan_ver = 2
pystan_ver = 3
res_name = 'tot'
n_iter = 1000
n_chains = 4
adapt_delta = 0.8
max_treedepth = 10
#ergodic coefficients
c_2_erg=-2.0
c_3_erg=-0.6
c_a_erg=0.0
#parallel options
# flag_parallel = True
flag_parallel = False

#output sub-dir with suffix info
out_dir_sub = '%s%s'%(out_dir_sub, synds_suffix)

#load cell dataframes
cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
df_cellinfo = pd.read_csv(cellinfo_fname)
df_celldist = pd.read_csv(celldist_fname)

# Run stan regression
# ---------------------------
#list of single-row dataframes with the computation time of each run
df_run_info = list()

#iterate over all synthetic datasets
for d_id in ds_id:
    print('Synthetic dataset %i of %i'%(d_id, len(ds_id)))
    #run time start
    run_t_strt = time.time()
    #input flatfile
    ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
    #load flatfile
    df_flatfile = pd.read_csv(ds_fname)
    #keep only North records of NGAWest2
    df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
                                                 df_flatfile.sreg==1),:]

    #output file name and directory
    out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
    out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)

    #run stan model
    RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname,
            out_fname, out_dir, res_name,
            c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
            runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
            adapt_delta=adapt_delta, max_treedepth=max_treedepth,
            pystan_ver=pystan_ver, pystan_parallel=flag_parallel)

    #run time end
    run_t_end = time.time()

    #compute run time (minutes)
    run_tm = (run_t_end - run_t_strt)/60

    #log run time
    #(platform.node() gives the host name on every OS; os.uname is Unix-only)
    df_run_info.append(pd.DataFrame({'computer_name':platform.node(),'out_name':out_dir_sub,
                                     'ds_id':d_id,'run_time':run_tm}, index=[d_id]))

#write out run info
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 14 14:17:52 2021

@author: glavrent
"""
# Driver script: runs the model3 (uncorrelated cells) Stan regression with
# pystan on every synthetic dataset listed in ds_id, using only the records
# with dsid==0 and sreg==1, and logs the run time of each regression.

# Working directory and Packages
# ---------------------------
#load libraries
import os
import sys
import platform
import numpy as np
import pandas as pd
import time
#user functions
sys.path.insert(0,'../../../Python_lib/regression/pystan/')
from regression_pystan_model3_uncorr_cells_unbounded_hyp import RunStan


# Define variables
# ---------------------------
#filename suffix of the synthetic datasets
#(exactly one option must be active; it is used in every input/output path)
synds_suffix = '_small_corr_len'
# synds_suffix = '_large_corr_len'

#synthetic datasets directory
ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
ds_dir = r'%s%s/'%(ds_dir, synds_suffix)

# dataset info
#ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
ds_id = np.arange(1,6)
#cell specific anelastic attenuation
ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'

#stan model
sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'

#output info
#main output filename
out_fname_main = 'NGAWest2CANorth_syndata'
#main output directory
out_dir_main = '../../../../Data/Verification/regression/ds3/'
#output sub-directory
out_dir_sub = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol_eff'

#stan parameters
runstan_flag = True
# pystan_ver = 2
pystan_ver = 3
res_name = 'tot'
n_iter = 1000
n_chains = 4
adapt_delta = 0.8
max_treedepth = 10
#ergodic coefficients
c_2_erg=-2.0
c_3_erg=-0.6
c_a_erg=0.0
#parallel options
# flag_parallel = True
flag_parallel = False

#output sub-dir with suffix info
out_dir_sub = '%s%s'%(out_dir_sub, synds_suffix)

#load cell dataframes
cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
df_cellinfo = pd.read_csv(cellinfo_fname)
df_celldist = pd.read_csv(celldist_fname)

# Run stan regression
# ---------------------------
#list of single-row dataframes with the computation time of each run
df_run_info = list()

#iterate over all synthetic datasets
for d_id in ds_id:
    print('Synthetic dataset %i of %i'%(d_id, len(ds_id)))
    #run time start
    run_t_strt = time.time()
    #input flatfile
    ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
    #load flatfile
    df_flatfile = pd.read_csv(ds_fname)
    #keep only North records of NGAWest2
    df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
                                                 df_flatfile.sreg==1),:]

    #output file name and directory
    out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
    out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)

    #run stan model
    RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname,
            out_fname, out_dir, res_name,
            c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
            runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
            adapt_delta=adapt_delta, max_treedepth=max_treedepth,
            pystan_ver=pystan_ver, pystan_parallel=flag_parallel)

    #run time end
    run_t_end = time.time()

    #compute run time (minutes)
    run_tm = (run_t_end - run_t_strt)/60

    #log run time
    #(platform.node() gives the host name on every OS; os.uname is Unix-only)
    df_run_info.append(pd.DataFrame({'computer_name':platform.node(),'out_name':out_dir_sub,
                                     'ds_id':d_id,'run_time':run_tm}, index=[d_id]))

#write out run info
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 14 14:17:52 2021

@author: glavrent
"""
# Driver script: runs the model1 Stan regression with pystan on every
# synthetic dataset listed in ds_id, using only the records with dsid==0
# and sreg==1, and logs the run time of each regression.

# Working directory and Packages
# ---------------------------
#load libraries
import os
import sys
import platform
import numpy as np
import pandas as pd
import time
#user functions
sys.path.insert(0,'../../../Python_lib/regression/pystan/')
from regression_pystan_model1_unbounded_hyp import RunStan

# Define variables
# ---------------------------
#filename suffix of the synthetic datasets
#(exactly one option must be active; it is used in every input/output path)
synds_suffix = '_small_corr_len'
# synds_suffix = '_large_corr_len'

#synthetic datasets directory
ds_dir = '../../../../Data/Verification/synthetic_datasets/ds1'
ds_dir = r'%s%s/'%(ds_dir, synds_suffix)

# dataset info
# ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
ds_id = np.arange(1,6)

#stan model
#(exactly one option must be active; keep it in sync with out_dir_sub below)
sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp.stan'
# sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan'
# sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient.stan'
# sm_fname = '../../../Stan_lib/regression_stan_model1_unbounded_hyp_chol_efficient2.stan'

#output info
#main output filename
out_fname_main = 'NGAWest2CANorth_syndata'
#main output directory
out_dir_main = '../../../../Data/Verification/regression/ds1/'
#output sub-directory
#(exactly one option must be active; it should correspond to the selected
# stan model and to pystan_ver)
#pystan2
# out_dir_sub = 'PYSTAN_NGAWest2CANorth'
# out_dir_sub = 'PYSTAN_NGAWest2CANorth_chol'
# out_dir_sub = 'PYSTAN_NGAWest2CANorth_chol_eff'
# out_dir_sub = 'PYSTAN_NGAWest2CANorth_chol_eff2'
#pystan3
out_dir_sub = 'PYSTAN3_NGAWest2CANorth'
# out_dir_sub = 'PYSTAN3_NGAWest2CANorth_chol'
# out_dir_sub = 'PYSTAN3_NGAWest2CANorth_chol_eff'
# out_dir_sub = 'PYSTAN3_NGAWest2CANorth_chol_eff2'

#stan parameters
runstan_flag = True
# pystan_ver = 2
pystan_ver = 3
res_name = 'tot'
n_iter = 1000
n_chains = 4
adapt_delta = 0.8
max_treedepth = 10
#parallel options
# flag_parallel = True
flag_parallel = False

#output sub-dir with suffix info
out_dir_sub = '%s%s'%(out_dir_sub, synds_suffix)

# Run stan regression
# ---------------------------
#list of single-row dataframes with the computation time of each run
df_run_info = list()

#iterate over all synthetic datasets
for d_id in ds_id:
    print('Synthetic dataset %i of %i'%(d_id, len(ds_id)))
    #run time start
    run_t_strt = time.time()
    #input flatfile
    ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
    #load flatfile
    df_flatfile = pd.read_csv(ds_fname)
    #keep only North records of NGAWest2
    df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0,
                                                 df_flatfile.sreg==1),:]

    #output file name and directory
    out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
    out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)

    #run stan model
    RunStan(df_flatfile, sm_fname, out_fname, out_dir, res_name,
            runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains,
            adapt_delta=adapt_delta, max_treedepth=max_treedepth,
            pystan_ver=pystan_ver, pystan_parallel=flag_parallel)

    #run time end
    run_t_end = time.time()

    #compute run time (minutes)
    run_tm = (run_t_end - run_t_strt)/60

    #log run time
    #(platform.node() gives the host name on every OS; os.uname is Unix-only)
    df_run_info.append(pd.DataFrame({'computer_name':platform.node(),'out_name':out_dir_sub,
                                     'ds_id':d_id,'run_time':run_tm}, index=[d_id]))

#write out run info
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 29 15:16:15 2021

@author: glavrent
"""
# Driver script: runs the model3 (uncorrelated cells) Stan regression with
# cmdstan on every synthetic dataset listed in ds_id, using all records of
# each flatfile, and logs the run time of each regression.

# Working directory and Packages
# ---------------------------
#load libraries
import os
import sys
import platform
import numpy as np
import pandas as pd
import time
#user functions
#(exactly one import must be active; the sparse one is selected to agree
# with the sparse stan model and output sub-directory chosen below)
sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
# from regression_cmdstan_model3_uncorr_cells_unbounded_hyp import RunStan
from regression_cmdstan_model3_uncorr_cells_sparse_unbounded_hyp import RunStan

# Define variables
# ---------------------------
#filename suffix of the synthetic datasets
#(exactly one option must be active; it is used in every input/output path)
synds_suffix = '_small_corr_len'
# synds_suffix = '_large_corr_len'

#synthetic datasets directory
ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
ds_dir = r'%s%s/'%(ds_dir, synds_suffix)

# dataset info
#ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
ds_id = np.arange(1,6)
#cell specific anelastic attenuation
ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'

#stan model
# sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan'
sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan'

#output info
#main output filename
out_fname_main = 'NGAWest3CA_syndata'
#main output directory
out_dir_main = '../../../../Data/Verification/regression/ds3/'
#output sub-directory (the '_sp' option goes with the sparse stan model)
# out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff'
out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff_sp'

#stan parameters
res_name = 'tot'
n_iter_warmup   = 500
n_iter_sampling = 500
n_chains        = 4
adapt_delta     = 0.8
max_treedepth   = 10
#ergodic coefficients
c_2_erg=-2.0
c_3_erg=-0.6
c_a_erg= 0.0
#parallel options
# flag_parallel = True
flag_parallel = False

#output sub-dir with suffix info
out_dir_sub = '%s%s'%(out_dir_sub, synds_suffix)

#load cell dataframes
cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
df_cellinfo = pd.read_csv(cellinfo_fname)
df_celldist = pd.read_csv(celldist_fname)

# Run stan regression
# ---------------------------
#list of single-row dataframes with the computation time of each run
df_run_info = list()

#iterate over all synthetic datasets
for d_id in ds_id:
    print('Synthetic dataset %i of %i'%(d_id, len(ds_id)))
    #run time start
    run_t_strt = time.time()
    #input flatfile
    ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
    #load flatfile
    df_flatfile = pd.read_csv(ds_fname)

    #output file name and directory
    out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
    out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)

    #run stan model
    RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname,
            out_fname, out_dir, res_name,
            c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
            n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
            adapt_delta=adapt_delta, max_treedepth=max_treedepth,
            stan_parallel=flag_parallel)

    #run time end
    run_t_end = time.time()

    #compute run time (minutes)
    run_tm = (run_t_end - run_t_strt)/60

    #log run time
    #(platform.node() gives the host name on every OS; os.uname is Unix-only)
    df_run_info.append(pd.DataFrame({'computer_name':platform.node(),'out_name':out_dir_sub,
                                     'ds_id':d_id,'run_time':run_tm}, index=[d_id]))

#write out run info
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 29 15:16:15 2021

@author: glavrent
"""
# Driver script: runs the model3 (correlated cells) Stan regression with
# cmdstan on every synthetic dataset listed in ds_id, using all records of
# each flatfile, and logs the run time of each regression.

# Working directory and Packages
# ---------------------------
#load libraries
import os
import sys
import platform
import numpy as np
import pandas as pd
import time
#user functions
#(exactly one import must be active; keep it in sync with sm_fname and
# out_dir_sub below)
sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
from regression_cmdstan_model3_corr_cells_unbounded_hyp import RunStan
# from regression_cmdstan_model3_corr_cells_sparse_unbounded_hyp import RunStan

# Define variables
# ---------------------------
#filename suffix of the synthetic datasets
#(exactly one option must be active; it is used in every input/output path)
synds_suffix = '_small_corr_len'
# synds_suffix = '_large_corr_len'

#synthetic datasets directory
ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
ds_dir = r'%s%s/'%(ds_dir, synds_suffix)

# dataset info
#ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
ds_id = np.arange(1,6)
#cell specific anelastic attenuation
ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'

#stan model (exactly one option must be active)
sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
# sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'

#output info
#main output filename
out_fname_main = 'NGAWest3CA_syndata'
#main output directory
out_dir_main = '../../../../Data/Verification/regression/ds3/'
#output sub-directory (the '_sp' option goes with the sparse stan model)
out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells_chol_eff'
# out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells_chol_eff_sp'
#stan parameters
res_name = 'tot'
n_iter_warmup   = 500
n_iter_sampling = 500
n_chains        = 4
adapt_delta     = 0.8
max_treedepth   = 10
#ergodic coefficients
c_2_erg=-2.0
c_3_erg=-0.6
c_a_erg= 0.0
#parallel options
# flag_parallel = True
flag_parallel = False

#output sub-dir with suffix info
out_dir_sub = '%s%s'%(out_dir_sub, synds_suffix)

#load cell dataframes
cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
df_cellinfo = pd.read_csv(cellinfo_fname)
df_celldist = pd.read_csv(celldist_fname)

# Run stan regression
# ---------------------------
#list of single-row dataframes with the computation time of each run
df_run_info = list()

#iterate over all synthetic datasets
for d_id in ds_id:
    print('Synthetic dataset %i of %i'%(d_id, len(ds_id)))
    #run time start
    run_t_strt = time.time()
    #input flatfile
    ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
    #load flatfile
    #(all records are used: this is the NGAWest3CA run, so the dsid==0
    # restriction of the NGAWest2CA scripts is not applied here)
    df_flatfile = pd.read_csv(ds_fname)

    #output file name and directory
    out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
    out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)

    #run stan model
    RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname,
            out_fname, out_dir, res_name,
            c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
            n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
            adapt_delta=adapt_delta, max_treedepth=max_treedepth,
            stan_parallel=flag_parallel)

    #run time end
    run_t_end = time.time()

    #compute run time (minutes)
    run_tm = (run_t_end - run_t_strt)/60

    #log run time
    #(platform.node() gives the host name on every OS; os.uname is Unix-only)
    df_run_info.append(pd.DataFrame({'computer_name':platform.node(),'out_name':out_dir_sub,
                                     'ds_id':d_id,'run_time':run_tm}, index=[d_id]))

#write out run info
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 29 15:16:15 2021

@author: glavrent
"""
# Driver script: runs the model3 (correlated cells) Stan regression with
# cmdstan on every synthetic dataset listed in ds_id, using only the records
# with dsid==0, and logs the run time of each regression.

# Working directory and Packages
# ---------------------------
#load libraries
import os
import sys
import platform
import numpy as np
import pandas as pd
import time
#user functions
#(exactly one import must be active; keep it in sync with sm_fname and
# out_dir_sub below)
sys.path.insert(0,'../../../Python_lib/regression/cmdstan/')
from regression_cmdstan_model3_corr_cells_unbounded_hyp import RunStan
# from regression_cmdstan_model3_corr_cells_sparse_unbounded_hyp import RunStan

# Define variables
# ---------------------------
#filename suffix of the synthetic datasets
#(exactly one option must be active; it is used in every input/output path)
synds_suffix = '_small_corr_len'
# synds_suffix = '_large_corr_len'

#synthetic datasets directory
ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3'
ds_dir = r'%s%s/'%(ds_dir, synds_suffix)

# dataset info
#ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
ds_id = np.arange(1,6)
#cell specific anelastic attenuation
ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'

#stan model (exactly one option must be active)
sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan'
# sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_sparse_unbounded_hyp_chol_efficient.stan'

#output info
#main output filename
out_fname_main = 'NGAWest2CA_syndata'
#main output directory
out_dir_main = '../../../../Data/Verification/regression/ds3/'
#output sub-directory (the '_sp' option goes with the sparse stan model)
out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells_chol_eff'
# out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells_chol_eff_sp'

#stan parameters
res_name = 'tot'
n_iter_warmup   = 500
n_iter_sampling = 500
n_chains        = 4
adapt_delta     = 0.8
max_treedepth   = 10
#ergodic coefficients
c_2_erg=-2.0
c_3_erg=-0.6
c_a_erg= 0.0
#parallel options
# flag_parallel = True
flag_parallel = False

#output sub-dir with suffix info
out_dir_sub = '%s%s'%(out_dir_sub, synds_suffix)

#load cell dataframes
cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo)
celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist)
df_cellinfo = pd.read_csv(cellinfo_fname)
df_celldist = pd.read_csv(celldist_fname)

# Run stan regression
# ---------------------------
#list of single-row dataframes with the computation time of each run
df_run_info = list()

#iterate over all synthetic datasets
for d_id in ds_id:
    print('Synthetic dataset %i of %i'%(d_id, len(ds_id)))
    #run time start
    run_t_strt = time.time()
    #input flatfile
    ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id)
    #load flatfile
    df_flatfile = pd.read_csv(ds_fname)
    #keep only NGAWest2 records
    df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:]

    #output file name and directory
    out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id)
    out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id)

    #run stan model
    RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname,
            out_fname, out_dir, res_name,
            c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
            n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains,
            adapt_delta=adapt_delta, max_treedepth=max_treedepth,
            stan_parallel=flag_parallel)

    #run time end
    run_t_end = time.time()

    #compute run time (minutes)
    run_tm = (run_t_end - run_t_strt)/60

    #log run time
    #(platform.node() gives the host name on every OS; os.uname is Unix-only)
    df_run_info.append(pd.DataFrame({'computer_name':platform.node(),'out_name':out_dir_sub,
                                     'ds_id':d_id,'run_time':run_tm}, index=[d_id]))

#write out run info
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub)
pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False)
/Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_uncorr_cells_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model3_uncorr_cells_unbounded_hyp import RunStan 19 | # from regression_cmdstan_model3_uncorr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan' 42 | 43 | #output info 44 | #main output filename 45 | out_fname_main = 'NGAWest2CA_syndata' 46 | #main output directory 47 | out_dir_main = '../../../../Data/Verification/regression/ds3/' 48 | #output sub-directory 49 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff' 50 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff_sp' 
51 | 52 | #stan parameters 53 | res_name = 'tot' 54 | n_iter_warmup = 500 55 | n_iter_sampling = 500 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg= 0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | df_cellinfo = pd.read_csv(cellinfo_fname) 74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | #keep only NGAWest2 records 91 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 92 | 93 | #output file name and directory 94 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 95 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 96 | 97 | #run stan model 98 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 99 | out_fname, out_dir, res_name, 100 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 101 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 102 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 103 | stan_parallel=flag_parallel) 104 | 105 | #run time end 106 | run_t_end = time.time() 107 | 108 | #compute run time 109 | run_tm = (run_t_end - run_t_strt)/60 110 | 111 | #log run time 112 | 
df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 113 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 114 | 115 | #write out run info 116 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 117 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 118 | 119 | 120 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_corr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = 
'../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan' 44 | 45 | #output info 46 | #main output filename 47 | out_fname_main = 'NGAWest3CA_syndata' 48 | #main output directory 49 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 50 | #output sub-directory 51 | # out_dir_sub = 'PYSTAN_NGAWest3CA_corr_cells' 52 | # out_dir_sub = 'PYSTAN_NGAWest3CA_corr_cells_chol' 53 | # out_dir_sub = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff' 54 | # out_dir_sub = 'PYSTAN_NGAWest3CA_corr_cells_chol_eff2' 55 | 56 | #stan parameters 57 | runstan_flag = True 58 | # pystan_ver = 2 59 | pystan_ver = 3 60 | res_name = 'tot' 61 | n_iter = 1000 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | 95 | #output file name and directory 96 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 97 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 98 | 99 | #run stan model 100 | 
RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 101 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 102 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 103 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 104 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 105 | 106 | #run time end 107 | run_t_end = time.time() 108 | 109 | #compute run time 110 | run_tm = (run_t_end - run_t_strt)/60 111 | 112 | #log run time 113 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 114 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 115 | 116 | #write out run info 117 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 118 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 119 | 120 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_uncorr_cells_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_uncorr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 
'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan' 44 | 45 | #output info 46 | #main output filename 47 | out_fname_main = 'NGAWest3CA_syndata' 48 | #main output directory 49 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 50 | #output sub-directory 51 | # out_dir_sub = 'PYSTAN_NGAWest3CA_uncorr_cells' 52 | # out_dir_sub = 'PYSTAN_NGAWest3CA_uncorr_cells_chol' 53 | # out_dir_sub = 'PYSTAN_NGAWest3CA_uncorr_cells_chol_eff' 54 | # out_dir_sub = 'PYSTAN_NGAWest3CA_uncorr_cells_chol_eff2' 55 | 56 | #stan parameters 57 | runstan_flag = True 58 | # pystan_ver = 2 59 | pystan_ver = 3 60 | res_name = 'tot' 61 | n_iter = 1000 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in 
ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | 95 | #output file name and directory 96 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 97 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 98 | 99 | #run stan model 100 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 101 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 102 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 103 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 104 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 105 | 106 | #run time end 107 | run_t_end = time.time() 108 | 109 | #compute run time 110 | run_tm = (run_t_end - run_t_strt)/60 111 | 112 | #log run time 113 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 114 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 115 | 116 | #write out run info 117 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 118 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 119 | 120 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_corr_cells_NGAWest2CANorth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from 
regression_cmdstan_model3_corr_cells_unbounded_hyp import RunStan 19 | # from regression_cmdstan_model3_corr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_unbounded_hyp_chol_efficient.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 42 | 43 | #output info 44 | #main output filename 45 | out_fname_main = 'NGAWest2CANorth_syndata' 46 | #main output directory 47 | out_dir_main = '../../../../Data/Verification/regression/ds3/' 48 | #output sub-directory 49 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff' 50 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff_sp' 51 | 52 | #stan parameters 53 | res_name = 'tot' 54 | n_iter_warmup = 500 55 | n_iter_sampling = 500 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg= 0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | 
df_cellinfo = pd.read_csv(cellinfo_fname) 74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | #keep only North records of NGAWest2 91 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 92 | df_flatfile.sreg==1),:] 93 | 94 | #output file name and directory 95 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 96 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 97 | 98 | #run stan model 99 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 100 | out_fname, out_dir, res_name, 101 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 102 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 103 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 104 | stan_parallel=flag_parallel) 105 | 106 | #run time end 107 | run_t_end = time.time() 108 | 109 | #compute run time 110 | run_tm = (run_t_end - run_t_strt)/60 111 | 112 | #log run time 113 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 114 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 115 | 116 | #write out run info 117 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 118 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 119 | 120 | 121 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_cmdstan_model3_uncorr_cells_NGAWest2CANorth.py: -------------------------------------------------------------------------------- 
1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model3_uncorr_cells_unbounded_hyp import RunStan 19 | # from regression_cmdstan_model3_uncorr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds3' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_unbounded_hyp_chol_efficient.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model3_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan' 42 | 43 | #output info 44 | #main output filename 45 | out_fname_main = 'NGAWest2CANorth_syndata' 46 | #main output directory 47 | out_dir_main = '../../../../Data/Verification/regression/ds3/' 48 | #output sub-directory 49 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff' 50 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff_sp' 51 | 52 | #stan parameters 53 | res_name = 'tot' 54 | n_iter_warmup = 500 55 | n_iter_sampling = 500 56 | n_chains = 4 57 | adapt_delta = 0.8 58 | 
max_treedepth = 10 59 | #ergodic coefficients 60 | c_2_erg=-2.0 61 | c_3_erg=-0.6 62 | c_a_erg= 0.0 63 | #parallel options 64 | # flag_parallel = True 65 | flag_parallel = False 66 | 67 | #output sub-dir with corr with suffix info 68 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 69 | 70 | #load cell dataframes 71 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 72 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 73 | df_cellinfo = pd.read_csv(cellinfo_fname) 74 | df_celldist = pd.read_csv(celldist_fname) 75 | 76 | # Run stan regression 77 | # --------------------------- 78 | #create datafame with computation time 79 | df_run_info = list() 80 | 81 | #iterate over all synthetic datasets 82 | for d_id in ds_id: 83 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 84 | #run time start 85 | run_t_strt = time.time() 86 | #input flatfile 87 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 88 | #load flatfile 89 | df_flatfile = pd.read_csv(ds_fname) 90 | #keep only North records of NGAWest2 91 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 92 | df_flatfile.sreg==1),:] 93 | 94 | #output file name and directory 95 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 96 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 97 | 98 | #run stan model 99 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 100 | out_fname, out_dir, res_name, 101 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 102 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 103 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 104 | stan_parallel=flag_parallel) 105 | 106 | #run time end 107 | run_t_end = time.time() 108 | 109 | #compute run time 110 | run_tm = (run_t_end - run_t_strt)/60 111 | 112 | #log run time 113 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 114 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 115 | 116 | 
#write out run info 117 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 118 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 119 | 120 | 121 | -------------------------------------------------------------------------------- /Analyses/Stan_lib/regression_stan_model1_unbounded_hyp_chol.stan: -------------------------------------------------------------------------------- 1 | /********************************************* 2 | Stan program to obtain VCM parameters 3 | lower dimensions is used (event terms/station terms) 4 | 5 | This model explicitly estimates the latent event terms and station terms. 6 | This model includes a spatially varying earthquake term, a spatially 7 | varying station term, a spatially independent station term, and the 8 | between and within event residuals. 9 | The spatially varying terms are modeled as chelosky decomposition of the 10 | kernel function multiplied with standard normal variates. 11 | ********************************************/ 12 | 13 | data { 14 | int N; // number of records 15 | int NEQ; // number of earthquakes 16 | int NSTAT; // number of stations 17 | 18 | //event and station ID 19 | int eq[N]; // event id (in numerical order from 1 to last) 20 | int stat[N]; // station id (in numerical order from 1 to last) 21 | 22 | //observations 23 | vector[N] Y; // median predictions for each record with anelasic attenuation taken out 24 | 25 | //mean ground motion 26 | vector[N] rec_mu; 27 | 28 | //Earthquake, Station coordinates 29 | vector[2] X_e[NEQ]; // event coordinates for each record 30 | vector[2] X_s[NSTAT]; // station coordinates for each record 31 | } 32 | 33 | transformed data { 34 | real delta = 1e-9; 35 | } 36 | 37 | parameters { 38 | //Aleatory Variability Terms 39 | real phi_0; // phi_0 - remaining aleatory variability of within-event residuals 40 | real tau_0; // tau_0 - remaining aleatory variability of between-event residuals 41 | 42 | //Epistemic Uncertainty Terms 43 | 
real ell_1e; 44 | real omega_1e; 45 | real ell_1as; 46 | real omega_1as; 47 | real omega_1bs; 48 | 49 | //spatially correlated coefficients 50 | real dc_0; //constant shift 51 | vector[NSTAT] dc_1bs; //zero correlation station term 52 | 53 | //standardized normal variables for spatially correlated coefficients 54 | vector[NEQ] z_1e; //spatially varying eq coeff 55 | vector[NSTAT] z_1as; //spatially varying stat coeff 56 | 57 | //between event terms 58 | vector[NEQ] dB; 59 | } 60 | 61 | transformed parameters{ 62 | //Spatially correlated coefficients 63 | vector[NEQ] dc_1e; //spatially varying eq coeff 64 | vector[NSTAT] dc_1as; //spatially varying stat coeff 65 | 66 | //spatillay latent variable for event contributions to GP 67 | { 68 | matrix[NEQ,NEQ] COV_1e; 69 | matrix[NEQ,NEQ] L_1e; 70 | for(i in 1:NEQ) { 71 | for(j in i:NEQ) { 72 | real d_e = distance(X_e[i],X_e[j]); 73 | real C_1e = (omega_1e^2 * exp(-d_e/ell_1e)); 74 | COV_1e[i,j] = C_1e; 75 | COV_1e[j,i] = C_1e; 76 | } 77 | COV_1e[i,i] += delta; 78 | } 79 | L_1e = cholesky_decompose(COV_1e); 80 | dc_1e = L_1e * z_1e; 81 | } 82 | 83 | 84 | //Spatially latent variable for station contributions to GP 85 | { 86 | matrix[NSTAT,NSTAT] COV_1as; 87 | matrix[NSTAT,NSTAT] L_1as; 88 | for(i in 1:NSTAT) { 89 | for(j in i:NSTAT) { 90 | real d_s = distance(X_s[i],X_s[j]); 91 | real C_1as = (omega_1as^2 * exp(-d_s/ell_1as)); 92 | COV_1as[i,j] = C_1as; 93 | COV_1as[j,i] = C_1as; 94 | } 95 | COV_1as[i,i] += delta; 96 | } 97 | L_1as = cholesky_decompose(COV_1as); 98 | dc_1as = L_1as * z_1as; 99 | } 100 | 101 | } 102 | 103 | model { 104 | //non-ergodic mean 105 | vector[N] rec_nerg_dB; 106 | 107 | //Aleatory Variability Terms 108 | phi_0 ~ lognormal(-1.20,0.3); 109 | tau_0 ~ lognormal(-1,0.3); 110 | //Station and earthquake paramters 111 | dB ~ normal(0,tau_0); 112 | 113 | //non-ergodic hyper-parameters 114 | ell_1e ~ inv_gamma(2.,50); 115 | ell_1as ~ inv_gamma(2.,50); 116 | omega_1e ~ exponential(5); 117 | omega_1as ~ 
exponential(5); 118 | omega_1bs ~ exponential(5); 119 | 120 | //constant shift 121 | dc_0 ~ normal(0.,0.1); 122 | 123 | //standardized event contributions to GP 124 | z_1e ~ std_normal(); 125 | 126 | //standardized station contributions to GP 127 | z_1as ~ std_normal(); 128 | 129 | //station contributions with zero correlation length 130 | dc_1bs ~ normal(0,omega_1bs); 131 | 132 | //Mean non-ergodic including dB 133 | rec_nerg_dB = rec_mu + dc_0 + dc_1e[eq] + dc_1as[stat] + dc_1bs[stat] + dB[eq]; 134 | 135 | Y ~ normal(rec_nerg_dB,phi_0); 136 | } 137 | 138 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_corr_cells_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model2_corr_cells_unbounded_hyp import RunStan 19 | from regression_cmdstan_model2_corr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | 
ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest3CA_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 51 | #output sub-directory 52 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells' 53 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells_chol' 54 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells_chol_efficient' 55 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells_chol_efficient2' 56 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_corr_cells_chol_efficient_sp' 57 | 58 | #stan parameters 59 | res_name = 'tot' 60 | n_iter_warmup = 500 61 | n_iter_sampling = 500 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | 
print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | 95 | #output file name and directory 96 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 97 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 98 | 99 | #run stan model 100 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 101 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 102 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 103 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 104 | stan_parallel=flag_parallel) 105 | 106 | #run time end 107 | run_t_end = time.time() 108 | 109 | #compute run time 110 | run_tm = (run_t_end - run_t_strt)/60 111 | 112 | #log run time 113 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 114 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 115 | 116 | #write out run info 117 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 118 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 119 | 120 | 121 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_uncorr_cells_NGAWest3CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model2_uncorr_cells_unbounded_hyp import 
RunStan 19 | from regression_cmdstan_model2_uncorr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest3CA_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 51 | #output sub-directory 52 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells' 53 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol' 54 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff' 55 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff2' 56 | # out_dir_sub = 'CMDSTAN_NGAWest3CA_uncorr_cells_chol_eff_sp' 57 | 58 | #stan parameters 59 | res_name = 'tot' 60 | n_iter_warmup = 500 61 | n_iter_sampling = 500 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic 
coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | 95 | #output file name and directory 96 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 97 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 98 | 99 | #run stan model 100 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 101 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 102 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 103 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 104 | stan_parallel=flag_parallel) 105 | 106 | #run time end 107 | run_t_end = time.time() 108 | 109 | #compute run time 110 | run_tm = (run_t_end - run_t_strt)/60 111 | 112 | #log run time 113 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 114 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 115 | 116 | #write out run info 117 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 118 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 119 | 120 | 121 | 
-------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_corr_cells_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model2_corr_cells_unbounded_hyp import RunStan 19 | from regression_cmdstan_model2_corr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Validation/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = 
'../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest2CA_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Validation/regression/ds2/' 51 | #output sub-directory 52 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells' 53 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells_chol' 54 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells_chol_efficient' 55 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells_chol_efficient2' 56 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_corr_cells_chol_efficient_sp' 57 | 58 | #stan parameters 59 | res_name = 'tot' 60 | n_iter_warmup = 500 61 | n_iter_sampling = 500 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | #keep only NGAWest2 records 95 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 96 | 97 | #output file name and directory 98 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 99 | out_dir = '%s/%s/Y%i/'%(out_dir_main, 
out_dir_sub, d_id) 100 | 101 | #run stan model 102 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 103 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 104 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 105 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 106 | stan_parallel=flag_parallel) 107 | 108 | #run time end 109 | run_t_end = time.time() 110 | 111 | #compute run time 112 | run_tm = (run_t_end - run_t_strt)/60 113 | 114 | #log run time 115 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 116 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 117 | 118 | #write out run info 119 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 120 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 121 | 122 | 123 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_uncorr_cells_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model2_uncorr_cells_unbounded_hyp import RunStan 19 | from regression_cmdstan_model2_uncorr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 
30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest2CA_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 51 | #output sub-directory 52 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells' 53 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol' 54 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff' 55 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff2' 56 | # out_dir_sub = 'CMDSTAN_NGAWest2CA_uncorr_cells_chol_eff_sp' 57 | 58 | #stan parameters 59 | res_name = 'tot' 60 | n_iter_warmup = 500 61 | n_iter_sampling = 500 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = 
pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | #keep only NGAWest2 records 95 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 96 | 97 | #output file name and directory 98 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 99 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 100 | 101 | #run stan model 102 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 103 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 104 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 105 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 106 | stan_parallel=flag_parallel) 107 | 108 | #run time end 109 | run_t_end = time.time() 110 | 111 | #compute run time 112 | run_tm = (run_t_end - run_t_strt)/60 113 | 114 | #log run time 115 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 116 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 117 | 118 | #write out run info 119 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 120 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 121 | 122 | 123 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_inla_model3_uncorr_cells_NGAWest2CANorth.R: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | # This script 
iterates over all synthetic datasets based on the NGAWest3 flatfile 3 | # and calculates the non-ergodic terms 4 | ################################################################################## 5 | 6 | #user functions 7 | source('../../../R_lib/regression/inla/regression_inla_model3_uncorr_cells_unbounded_hyp.R') 8 | 9 | # Define variables 10 | # --------------------------- 11 | #main directory 12 | main_dir <- '../../../../' #local machine 13 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2 14 | 15 | #output filename suffix (uncomment one option; synds_suffix is appended to ds_dir below) 16 | # synds_suffix <- '_small_corr_len' 17 | # synds_suffix <- '_large_corr_len' 18 | # synds_suffix <- '_small_corr_len' 19 | 20 | #synthetic datasets directory 21 | ds_dir <- 'Data/Verification/synthetic_datasets/ds3' 22 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix) 23 | 24 | # dataset info 25 | # ds_main_data_fname <- 'CatalogNGAWest3CA_synthetic_data' 26 | # ds_main_cellinfo_fname <- 'CatalogNGAWest3CA_cellinfo' 27 | # ds_main_cellmat_fname <- 'CatalogNGAWest3CA_distancematrix' 28 | ds_main_data_fname <- 'CatalogNGAWest3CALite_synthetic_data' 29 | ds_main_cellinfo_fname <- 'CatalogNGAWest3CALite_cellinfo' 30 | ds_main_cellmat_fname <- 'CatalogNGAWest3CALite_distancematrix' 31 | ds_id <- seq(1,5) 32 | 33 | #output info 34 | #main output filename 35 | out_fname_main <- 'NGAWest2CANorth_syndata' 36 | #main output directory 37 | out_dir_main <- 'Data/Verification/regression/ds3' 38 | #output sub-directory (uncomment the option matching the chosen mesh preset) 39 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells' 40 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells_fine' 41 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells_medium' 42 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells_coarse' 43 | 44 | #inla parameters 45 | runinla_flag <- TRUE 46 | # runinla_flag <- FALSE 47 | res_name <- 'tot' 48 | 49 | #mesh coarseness (uncomment one preset; the mesh_* values are passed to RunINLA) 50 | # #fine 51 | # mesh_edge_max <- 5 52 | # mesh_inner_offset <- 15 53 | # mesh_outer_offset <- 15 54 | # #medium 55 | # 
mesh_edge_max <- 15 56 | # mesh_inner_offset <- 15 57 | # mesh_outer_offset <- 50 58 | # #coarse 59 | # mesh_edge_max <- 50 60 | # mesh_inner_offset <- 50 61 | # mesh_outer_offset <- 150 62 | 63 | #ergodic coefficients 64 | c_2_erg <- -2.0 65 | c_3_erg <- -0.6 66 | c_a_erg <- 0.0 #anelastic attenuation 67 | 68 | #output sub-dir with corr with suffix info 69 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix) 70 | 71 | # Run inla regression 72 | # --------------------------- 73 | #create dataframe with computation time 74 | df_run_info <- data.frame() 75 | 76 | #iterate over all synthetic datasets 77 | for (d_id in ds_id){ 78 | print(paste("Synthetic dataset",d_id,"of",length(ds_id))) 79 | #run time start 80 | run_t_strt <- Sys.time() 81 | #input file names 82 | analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id) 83 | flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id)) 84 | cellinfo_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellinfo_fname)) 85 | cellmat_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellmat_fname)) 86 | 87 | #load files 88 | df_flatfile <- read.csv(flatfile_fname) 89 | df_cellinfo <- read.csv(cellinfo_fname) 90 | df_cellmat <- read.csv(cellmat_fname) 91 | #keep only North records of NGAWest2 (dsid==0 & sreg==1) 92 | df_flatfile <- subset(df_flatfile, dsid==0 & sreg==1) 93 | 94 | 95 | #output file name and directory 96 | out_fname <- sprintf('%s%s_Y%i', out_fname_main, synds_suffix, d_id) 97 | out_dir <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id) 98 | 99 | #run INLA model (RunINLA is presumably defined in the regression_inla_model3_uncorr_cells_unbounded_hyp.R file sourced above) 100 | RunINLA(df_flatfile, df_cellinfo, df_cellmat, out_fname, out_dir, res_name=res_name, 101 | c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg, 102 | runinla_flag=runinla_flag, 103 | mesh_edge_max=mesh_edge_max, 104 | mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset) 105 | 106 | #run time end 107 | run_t_end <- Sys.time() 108 | 109 | 
#compute run time in minutes (explicit units: plain POSIXct subtraction lets R pick secs/mins/hours per run) 110 | run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins')) 111 | 112 | #log run time 113 | df_r_i <- data.frame(computer_name=Sys.info()["nodename"], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm) 114 | df_run_info <- rbind(df_run_info, df_r_i) 115 | 116 | #write out run info (rewritten after every dataset so partial results survive an interrupted run) 117 | row.names(df_run_info) <- NULL 118 | out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub) 119 | write.csv(df_run_info, out_fname, row.names=FALSE) 120 | } 121 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_uncorr_cells_NGAWest2CA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_uncorr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan' 41 | # 
sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan' 44 | 45 | #output info 46 | #main output filename 47 | out_fname_main = 'NGAWest2CA_syndata' 48 | #main output directory 49 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 50 | #output sub-directory 51 | #pystan 2 52 | # out_dir_sub = 'PYSTAN_NGAWest2CA_uncorr_cells' 53 | # out_dir_sub = 'PYSTAN_NGAWest2CA_uncorr_cells_chol' 54 | # out_dir_sub = 'PYSTAN_NGAWest2CA_uncorr_cells_chol_eff' 55 | # out_dir_sub = 'PYSTAN_NGAWest2CA_uncorr_cells_chol_eff2' 56 | #pystan 3 57 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_uncorr_cells' 58 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_uncorr_cells_chol' 59 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_uncorr_cells_chol_eff' 60 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_uncorr_cells_chol_eff2' 61 | 62 | #stan parameters 63 | runstan_flag = True 64 | # pystan_ver = 2 65 | pystan_ver = 3 66 | res_name = 'tot' 67 | n_iter = 1000 68 | n_chains = 4 69 | adapt_delta = 0.8 70 | max_treedepth = 10 71 | #ergodic coefficients 72 | c_a_erg=0.0 73 | #parallel options 74 | # flag_parallel = True 75 | flag_parallel = False 76 | 77 | #output sub-dir with corr with suffix info 78 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 79 | 80 | #load cell dataframes 81 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 82 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 83 | df_cellinfo = pd.read_csv(cellinfo_fname) 84 | df_celldist = pd.read_csv(celldist_fname) 85 | 86 | # Run stan regression 87 | # --------------------------- 88 | #create datafame with computation time 89 | df_run_info = list() 90 | 91 | #iterate over all synthetic datasets 92 | for d_id in ds_id: 93 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 94 | #run time 
start 95 | run_t_strt = time.time() 96 | #input flatfile 97 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 98 | #load flatfile 99 | df_flatfile = pd.read_csv(ds_fname) 100 | #keep only NGAWest2 records 101 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 102 | 103 | #output file name and directory 104 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 105 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 106 | 107 | #run stan model 108 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 109 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 110 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 111 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 112 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 113 | 114 | #run time end 115 | run_t_end = time.time() 116 | 117 | #compute run time 118 | run_tm = (run_t_end - run_t_strt)/60 119 | 120 | #log run time 121 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 122 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 123 | 124 | #write out run info 125 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 126 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 127 | 128 | 129 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_corr_cells_NGAWest2CANorth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from 
regression_cmdstan_model2_corr_cells_unbounded_hyp import RunStan 19 | from regression_cmdstan_model2_corr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest2CANorth_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 51 | #output sub-directory 52 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells' 53 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol' 54 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff' 55 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff2' 56 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_corr_cells_chol_eff_sp' 57 | 58 | #stan parameters 59 | res_name = 'tot' 60 | n_iter_warmup = 500 61 | n_iter_sampling = 500 62 | 
n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | #keep only North records of NGAWest2 95 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 96 | df_flatfile.sreg==1),:] 97 | 98 | #output file name and directory 99 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 100 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 101 | 102 | #run stan model 103 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 104 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 105 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 106 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 107 | stan_parallel=flag_parallel) 108 | 109 | #run time end 110 | run_t_end = time.time() 111 | 112 | #compute run time 113 | run_tm = (run_t_end - run_t_strt)/60 114 | 115 | #log run time 116 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 117 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 118 | 119 | #write out run info 120 | 
out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 121 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 122 | 123 | 124 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_cmdstan_model2_uncorr_cells_NGAWest2CANorth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 29 15:16:15 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/cmdstan/') 18 | # from regression_cmdstan_model2_uncorr_cells_unbounded_hyp import RunStan 19 | from regression_cmdstan_model2_uncorr_cells_sparse_unbounded_hyp import RunStan 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = 
'../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_sparse_unbounded_hyp_chol_efficient.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest2CANorth_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 51 | #output sub-directory 52 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells' 53 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol' 54 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff' 55 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff2' 56 | # out_dir_sub = 'CMDSTAN_NGAWest2CANorth_uncorr_cells_chol_eff_sp' 57 | 58 | #stan parameters 59 | res_name = 'tot' 60 | n_iter_warmup = 500 61 | n_iter_sampling = 500 62 | n_chains = 4 63 | adapt_delta = 0.8 64 | max_treedepth = 10 65 | #ergodic coefficients 66 | c_a_erg=0.0 67 | #parallel options 68 | # flag_parallel = True 69 | flag_parallel = False 70 | 71 | #output sub-dir with corr with suffix info 72 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 73 | 74 | #load cell dataframes 75 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 76 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 77 | df_cellinfo = pd.read_csv(cellinfo_fname) 78 | df_celldist = pd.read_csv(celldist_fname) 79 | 80 | # Run stan regression 81 | # --------------------------- 82 | #create datafame with computation time 83 | df_run_info = list() 84 | 85 | #iterate over all synthetic datasets 86 | for d_id in ds_id: 87 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 88 | #run time start 89 | run_t_strt = time.time() 90 | #input flatfile 91 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 92 | #load flatfile 93 | df_flatfile = pd.read_csv(ds_fname) 94 | #keep only North records of NGAWest2 95 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 
96 | df_flatfile.sreg==1),:] 97 | 98 | #output file name and directory 99 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 100 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 101 | 102 | #run stan model 103 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 104 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 105 | n_iter_warmup=n_iter_warmup, n_iter_sampling=n_iter_sampling, n_chains=n_chains, 106 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 107 | stan_parallel=flag_parallel) 108 | 109 | #run time end 110 | run_t_end = time.time() 111 | 112 | #compute run time 113 | run_tm = (run_t_end - run_t_strt)/60 114 | 115 | #log run time 116 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 117 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 118 | 119 | #write out run info 120 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 121 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 122 | 123 | 124 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds2/main_pystan_model2_uncorr_cells_NGAWest2CANorth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 14 14:17:52 2021 5 | 6 | @author: glavrent 7 | """ 8 | # Working directory and Packages 9 | # --------------------------- 10 | #load libraries 11 | import os 12 | import sys 13 | import numpy as np 14 | import pandas as pd 15 | import time 16 | #user functions 17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/') 18 | from regression_pystan_model2_uncorr_cells_unbounded_hyp import RunStan 19 | 20 | 21 | # Define variables 22 | # --------------------------- 23 | #filename suffix 24 | # synds_suffix = '_small_corr_len' 25 | # synds_suffix = '_large_corr_len' 26 | 27 | #synthetic datasets directory 28 | ds_dir = 
'../../../../Data/Verification/synthetic_datasets/ds2' 29 | ds_dir = r'%s%s/'%(ds_dir, synds_suffix) 30 | 31 | # dataset info 32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data' 33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data' 34 | ds_id = np.arange(1,6) 35 | #cell specific anelastic attenuation 36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo' 37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix' 38 | 39 | #stan model 40 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_uncorr_cells_unbounded_hyp_chol_efficient2.stan' 44 | 45 | #output info 46 | #main output filename 47 | out_fname_main = 'NGAWest2CANorth_syndata' 48 | #main output directory 49 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 50 | #output sub-directory 51 | #pystan2 52 | # out_dir_sub = 'PYSTAN_NGAWest2CANorth_uncorr_cells' 53 | # out_dir_sub = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol' 54 | # out_dir_sub = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol_eff' 55 | # out_dir_sub = 'PYSTAN_NGAWest2CANorth_uncorr_cells_chol_eff2' 56 | #pystan3 57 | # out_dir_sub = 'PYSTAN3_NGAWest2CANorth_uncorr_cells' 58 | # out_dir_sub = 'PYSTAN3_NGAWest2CANorth_uncorr_cells_chol' 59 | # out_dir_sub = 'PYSTAN3_NGAWest2CANorth_uncorr_cells_chol_eff' 60 | # out_dir_sub = 'PYSTAN3_NGAWest2CANorth_uncorr_cells_chol_eff2' 61 | 62 | #stan parameters 63 | runstan_flag = True 64 | # pystan_ver = 2 65 | pystan_ver = 3 66 | res_name = 'tot' 67 | n_iter = 1000 68 | n_chains = 4 69 | adapt_delta = 0.8 70 | max_treedepth = 10 71 | #ergodic coefficients 72 | c_a_erg=0.0 73 | #parallel options 74 | # flag_parallel = True 75 | flag_parallel = False 76 | 77 | #output sub-dir with corr 
with suffix info 78 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 79 | 80 | #load cell dataframes 81 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 82 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 83 | df_cellinfo = pd.read_csv(cellinfo_fname) 84 | df_celldist = pd.read_csv(celldist_fname) 85 | 86 | # Run stan regression 87 | # --------------------------- 88 | #create datafame with computation time 89 | df_run_info = list() 90 | 91 | #iterate over all synthetic datasets 92 | for d_id in ds_id: 93 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 94 | #run time start 95 | run_t_strt = time.time() 96 | #input flatfile 97 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 98 | #load flatfile 99 | df_flatfile = pd.read_csv(ds_fname) 100 | #keep only North records of NGAWest2 101 | df_flatfile = df_flatfile.loc[np.logical_and(df_flatfile.dsid==0, 102 | df_flatfile.sreg==1),:] 103 | 104 | #output file name and directory 105 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 106 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 107 | 108 | #run stan model 109 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 110 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 111 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 112 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 113 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 114 | 115 | #run time end 116 | run_t_end = time.time() 117 | 118 | #compute run time 119 | run_tm = (run_t_end - run_t_strt)/60 120 | 121 | #log run time 122 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 123 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 124 | 125 | #write out run info 126 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 127 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 128 | 129 | 130 | 
--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_inla_model1_NGAWest2CA.R:
--------------------------------------------------------------------------------
1 | ##################################################################################
2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
3 | # and calculates the non-ergodic terms
4 | ##################################################################################
5 | # user sets this file's directory as working directory.
6 | # user installs INLA by running the following two lines in the console:
7 | # options(timeout=600)
8 | # install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
9 | # user uncomments one option each for synds_suffix, out_dir_sub and the mesh
10 | # coarseness (mesh_edge_max, mesh_inner_offset, mesh_outer_offset) below;
11 | # they are used by this script and are not defined otherwise.
12 |
13 | #user functions
14 | source('../../../R_lib/regression/inla/regression_inla_model1_unbounded_hyp.R')
15 |
16 | # Define variables
17 | # ---------------------------
18 | #main directory
19 | main_dir <- '../../../../' #local machine
20 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
21 | # main_dir <- '/Users/elnaz-seylabi/Dropbox/NonErgModeling-local/'
22 |
23 | #filename suffix
24 | # synds_suffix <- '_small_corr_len'
25 | # synds_suffix <- '_large_corr_len'
26 |
27 | #synthetic datasets directory
28 | ds_dir <- 'Data/Verification/synthetic_datasets/ds1'
29 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix)
30 |
31 | # dataset info
32 | # ds_main_data_fname <- 'CatalogNGAWest3CA_synthetic_data'
33 | ds_main_data_fname <- 'CatalogNGAWest3CALite_synthetic_data'
34 | ds_id <- seq(1,5)
35 |
36 | #output info
37 | #main output filename
38 | out_fname_main <- 'NGAWest2CA_syndata'
39 | #main output directory
40 | out_dir_main <- 'Data/Verification/regression/ds1'
41 | #output sub-directory
42 | # out_dir_sub <- 'INLA_NGAWest2CA'
43 | #matern kernel function (nu=2)
44 | # out_dir_sub <- 'INLA_NGAWest2CA_fine'
45 | # out_dir_sub <- 'INLA_NGAWest2CA_medium'
46 | # out_dir_sub <- 'INLA_NGAWest2CA_coarse'
47 | # out_dir_sub <- 'INLA_NGAWest2CA_medium_full'
48 | #exponential kernel function
49 | # out_dir_sub <- 'INLA_NGAWest2CA_fine_nexp'
50 | # out_dir_sub <- 'INLA_NGAWest2CA_medium_nexp'
51 | # out_dir_sub <- 'INLA_NGAWest2CA_coarse_nexp'
52 |
53 | #inla parameters
54 | runinla_flag <- TRUE # TRUE or FALSE
55 | # NOTE(review): INLA's SPDE uses nu = alpha - d/2, so in 2D alpha=2 would give
56 | # nu=1 rather than nu=2 (alpha=3/2 gives nu=1/2, the exponential kernel);
57 | # confirm how RunINLA uses alpha
58 | # alpha <- 2 #matern kernel function nu=2
59 | alpha <- 3/2 #negative exponential kernel function
60 | res_name <- 'tot'
61 |
62 | #mesh coarseness
63 | # #fine
64 | # mesh_edge_max <- 5
65 | # mesh_inner_offset <- 15
66 | # mesh_outer_offset <- 15
67 | # #medium
68 | # mesh_edge_max <- 15
69 | # mesh_inner_offset <- 15
70 | # mesh_outer_offset <- 50
71 | # #coarse
72 | # mesh_edge_max <- 50
73 | # mesh_inner_offset <- 50
74 | # mesh_outer_offset <- 150
75 |
76 | #approximation options
77 | # if flag_gp_approx=TRUE uses int.strategy="eb" and strategy="gaussian"
78 | # int.strategy="eb" corresponds to one integration point, and
79 | # strategy="gaussian" approximates posteriors as gaussian distributions
80 | flag_gp_approx <- FALSE # TRUE or FALSE
81 |
82 | #append the synthetic dataset suffix to the output sub-directory
83 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
84 |
85 | # Run inla regression
86 | # ---------------------------
87 | #create dataframe with computation time
88 | df_run_info <- data.frame()
89 |
90 | #iterate over all synthetic datasets
91 | for (d_id in ds_id){
92 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
93 |   #run time start
94 |   run_t_strt <- Sys.time()
95 |   #input file names
96 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
97 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
98 |
99 |   #load files
100 |   df_flatfile <- read.csv(flatfile_fname)
101 |   #keep only NGAWest2 records
102 |   df_flatfile <- subset(df_flatfile, dsid==0)
103 |
104 |   #output file name and directory
105 |   out_fname <- sprintf('%s%s_Y%i', out_fname_main, synds_suffix, d_id)
106 |   out_dir <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
107 |
108 |   #run INLA model
109 |   RunINLA(df_flatfile, out_fname, out_dir, res_name=res_name,
110 |           alpha=alpha,
111 |           mesh_edge_max=mesh_edge_max,
112 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
113 |           flag_gp_approx=flag_gp_approx,
114 |           runinla_flag=runinla_flag)
115 |
116 |   #run time end
117 |   run_t_end <- Sys.time()
118 |
119 |   #compute run time in minutes; the units are fixed because the default
120 |   #units of a time difference change with its magnitude (secs, mins, hours)
121 |   #and are dropped when run_info.csv is written
122 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
123 |
124 |   #log run time
125 |   df_r_i <- data.frame(computer_name=Sys.info()[["nodename"]], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
126 |   df_run_info <- rbind(df_run_info, df_r_i)
127 |
128 |   #write out run info
129 |   row.names(df_run_info) <- NULL
130 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
131 |   write.csv(df_run_info, out_fname, row.names=FALSE)
132 | }
133 |
134 |
--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_inla_model2_uncorr_cells_NGAWest2CANorth.R:
--------------------------------------------------------------------------------
1 | ##################################################################################
2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
3 | # and calculates the non-ergodic terms
4 | ##################################################################################
5 | # user sets this file's directory as working directory.
6 | # user installs INLA by running the following two lines in the console:
7 | # options(timeout=600)
8 | # install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
9 |
10 | #user functions
11 | source('../../../R_lib/regression/inla/regression_inla_model2_uncorr_cells_unbounded_hyp.R')
12 |
13 | # Define variables
14 | # ---------------------------
15 | #main directory
16 | main_dir <- '../../../../' #local machine
17 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
18 | # main_dir <- '/Users/elnaz-seylabi/Dropbox/NonErgModeling-local/'
19 |
20 | #output filename suffix
21 | # synds_suffix <- '_small_corr_len'
22 | synds_suffix <- '_large_corr_len'
23 |
24 | #synthetic datasets directory
25 | ds_dir <- 'Data/Verification/synthetic_datasets/ds2'
26 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix)
27 |
28 | # dataset info
29 | # ds_main_data_fname <- 'CatalogNGAWest3CA_synthetic_data'
30 | # ds_main_cellinfo_fname <- 'CatalogNGAWest3CA_cellinfo'
31 | # ds_main_cellmat_fname <- 'CatalogNGAWest3CA_distancematrix'
32 | ds_main_data_fname <- 'CatalogNGAWest3CALite_synthetic_data'
33 | ds_main_cellinfo_fname <- 'CatalogNGAWest3CALite_cellinfo'
34 | ds_main_cellmat_fname <- 'CatalogNGAWest3CALite_distancematrix'
35 | ds_id <- seq(1,5)
36 |
37 | #output info
38 | #main output filename
39 | out_fname_main <- 'NGAWest2CANorth_syndata'
40 | #main output directory
41 | out_dir_main <- 'Data/Verification/regression/ds2'
42 | #output sub-directory
43 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells'
44 | out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells_fine'
45 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells_medium'
46 | # out_dir_sub <- 'INLA_NGAWest2CANorth_uncorr_cells_coarse'
47 |
48 | #inla parameters
49 | runinla_flag <- TRUE # TRUE or FALSE
50 | res_name <- 'tot'
51 |
52 | #mesh coarseness
53 | #fine
54 | mesh_edge_max <- 5
55 | mesh_inner_offset <- 15
56 | mesh_outer_offset <- 15
57 | # #medium
58 | # mesh_edge_max <- 15
59 | # mesh_inner_offset <- 15
60 | # mesh_outer_offset <- 50
61 | # #coarse
62 | # mesh_edge_max <- 50
63 | # mesh_inner_offset <- 50
64 | # mesh_outer_offset <- 150
65 |
66 | #ergodic coefficients
67 | c_a_erg <- 0.0 #anelastic attenuation
68 |
69 | #append the synthetic dataset suffix to the output sub-directory
70 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
71 |
72 | # Run inla regression
73 | # ---------------------------
74 | #create dataframe with computation time
75 | df_run_info <- data.frame()
76 |
77 | #iterate over all synthetic datasets
78 | for (d_id in ds_id){
79 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
80 |   #run time start
81 |   run_t_strt <- Sys.time()
82 |   #input file names
83 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
84 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
85 |   cellinfo_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellinfo_fname))
86 |   cellmat_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellmat_fname))
87 |
88 |   #load files
89 |   df_flatfile <- read.csv(flatfile_fname)
90 |   df_cellinfo <- read.csv(cellinfo_fname)
91 |   df_cellmat <- read.csv(cellmat_fname)
92 |   #keep only North records of NGAWest2 (dsid==0 and sreg==1)
93 |   df_flatfile <- subset(df_flatfile, dsid==0 & sreg==1)
94 |
95 |   #output file name and directory
96 |   out_fname <- sprintf('%s%s_Y%i', out_fname_main, synds_suffix, d_id)
97 |   out_dir <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
98 |
99 |   #run INLA model
100 |   RunINLA(df_flatfile, df_cellinfo, df_cellmat, out_fname, out_dir, res_name=res_name,
101 |           c_a_erg=c_a_erg,
102 |           mesh_edge_max=mesh_edge_max,
103 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
104 |           runinla_flag=runinla_flag)
105 |
106 |   #run time end
107 |   run_t_end <- Sys.time()
108 |
109 |   #compute run time in minutes; the units are fixed because the default
110 |   #units of a time difference change with its magnitude (secs, mins, hours)
111 |   #and are dropped when run_info.csv is written
112 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
113 |
114 |   #log run time
115 |   df_r_i <- data.frame(computer_name=Sys.info()[["nodename"]], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
116 |   df_run_info <- rbind(df_run_info, df_r_i)
117 |
118 |   #write out run info
119 |   row.names(df_run_info) <- NULL
120 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
121 |   write.csv(df_run_info, out_fname, row.names=FALSE)
122 | }
123 |
--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds1/main_inla_model1_NGAWest2CANorth.R:
--------------------------------------------------------------------------------
1 | ##################################################################################
2 | # This script iterates over all synthetic datasets based on the NGAWest3 flatfile
3 | # and calculates the non-ergodic terms
4 | ##################################################################################
5 | # user sets this file's directory as working directory.
6 | # user installs INLA by running the following two lines in the console:
7 | # options(timeout=600)
8 | # install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
9 | # user uncomments one option each for synds_suffix, out_dir_sub and the mesh
10 | # coarseness (mesh_edge_max, mesh_inner_offset, mesh_outer_offset) below;
11 | # they are used by this script and are not defined otherwise.
12 |
13 | #user functions
14 | source('../../../R_lib/regression/inla/regression_inla_model1_unbounded_hyp.R')
15 |
16 | # Define variables
17 | # ---------------------------
18 | #main directory
19 | main_dir <- '../../../../' #local machine
20 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
21 | # main_dir <- '/Users/elnaz-seylabi/Dropbox/NonErgModeling-local/'
22 |
23 | #filename suffix
24 | # synds_suffix <- '_small_corr_len'
25 | # synds_suffix <- '_large_corr_len'
26 |
27 | #synthetic datasets directory
28 | ds_dir <- 'Data/Verification/synthetic_datasets/ds1'
29 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix)
30 |
31 | # dataset info
32 | # ds_main_data_fname <- 'CatalogNGAWest3CA_synthetic_data'
33 | ds_main_data_fname <- 'CatalogNGAWest3CALite_synthetic_data'
34 | ds_id <- seq(1,5)
35 |
36 | #output info
37 | #main output filename
38 | out_fname_main <- 'NGAWest2CANorth_syndata'
39 | #main output directory
40 | out_dir_main <- 'Data/Verification/regression/ds1'
41 | #output sub-directory
42 | # out_dir_sub <- 'INLA_NGAWest2CANorth'
43 | #matern kernel function (nu=2)
44 | # out_dir_sub <- 'INLA_NGAWest2CANorth_fine'
45 | # out_dir_sub <- 'INLA_NGAWest2CANorth_medium'
46 | # out_dir_sub <- 'INLA_NGAWest2CANorth_coarse'
47 | #exponential kernel function
48 | # out_dir_sub <- 'INLA_NGAWest2CANorth_fine_nexp'
49 | # out_dir_sub <- 'INLA_NGAWest2CANorth_medium_nexp'
50 | # out_dir_sub <- 'INLA_NGAWest2CANorth_coarse_nexp'
51 |
52 | #inla parameters
53 | runinla_flag <- TRUE # TRUE or FALSE
54 | # NOTE(review): INLA's SPDE uses nu = alpha - d/2, so in 2D alpha=2 would give
55 | # nu=1 rather than nu=2 (alpha=3/2 gives nu=1/2, the exponential kernel);
56 | # confirm how RunINLA uses alpha
57 | # alpha <- 2 #matern kernel function nu=2
58 | alpha <- 3/2 #negative exponential kernel function
59 | res_name <- 'tot'
60 |
61 |
62 | #mesh coarseness
63 | # #fine
64 | # mesh_edge_max <- 5
65 | # mesh_inner_offset <- 15
66 | # mesh_outer_offset <- 15
67 | # #medium
68 | # mesh_edge_max <- 15
69 | # mesh_inner_offset <- 15
70 | # mesh_outer_offset <- 50
71 | # #coarse
72 | # mesh_edge_max <- 50
73 | # mesh_inner_offset <- 50
74 | # mesh_outer_offset <- 150
75 |
76 | #approximation options
77 | # if flag_gp_approx=TRUE uses int.strategy="eb" and strategy="gaussian"
78 | # int.strategy="eb" corresponds to one integration point, and
79 | # strategy="gaussian" approximates posteriors as gaussian distributions
80 | flag_gp_approx <- TRUE # TRUE or FALSE
81 |
82 | #append the synthetic dataset suffix to the output sub-directory
83 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
84 |
85 | # Run inla regression
86 | # ---------------------------
87 | #create dataframe with computation time
88 | df_run_info <- data.frame()
89 |
90 | #iterate over all synthetic datasets
91 | for (d_id in ds_id){
92 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
93 |   #run time start
94 |   run_t_strt <- Sys.time()
95 |   #input file names
96 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
97 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
98 |
99 |   #load files
100 |   df_flatfile <- read.csv(flatfile_fname)
101 |   #keep only North records of NGAWest2 (dsid==0 and sreg==1)
102 |   df_flatfile <- subset(df_flatfile, dsid==0 & sreg==1)
103 |
104 |   #output file name and directory
105 |   out_fname <- sprintf('%s%s_Y%i', out_fname_main, synds_suffix, d_id)
106 |   out_dir <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
107 |
108 |   #run INLA model
109 |   RunINLA(df_flatfile, out_fname, out_dir, res_name=res_name,
110 |           alpha=alpha,
111 |           mesh_edge_max=mesh_edge_max,
112 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
113 |           flag_gp_approx=flag_gp_approx,
114 |           runinla_flag=runinla_flag)
115 |
116 |   #run time end
117 |   run_t_end <- Sys.time()
118 |
119 |   #compute run time in minutes; the units are fixed because the default
120 |   #units of a time difference change with its magnitude (secs, mins, hours)
121 |   #and are dropped when run_info.csv is written
122 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
123 |
124 |   #log run time
125 |   df_r_i <- data.frame(computer_name=Sys.info()[["nodename"]], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
126 |   df_run_info <- rbind(df_run_info, df_r_i)
127 |
128 |   #write out run info
129 |   row.names(df_run_info) <- NULL
130 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
131 |   write.csv(df_run_info, out_fname, row.names=FALSE)
132 | }
133 |
134 |
--------------------------------------------------------------------------------
/Analyses/Code_Verification/regression/ds2/main_pystan_model2_corr_cells_NGAWest2CA.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Jul 14 14:17:52 2021
5 |
6 | @author: glavrent
7 | """
8 | # Working directory and Packages
9 | # ---------------------------
10 | #load libraries
11 | import os
12 | import sys
13 | import numpy as np
14 | import pandas as pd
15 | import time
16 | #user functions
17 | sys.path.insert(0,'../../../Python_lib/regression/pystan/')
18 | from regression_pystan_model2_corr_cells_unbounded_hyp import RunStan
19 | # from regression_pystan_model2_corr_cells_sparse_unbounded_hyp import RunStan
20 |
21 | # Define variables
22 | # ---------------------------
23 | #filename suffix
24 | # synds_suffix = '_small_corr_len'
25 | # synds_suffix = '_large_corr_len'
26 |
27 | #synthetic datasets directory
28 | ds_dir = '../../../../Data/Verification/synthetic_datasets/ds2'
29 | ds_dir = '%s%s/'%(ds_dir, synds_suffix)
30 |
31 | # dataset info
32 | #ds_fname_main = 'CatalogNGAWest3CA_synthetic_data'
33 | ds_fname_main = 'CatalogNGAWest3CALite_synthetic_data'
34 | ds_id = np.arange(1,6)
35 | #cell specific anelastic attenuation
36 | ds_fname_cellinfo = 'CatalogNGAWest3CALite_cellinfo'
37 | ds_fname_celldist = 'CatalogNGAWest3CALite_distancematrix'
38 |
39 | #stan model
40 | # sm_fname =
'../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp.stan' 41 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol.stan' 42 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient.stan' 43 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_unbounded_hyp_chol_efficient2.stan' 44 | # sm_fname = '../../../Stan_lib/regression_stan_model2_corr_cells_sparse_unbounded_hyp_chol_efficient2.stan' 45 | 46 | #output info 47 | #main output filename 48 | out_fname_main = 'NGAWest2CA_syndata' 49 | #main output directory 50 | out_dir_main = '../../../../Data/Verification/regression/ds2/' 51 | #output sub-directory 52 | #python 2 53 | # out_dir_sub = 'PYSTAN_NGAWest2CA_corr_cells' 54 | # out_dir_sub = 'PYSTAN_NGAWest2CA_corr_cells_chol' 55 | # out_dir_sub = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff' 56 | # out_dir_sub = 'PYSTAN_NGAWest2CA_corr_cells_chol_eff2' 57 | #python 3 58 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_corr_cells' 59 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_corr_cells_chol' 60 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff' 61 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff2' 62 | # out_dir_sub = 'PYSTAN3_NGAWest2CA_corr_cells_chol_eff_sp' 63 | 64 | #stan parameters 65 | runstan_flag = True 66 | pystan_ver = 2 67 | # pystan_ver = 3 68 | res_name = 'tot' 69 | n_iter = 1000 70 | n_chains = 4 71 | adapt_delta = 0.8 #0.9 72 | max_treedepth = 10 73 | #ergodic coefficients 74 | c_a_erg=0.0 75 | #parallel options 76 | # flag_parallel = True 77 | flag_parallel = False 78 | 79 | #output sub-dir with corr with suffix info 80 | out_dir_sub = f'%s%s'%(out_dir_sub, synds_suffix) 81 | 82 | #load cell dataframes 83 | cellinfo_fname = '%s%s.csv'%(ds_dir, ds_fname_cellinfo) 84 | celldist_fname = '%s%s.csv'%(ds_dir, ds_fname_celldist) 85 | df_cellinfo = pd.read_csv(cellinfo_fname) 86 | df_celldist = pd.read_csv(celldist_fname) 87 | 88 | # Run stan regression 89 | # 
--------------------------- 90 | #create datafame with computation time 91 | df_run_info = list() 92 | 93 | #iterate over all synthetic datasets 94 | for d_id in ds_id: 95 | print('Synthetic dataset %i fo %i'%(d_id, len(ds_id))) 96 | #run time start 97 | run_t_strt = time.time() 98 | #input flatfile 99 | ds_fname = '%s%s%s_Y%i.csv'%(ds_dir, ds_fname_main, synds_suffix, d_id) 100 | #load flatfile 101 | df_flatfile = pd.read_csv(ds_fname) 102 | #keep only NGAWest2 records 103 | df_flatfile = df_flatfile.loc[df_flatfile.dsid==0,:] 104 | 105 | #output file name and directory 106 | out_fname = '%s%s_Y%i'%(out_fname_main, synds_suffix, d_id) 107 | out_dir = '%s/%s/Y%i/'%(out_dir_main, out_dir_sub, d_id) 108 | 109 | #run stan model 110 | RunStan(df_flatfile, df_cellinfo, df_celldist, sm_fname, 111 | out_fname, out_dir, res_name, c_a_erg=c_a_erg, 112 | runstan_flag=runstan_flag, n_iter=n_iter, n_chains=n_chains, 113 | adapt_delta=adapt_delta, max_treedepth=max_treedepth, 114 | pystan_ver=pystan_ver, pystan_parallel=flag_parallel) 115 | 116 | #run time end 117 | run_t_end = time.time() 118 | 119 | #compute run time 120 | run_tm = (run_t_end - run_t_strt)/60 121 | 122 | #log run time 123 | df_run_info.append(pd.DataFrame({'computer_name':os.uname()[1],'out_name':out_dir_sub, 124 | 'ds_id':d_id,'run_time':run_tm}, index=[d_id])) 125 | 126 | #write out run info 127 | out_fname = '%s%s/run_info.csv'%(out_dir_main, out_dir_sub) 128 | pd.concat(df_run_info).reset_index(drop=True).to_csv(out_fname, index=False) 129 | 130 | 131 | -------------------------------------------------------------------------------- /Analyses/Code_Verification/regression/ds3/main_inla_model3_uncorr_cells_NGAWest3CA.R: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | # This script iterates over all sythetic datasets based on the NGAWest3 flatfile 3 | # and calculates the non-ergodic 
terms
4 | ##################################################################################
5 | # user uncomments one option each for synds_suffix, out_dir_sub and the mesh
6 | # coarseness (mesh_edge_max, mesh_inner_offset, mesh_outer_offset) below;
7 | # they are used by this script and are not defined otherwise.
8 |
9 | #user functions
10 | source('../../../R_lib/regression/inla/regression_inla_model3_uncorr_cells_unbounded_hyp.R')
11 |
12 | # Define variables
13 | # ---------------------------
14 | #main directory
15 | main_dir <- '../../../../' #local machine
16 | # main_dir <- '/u/scratch/g/glavrent/Research/Nonerg_GMM_methodology/' #Hoffman2
17 |
18 | #output filename suffix
19 | # synds_suffix <- '_small_corr_len'
20 | # synds_suffix <- '_large_corr_len'
21 |
22 | #synthetic datasets directory
23 | ds_dir <- 'Data/Verification/synthetic_datasets/ds3'
24 | ds_dir <- sprintf('%s%s', ds_dir, synds_suffix)
25 |
26 | # dataset info
27 | # ds_main_data_fname <- 'CatalogNGAWest3CA_synthetic_data'
28 | # ds_main_cellinfo_fname <- 'CatalogNGAWest3CA_cellinfo'
29 | # ds_main_cellmat_fname <- 'CatalogNGAWest3CA_distancematrix'
30 | ds_main_data_fname <- 'CatalogNGAWest3CALite_synthetic_data'
31 | ds_main_cellinfo_fname <- 'CatalogNGAWest3CALite_cellinfo'
32 | ds_main_cellmat_fname <- 'CatalogNGAWest3CALite_distancematrix'
33 | ds_id <- seq(1,5)
34 |
35 | #output info
36 | #main output filename
37 | # NOTE(review): the name refers to NGAWest2CA while the output sub-directories
38 | # below refer to NGAWest3CA; confirm which one is intended
39 | out_fname_main <- 'NGAWest2CA_syndata'
40 | #main output directory
41 | out_dir_main <- 'Data/Verification/regression/ds3'
42 | #output sub-directory
43 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells'
44 | #matern kernel function (nu=2)
45 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells_fine'
46 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells_medium'
47 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells_coarse'
48 | #exponential kernel function
49 | # NOTE(review): the ds1 INLA scripts use the suffix '_nexp' for this kernel;
50 | # confirm that '_nerg' is intended
51 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells_fine_nerg'
52 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells_medium_nerg'
53 | # out_dir_sub <- 'INLA_NGAWest3CA_uncorr_cells_coarse_nerg'
54 |
55 | #inla parameters
56 | runinla_flag <- TRUE
57 | # NOTE(review): INLA's SPDE uses nu = alpha - d/2, so in 2D alpha=2 would give
58 | # nu=1 rather than nu=2 (alpha=3/2 gives nu=1/2, the exponential kernel);
59 | # confirm how RunINLA uses alpha
60 | alpha <- 2 #matern kernel function nu=2
61 | # alpha <- 3/2 #negative exponential kernel function
62 | res_name <- 'tot'
63 | num_threads <- 8
64 |
65 | #mesh coarseness
66 | # #fine
67 | # mesh_edge_max <- 5
68 | # mesh_inner_offset <- 15
69 | # mesh_outer_offset <- 15
70 | # #medium
71 | # mesh_edge_max <- 15
72 | # mesh_inner_offset <- 15
73 | # mesh_outer_offset <- 50
74 | # #coarse
75 | # mesh_edge_max <- 50
76 | # mesh_inner_offset <- 50
77 | # mesh_outer_offset <- 150
78 |
79 | #ergodic coefficients
80 | c_2_erg <- -2.0
81 | c_3_erg <- -0.6
82 | c_a_erg <- 0.0 #anelastic attenuation
83 |
84 | #append the synthetic dataset suffix to the output sub-directory
85 | out_dir_sub <- sprintf('%s%s',out_dir_sub, synds_suffix)
86 |
87 | # Run inla regression
88 | # ---------------------------
89 | #create dataframe with computation time
90 | df_run_info <- data.frame()
91 |
92 | #iterate over all synthetic datasets
93 | for (d_id in ds_id){
94 |   print(paste("Synthetic dataset",d_id,"of",length(ds_id)))
95 |   #run time start
96 |   run_t_strt <- Sys.time()
97 |   #input file names
98 |   analysis_fname <- sprintf('%s%s_Y%i', ds_main_data_fname, synds_suffix, d_id)
99 |   flatfile_fname <- file.path(main_dir, ds_dir, sprintf('%s%s_Y%i.csv', ds_main_data_fname, synds_suffix, d_id))
100 |   cellinfo_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellinfo_fname))
101 |   cellmat_fname <- file.path(main_dir, ds_dir, sprintf('%s.csv', ds_main_cellmat_fname))
102 |
103 |   #load files
104 |   df_flatfile <- read.csv(flatfile_fname)
105 |   df_cellinfo <- read.csv(cellinfo_fname)
106 |   df_cellmat <- read.csv(cellmat_fname)
107 |
108 |   #output file name and directory
109 |   out_fname <- sprintf('%s%s_Y%i', out_fname_main, synds_suffix, d_id)
110 |   out_dir <- sprintf('%s%s/%s/Y%i', main_dir, out_dir_main, out_dir_sub, d_id)
111 |
112 |   #run INLA model
113 |   RunINLA(df_flatfile, df_cellinfo, df_cellmat, out_fname, out_dir, res_name=res_name,
114 |           c_2_erg=c_2_erg, c_3_erg=c_3_erg, c_a_erg=c_a_erg,
115 |           alpha=alpha,
116 |           mesh_edge_max=mesh_edge_max,
117 |           mesh_inner_offset=mesh_inner_offset, mesh_outer_offset=mesh_outer_offset,
118 |           n_threads=num_threads,
119 |           runinla_flag=runinla_flag)
120 |
121 |   #run time end
122 |   run_t_end <- Sys.time()
123 |
124 |   #compute run time in minutes; the units are fixed because the default
125 |   #units of a time difference change with its magnitude (secs, mins, hours)
126 |   #and are dropped when run_info.csv is written
127 |   run_tm <- as.numeric(difftime(run_t_end, run_t_strt, units='mins'))
128 |
129 |   #log run time
130 |   df_r_i <- data.frame(computer_name=Sys.info()[["nodename"]], out_name=out_dir_sub, ds_id=d_id, run_time=run_tm)
131 |   df_run_info <- rbind(df_run_info, df_r_i)
132 |
133 |   #write out run info
134 |   row.names(df_run_info) <- NULL
135 |   out_fname <- sprintf('%s%s/%s/run_info.csv', main_dir, out_dir_main, out_dir_sub)
136 |   write.csv(df_run_info, out_fname, row.names=FALSE)
137 | }
138 |
--------------------------------------------------------------------------------