├── data_preprocess ├── tracer_demo │ ├── chichi.evt │ ├── input │ ├── ._src │ ├── ._input │ ├── tracer │ ├── ._README │ ├── ._tracer │ ├── ._tt.table │ ├── src │ │ ├── tracer │ │ ├── ._tracer │ │ ├── ._setup.inc │ │ ├── ._subr.f90 │ │ ├── ._tracer.f90 │ │ ├── setup.inc │ │ └── tracer.f90 │ ├── ._P_path.txt │ ├── ._S_path.txt │ ├── ._chichi.evt │ ├── ._chichi.sta │ ├── ._vel3d.mod │ ├── tracer.sh │ ├── create_input_file │ │ └── input_file.py │ ├── chichi.sta │ ├── read_velocity_model.py │ ├── README │ └── tt.table ├── images │ └── workflow.png ├── 11_concat_final_table.py ├── cut_hdf5_sample.py ├── process_time_shift.py ├── README.md ├── 6_checked_traces_events.py ├── plot_event_trace_distribution.py ├── 4_data_selection.py ├── 1_afile_to_catalog.py ├── 10_check_station_overlap.py ├── 7_traces_picking.py ├── 8_shift_picking_by_velocity_model.py ├── plot_cut_traces.py ├── 9_label.py ├── 0918_M6.8_1319_1330 │ ├── 1_merge_sta_info.py │ ├── 3_label.py │ ├── 4_cut_waveform.py │ └── 2_picking.py ├── plot_picking_waveform.py ├── 2_catalog_records_cleaning.py ├── plot_data_distribution.py ├── 3_station_location_dataset.py ├── analyze_resample.py ├── plot_double_event.py ├── Vs30_preprocess.py ├── 13_cut_waveform_to_hdf5.py ├── 5_check_waveform.py └── 12_TSMIP_vs30.py ├── images ├── Meinong_event.gif ├── Taitung_offshore_event.gif └── TEAM-Taiwan_model_architecture.png ├── prediction_images_in_readme └── ref.png ├── docker ├── requirements.txt ├── Dockerfile └── README.md ├── requirements.txt ├── data ├── plot_station_distribution.py ├── intensity_sort_from_station.py ├── plot_number_of_traces_station_map.py ├── data_visualize.py └── visualize.py ├── model_train_predict ├── plot_loss_curve.py └── predict_ensemble_merge_info.py ├── model_performance_analysis ├── plot_event_input_stations.py ├── plot_0918_M6.8_event_intensity_map.py ├── 0403_Hualien_Earthquake │ ├── plot_input_station.py │ ├── 3_waveform_after_preprocess.py │ ├── 4_plot_intensity_map.py │ ├── 2_TTSAM_0403.py │ ├── 5_plot_confusion_matrix.py │ ├── residual_map.py │ └── 1_find_trigger_station.py ├── analyze_prediction_in_magnitude.py ├── plot_CWA_TTSAM_intensity_comparision.py ├── plot_residual.py ├── plot_event_input_waveforms.py ├── warning_time_maximize.py ├── plot_intensity_map_and_warning_time.py ├── confusion_matrix_multi_station.py └── plot_CWA_EEW_intensity.py ├── .gitignore ├── README.md └── feature_map_correlation ├── plot_single_event_feature_map.py ├── tlcc_analysis.py └── feature_map_correlation.py /data_preprocess/tracer_demo/chichi.evt: -------------------------------------------------------------------------------- 1 | 120.816 23.853 7.5 2 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/input: -------------------------------------------------------------------------------- 1 | 2 2 | chichi.evt 3 | chichi.sta 4 | 1 5 | 1 6 | -------------------------------------------------------------------------------- /images/Meinong_event.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/images/Meinong_event.gif -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._src: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._src 
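The `data_preprocess/tracer_demo/input` file shown above is the five-line driver that the Fortran `tracer` program reads on standard input; `tracer.sh` further below documents what each line means (mode option, source `.evt` file, receiver `.sta` file, ray-path output flag, ASCII/binary output flag). A minimal sketch of generating such a driver file from Python is shown here; the helper name `write_tracer_input` is illustrative and does not exist in the repository:

```python
# Hypothetical helper: writes the same 5-line driver file that `tracer`
# reads on stdin (line meanings follow the comments in tracer.sh).
def write_tracer_input(path, evt_file, sta_file, output_raypath=True, ascii_output=True):
    lines = [
        "2",                             # mode option: 1 = two points, 2 = two files
        evt_file,                        # source file (lon lat depth)
        sta_file,                        # receiver file (lon lat elev name)
        "1" if output_raypath else "0",  # output ray path: 1 = yes, 0 = no
        "1" if ascii_output else "2",    # output type: 1 = ascii, 2 = binary
    ]
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")


# Reproduces the demo driver shown above:
# write_tracer_input("input", "chichi.evt", "chichi.sta")
```

Note that `tracer.sh` wraps the file paths in double quotes when it builds `input.txt`, while the demo `input` file uses unquoted names; both are accepted by the demo setup.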
-------------------------------------------------------------------------------- /images/Taitung_offshore_event.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/images/Taitung_offshore_event.gif -------------------------------------------------------------------------------- /data_preprocess/images/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/images/workflow.png -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._input: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._input -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/tracer -------------------------------------------------------------------------------- /prediction_images_in_readme/ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/prediction_images_in_readme/ref.png -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._README -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._tracer -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._tt.table: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._tt.table -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/tracer -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._P_path.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._P_path.txt -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._S_path.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._S_path.txt -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._chichi.evt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._chichi.evt -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._chichi.sta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._chichi.sta -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._vel3d.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._vel3d.mod -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._tracer -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._setup.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._setup.inc -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._subr.f90: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._subr.f90 -------------------------------------------------------------------------------- /images/TEAM-Taiwan_model_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/images/TEAM-Taiwan_model_architecture.png -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._tracer.f90: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._tracer.f90 -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.10.2 2 | numpy==1.22.2 3 | pandas==1.3.5 4 | h5py==2.10.0 5 | mlflow==1.30.0 6 | scipy==1.8.0 7 | matplotlib==3.5.1 8 | tables==3.6.1 9 | scikit-learn==1.1.0 10 | black==23.1.0 11 | Cartopy==0.21.1 12 | obspy==1.3.0 13 | seaborn==0.11.2 14 | tqdm==4.63.0 15 | ViTables==3.0.2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | black==23.1.0 2 | Cartopy==0.21.1 3 | h5py==2.10.0 4 | matplotlib==3.5.1 5 | mlflow==1.30.0 6 | numpy==1.22.2 7 | obspy==1.3.0 8 | pandas==1.3.5 9 | scikit-learn==1.1.0 10 | scipy==1.8.0 11 | seaborn==0.11.2 12 | tables==3.6.1 13 | torch==1.10.2+cu113 14 | torchaudio==0.10.2+cu113 15 | torchsampler==0.1.2 16 | torchsummary==1.5.1 17 | torchvision==0.11.3+cu113 18 | tqdm==4.63.0 19 | ViTables==3.0.2 20 | -------------------------------------------------------------------------------- /data/plot_station_distribution.py: -------------------------------------------------------------------------------- 1 | import pandas 
as pd 2 | from visualize import plot_station_distribution 3 | 4 | data = pd.read_csv("../data_preprocess/events_traces_catalog/1999_2019_final_traces_Vs30.csv") 5 | 6 | unique_station = data.drop_duplicates(subset="station_name") 7 | 8 | fig,ax=plot_station_distribution(stations=unique_station,title="TSMIP station distribution") 9 | 10 | # fig.savefig(f"paper image/TSMIP_station_distribution.png",dpi=300) 11 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.12 2 | 3 | # Ignore all interactive dialog during apt-get update 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | # Install linux package 7 | RUN apt-get update && apt-get upgrade -y; \ 8 | apt-get install -y \ 9 | curl git htop sudo vim \ 10 | python3-dev python3-pip libgeos-dev 11 | 12 | # Python package install 13 | COPY requirements.txt /tmp/ 14 | RUN python3 -m pip install --upgrade pip; 15 | RUN python3 -m pip install shapely --no-binary shapely; 16 | RUN python3 -m pip --no-cache-dir install --requirement /tmp/requirements.txt; 17 | 18 | # Define working directory 19 | CMD ["bash"] -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/tracer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for folder in inputs/*/; 4 | do 5 | eq_id_path="${folder%/}" 6 | echo $eq_id_path 7 | echo "2" > input.txt #mode option: 1-two points, 2-two files 8 | echo "\"$eq_id_path/event_input.evt\"" >> input.txt #source file 9 | echo "\"$eq_id_path/station_input.sta\"" >> input.txt #receiver file 10 | echo "1" >> input.txt #output raypath-1, otherwise 0 11 | echo "1" >> input.txt #output type 1-ascii, 2-binary 12 | #Fortran tracer 13 | ./tracer < input.txt 14 | 15 | output_file="$eq_id_path/output.table" 16 | cp "tt.table" $output_file 17 | echo "==========================" 18 | done 19 | 20 | 21 | -------------------------------------------------------------------------------- /model_train_predict/plot_loss_curve.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | data = pd.read_csv("../model/model11_loss.csv") 5 | 6 | train_loss = data.query("key=='train_loss'") 7 | validation_loss = data.query("key=='val_loss'") 8 | 9 | fig, ax = plt.subplots() 10 | ax.plot(train_loss["step"], train_loss["value"], label="train") 11 | ax.plot(validation_loss["step"], validation_loss["value"], label="validation") 12 | ax.scatter( 13 | validation_loss["step"][validation_loss["value"].idxmin()], 14 | validation_loss["value"].min(), 15 | c="red", 16 | s=30, 17 | ) 18 | ax.legend() 19 | ax.set_ylabel("loss") 20 | ax.set_xlabel("epoch") 21 | # fig.savefig(f"model/model11_loss_curve.png",dpi=300) 22 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_event_input_stations.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from analysis import Triggered_Map 3 | 4 | # plot input station map 5 | mask_after_sec = 10 6 | eq_id = 25900 7 | prediction_with_info = pd.read_csv( 8 | f"../predict/station_blind_noVs30_bias2closed_station_2016/{mask_after_sec} sec ensemble 510 with all info.csv" 9 | ) 10 | record_prediction = prediction_with_info.query(f"EQ_ID=={eq_id}") 11 | first_trigger_time = 
min(record_prediction["p_picks"]) 12 | input_station = record_prediction[ 13 | record_prediction["p_picks"] < first_trigger_time + (mask_after_sec * 200) 14 | ] 15 | 16 | 17 | if len(input_station) >= 25: 18 | input_station = input_station[:25] 19 | 20 | fig, ax = Triggered_Map.plot_station_map( 21 | trace_info=input_station, 22 | sec=mask_after_sec, 23 | EQ_ID=eq_id, 24 | pad=100, 25 | ) 26 | 27 | # fig.savefig( 28 | # f"../paper image/eqid{eq_id}_{mask_after_sec}_sec_station_input.png",dpi=300 29 | # ) 30 | -------------------------------------------------------------------------------- /data_preprocess/11_concat_final_table.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | start_year1=1999 4 | end_year1=2008 5 | traces1 = pd.read_csv( 6 | f"./events_traces_catalog/{start_year1}_{end_year1}_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 7 | ) 8 | catalog1 = pd.read_csv( 9 | f"./events_traces_catalog/{start_year1}_{end_year1}_ok_events_p_arrival_abstime.csv" 10 | ) 11 | 12 | start_year2=2009 13 | end_year2=2019 14 | traces2 = pd.read_csv( 15 | f"./events_traces_catalog/{start_year2}_{end_year2}_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 16 | ) 17 | catalog2 = pd.read_csv( 18 | f"./events_traces_catalog/{start_year2}_{end_year2}_ok_events_p_arrival_abstime.csv" 19 | ) 20 | 21 | final_trace=pd.concat([traces1,traces2]) 22 | 23 | final_catalog=pd.concat([catalog1,catalog2]) 24 | 25 | # final_trace.to_csv(f"./events_traces_catalog/{start_year1}_{end_year2}_final_traces.csv",index=False) 26 | # final_catalog.to_csv(f"./events_traces_catalog/{start_year1}_{end_year2}_final_catalog.csv",index=False) 27 | -------------------------------------------------------------------------------- /data_preprocess/cut_hdf5_sample.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import pandas as pd 3 | 4 | 5 | data_path="../data/TSMIP_1999_2019_Vs30.hdf5" 6 | init_event_metadata = pd.read_hdf(data_path, "metadata/event_metadata") 7 | trace_metadata = pd.read_hdf(data_path, "metadata/traces_metadata") 8 | 9 | sample_eqid=init_event_metadata.query("year==2016")["EQ_ID"] 10 | 11 | 12 | with h5py.File(data_path, "r") as origin, h5py.File("../data/2016_sample.hdf5", 'w') as sample: 13 | sample.create_group("data") 14 | sample.create_group("metadata") 15 | 16 | for eqid in sample_eqid.values: 17 | print(eqid) 18 | data = origin["data"][str(eqid)] 19 | sample_group=sample["data"].create_group(f"{eqid}") 20 | 21 | for col in data: 22 | attr=data[f"{col}"] 23 | 24 | sample_group.copy(attr,col) 25 | 26 | init_event_metadata.to_hdf('2016_sample.hdf5', key="metadata/event_metadata", mode="a", format="table") 27 | trace_metadata.to_hdf('2016_sample.hdf5', key="metadata/traces_metadata", mode="a", format="table") 28 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_0918_M6.8_event_intensity_map.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from analysis import Intensity_Plotter 3 | 4 | predict_path = "../predict/station_blind_Vs30_bias2closed_station_2016/0918_M6.8_1319_1330" 5 | catalog_path="../data_preprocess/0918_M6.8_1319_1330" 6 | mask_sec = 10 7 | catalog = pd.read_csv(f"{catalog_path}/event_catalog.csv") 8 | prediction = pd.read_csv( 9 | f"{predict_path}/{mask_sec} sec model11 eqid_30792 prediction with all info.csv" 10 | ) 11 | 
catalog["longitude"]=catalog["lon"]+catalog["lon_minute"]/60 12 | catalog["latitude"]=catalog["lat"]+catalog["lat_minute"]/60 13 | fig, ax = Intensity_Plotter.plot_intensity_map( 14 | trace_info=prediction, 15 | eventmeta=catalog, 16 | label_type="pga", 17 | true_label=prediction["answer"], 18 | pred_label=prediction["predict"], 19 | sec=mask_sec, 20 | EQ_ID=None, 21 | grid_method="linear", 22 | pad=100, 23 | title=f"{mask_sec} sec intensity Map", 24 | ) 25 | # fig.savefig(f"{predict_path}/{mask_sec} sec intensity map.png",dpi=300) 26 | -------------------------------------------------------------------------------- /data_preprocess/process_time_shift.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | """ 4 | The script calculates the time between first triggered station got waveform and earthquake occurred. 5 | """ 6 | 7 | input_path = "./events_traces_catalog" 8 | catalog = pd.read_csv(f"{input_path}/1999_2019_final_catalog.csv") 9 | traces = pd.read_csv(f"{input_path}/1999_2019_final_traces_Vs30.csv") 10 | 11 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 12 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 13 | ) 14 | catalog["event_time"] = pd.to_datetime( 15 | catalog[["year", "month", "day", "hour", "minute", "second"]] 16 | ) 17 | 18 | 19 | eq_id_list = [24757, 24784, 25112, 25193, 25225, 25396, 25401, 25561, 25900] 20 | 21 | for eq_id in eq_id_list: 22 | event = catalog.query(f"EQ_ID=={eq_id}") 23 | triggered_trace = traces.query(f"EQ_ID=={eq_id}") 24 | 25 | first_triggered_trace = triggered_trace.loc[ 26 | triggered_trace["p_arrival_abs_time"].idxmin() 27 | ] 28 | p_wave_propogated_time = ( 29 | first_triggered_trace["p_arrival_abs_time"] - event["event_time"] 30 | ) 31 | print(eq_id, p_wave_propogated_time) 32 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/plot_input_station.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | import sys 4 | sys.path.append("..") 5 | from analysis import Triggered_Map 6 | 7 | i = 1 8 | # for mask_after_sec in range(1,11): 9 | mask_after_sec = 10 10 | with open(f"model_input/{mask_after_sec}_sec/{i}.json", "r") as json_file: 11 | data = json.load(json_file) 12 | 13 | station = data["sta"] 14 | station_info = pd.DataFrame( 15 | station, columns=["latitude", "longitude", "elevation", "Vs30"] 16 | ) 17 | condition = ( 18 | (station_info["latitude"] == 0) 19 | & (station_info["longitude"] == 0) 20 | & (station_info["elevation"] == 0) 21 | & (station_info["Vs30"] == 0) 22 | ) 23 | station_info = station_info.drop(station_info[condition].index) 24 | 25 | station_info["event_lon"]=121.67 26 | station_info["event_lat"]=23.77 27 | station_info['magnitude']=7.2 28 | 29 | fig,ax_map=Triggered_Map.plot_station_map(trace_info=station_info,min_epdis=10.87177078,sec=mask_after_sec) 30 | 31 | ax_map.set_title(f"After {mask_after_sec} seconds") 32 | 33 | # fig.savefig(f"triggered_station/{mask_after_sec}_sec_triggered_station.png", dpi=300) 34 | -------------------------------------------------------------------------------- /data_preprocess/README.md: -------------------------------------------------------------------------------- 1 | # Data Preprocess 2 | There are four components you need to prepare first 3 | 1. **Event catalog** 4 | 2. **Traces catalog** 5 | 3. **Seismic waveform** 6 | 4. 
**Vs30 table for TSMIP station** 7 | 8 | Please follow the steps in order: 9 | 10 | `1_afile_to_catalog.py` 11 | 12 | `2_catalog_records_cleaning.py` 13 | 14 | ... 15 | 16 | `13_cut_waveform_to_hdf5.py` 17 | 18 | Each step produces a `.csv` output, which is the input for the next script. 19 | 20 | After finishing these steps, you will get an `.hdf5` file that includes all of the information prepared above. 21 | 22 | ## Preprocess Workflow 23 | ![image](images/workflow.png) 24 | 25 | ### Others 26 | 27 | We used a 3D velocity model to shift the P-wave arrival time, following Huang et al., 2014 28 | 29 | Paper link: 30 | 31 | https://www.sciencedirect.com/science/article/pii/S0012821X14000995 32 | 33 | All of this work is in `tracer_demo`. 34 | 35 | The Vs30 dataset was contributed by Kuo et al., 2012 and Lee et al., 2008 36 | 37 | Paper link: 38 | 39 | https://www.sciencedirect.com/science/article/pii/S0013795212000397 40 | 41 | http://tao.cgu.org.tw/index.php/articles/archive/geophysics/item/799-2008196671pt 42 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Dockerfile for TT-SAM environment (CPU version) 2 | 3 | This Dockerfile is based on the `python:3.8.12` image, and installs several Linux packages and Python packages to create a Python environment for running TT-SAM. 4 | 5 | ## Usage 6 | To build the Docker image, run: 7 | ``` 8 | docker build -t <image_name> . 9 | ``` 10 | where `<image_name>` is the desired name for the Docker image. 11 | 12 | To run a container based on this image, use: 13 | ``` 14 | docker run -it <image_name> bash 15 | ``` 16 | 17 | This will launch an interactive shell in the container, with access to the installed packages and Python environment. 18 | 19 | ## Packages 20 | The Dockerfile installs the following Linux packages: 21 | 22 | - `curl` 23 | - `git` 24 | - `htop` 25 | - `sudo` 26 | - `vim` 27 | - `python3-dev` 28 | - `python3-pip` 29 | - `libgeos-dev` 30 | 31 | And the following Python packages, installed via pip: 32 | 33 | - `shapely` (built from source) 34 | - any packages listed in `requirements.txt` 35 | 36 | ## Notes 37 | - The `ENV DEBIAN_FRONTEND noninteractive` line is included to prevent any interactive prompts during the package installation process. 38 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/setup.inc: -------------------------------------------------------------------------------- 1 | !------ DIMENSION SETTING ------ 2 | ! Data 3 | integer maxnsta 4 | parameter(maxnsta=1500) 5 | ! Model 6 | integer maxnlat,maxnlon,maxndep,ilatdeg,ilondeg,idepkm 7 | parameter(maxnlat=100) 8 | parameter(maxnlon=100) 9 | parameter(maxndep=100) 10 | !-ilatdeg, ilondeg are size of map in latitude, longitude 11 | parameter(ilatdeg=100000) 12 | parameter(ilondeg=100000) 13 | parameter(idepkm=100000) 14 | 15 | !------ PARAMETER SETTING ------ 16 | ! getdata 17 | real*8 sta_loc(maxnsta,3),ray_tt(maxnsta,2),ray_wei(maxnsta,2) 18 | real*8 olon,olat,odep,omag 19 | integer date,time,sta_idx(maxnsta),npair 20 | character*6 sta_nm(maxnsta) 21 | character*12 pfile 22 | common/data/sta_loc,ray_tt,ray_wei,date,time,olon,olat,odep,omag,sta_idx,npair,sta_nm,pfile 23 | ! 
input_vel 24 | real*8 bld1,bld2 25 | real*8 lat_a(maxnlat),lon_a(maxnlon),dep_a(maxndep) 26 | real*8 vp_a(maxnlon,maxnlat,maxndep),vs_a(maxnlon,maxnlat,maxndep) 27 | integer nlat_a,nlon_a,ndep_a,ips 28 | common/vmodel_a/vp_a,vs_a,lat_a,lon_a,dep_a,bld1,bld2,nlat_a,nlon_a,ndep_a,ips 29 | ! bldmap 30 | real*8 lat1_a,lon1_a,dep1_a 31 | integer ilonloc_a(ilondeg),ilatloc_a(ilatdeg),ideploc_a(idepkm) 32 | common/loc_a/ lat1_a,lon1_a,dep1_a,ilonloc_a,ilatloc_a,ideploc_a 33 | -------------------------------------------------------------------------------- /data/intensity_sort_from_station.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | path = "../data_preprocess" 6 | data = pd.read_csv(f"{path}/events_traces_catalog/1999_2019_final_traces_Vs30.csv") 7 | 8 | stations = data["station_name"].unique() 9 | 10 | for station in stations: 11 | print(station) 12 | tmp_data = data.query(f"station_name=='{station}'") 13 | fig, ax = plt.subplots() 14 | ax.hist( 15 | tmp_data["pga"], 16 | bins=30, 17 | ec="black", 18 | ) 19 | hist, bins = np.histogram(tmp_data["pga"], bins=30) 20 | pga_threshold = np.log10( 21 | [1e-5, 0.008, 0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10] 22 | ) 23 | label = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 24 | ax.vlines(pga_threshold[1:-1], 0, hist.max()+5, linestyles="dotted", color="k") 25 | for i in range(len(label)): 26 | if label[i] == "0": 27 | continue 28 | ax.text( 29 | ((pga_threshold[i] + pga_threshold[i + 1]) / 2) - 0.05, hist.max()+5, label[i] 30 | ) 31 | ax.set_xlabel(r"PGA log(${m/s^2}$)", fontsize=12) 32 | ax.set_ylabel("Number of traces", fontsize=12) 33 | ax.set_title(f"station name: {station}", fontsize=15) 34 | # fig.savefig(f"{path}/each_station_distribution/{station}.png", dpi=300) 35 | plt.close() 36 | -------------------------------------------------------------------------------- /model_performance_analysis/analyze_prediction_in_magnitude.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | 4 | from analysis import Intensity_Plotter 5 | 6 | path="../predict/station_blind_Vs30_bias2closed_station_2016" 7 | mask_after_sec=7 8 | prediction_with_info=pd.read_csv(f"{path}/{mask_after_sec} sec model11 with all info.csv") 9 | # ===========plot mag>=5.5=========== 10 | mag5_5_prediction = prediction_with_info.query("magnitude>=5.5") 11 | label_type = "pga" 12 | fig, ax = Intensity_Plotter.plot_true_predicted( 13 | y_true=mag5_5_prediction["answer"], 14 | y_pred=mag5_5_prediction["predict"], 15 | quantile=False, 16 | agg="point", 17 | point_size=70, 18 | target=label_type, 19 | title=f"Magnitude>=5.5 event {mask_after_sec} sec", 20 | ) 21 | 22 | # ===========check prediction in magnitude=========== 23 | 24 | label = "pga" 25 | fig, ax = Intensity_Plotter.plot_true_predicted( 26 | y_true=prediction_with_info["answer"][prediction_with_info["magnitude"] >= 5], 27 | y_pred=prediction_with_info["predict"][prediction_with_info["magnitude"] >= 5], 28 | quantile=False, 29 | agg="point", 30 | point_size=20, 31 | target=label, 32 | ) 33 | 34 | ax.scatter( 35 | prediction_with_info["answer"][prediction_with_info["magnitude"] < 5], 36 | prediction_with_info["predict"][prediction_with_info["magnitude"] < 5], 37 | c="r", 38 | label="magnitude < 5", 39 | ) -------------------------------------------------------------------------------- 
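The PGA bin edges used in `intensity_sort_from_station.py` above (log10 of values in m/s²) delimit the intensity classes drawn on the histograms. Purely as an illustration of that mapping (this helper is not part of the repository), a log10(PGA) value can be converted to its intensity label with the same edges:

```python
import numpy as np

# Same bin edges (m/s^2, before log10) and labels as in intensity_sort_from_station.py.
PGA_THRESHOLD = np.log10(
    [1e-5, 0.008, 0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]
)
LABELS = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"]


def pga_to_intensity(log_pga: float) -> str:
    """Map a log10(PGA) value to its intensity label (illustrative helper)."""
    # np.digitize returns the index of the bin that the value falls into.
    idx = int(np.digitize(log_pga, PGA_THRESHOLD)) - 1
    return LABELS[min(max(idx, 0), len(LABELS) - 1)]


# Example: pga_to_intensity(np.log10(0.3)) -> "4"  (0.25 <= 0.3 m/s^2 < 0.8)
```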
/data/plot_number_of_traces_station_map.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from visualize import plot_received_traces_station_map 3 | 4 | sta_path = "data/station_information" 5 | input_path = "predict/station_blind_Vs30_bias2closed_station_2016" 6 | output_path = "./data preprocess/events_traces_catalog" 7 | prediction = pd.read_csv(f"{input_path}/model 11 5 sec prediction.csv") 8 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 9 | merge_traces = pd.merge( 10 | prediction, 11 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 12 | how="left", 13 | left_on=["latitude", "longitude", "elevation"], 14 | right_on=["latitude", "longitude", "elevation (m)"], 15 | ) 16 | total_station_value_counts = ( 17 | merge_traces["location_code"] 18 | .value_counts() 19 | .rename_axis("location_code") 20 | .reset_index(name="counts") 21 | ) 22 | total_station_value_counts = pd.merge( 23 | total_station_value_counts, 24 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 25 | how="left", 26 | left_on="location_code", 27 | right_on="location_code", 28 | ) 29 | title = "Number of records received by stations in test data" 30 | fig, ax = plot_received_traces_station_map(total_station_value_counts, title=title) 31 | 32 | # total_station_value_counts.to_csv( 33 | # "predict/station_blind_Vs30_bias2closed_station_2016/Number of records received by stations in train data.csv", 34 | # index=False, 35 | # ) 36 | -------------------------------------------------------------------------------- /data_preprocess/6_checked_traces_events.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | start_year=1999 5 | end_year=2008 6 | traces=pd.read_csv(f"events_traces_catalog/{start_year}_{end_year}_target_traces.csv") 7 | events=pd.read_csv(f"events_traces_catalog/{start_year}_{end_year}_target_catalog.csv") 8 | 9 | traces.quality_control.value_counts().plot(kind='pie', autopct='%.1f%%') 10 | labels = traces.quality_control.unique() 11 | plt.legend(labels=labels) 12 | 13 | # keep traces whose quality_control is "y" 14 | y_filter=(traces["quality_control"]=="y") 15 | 16 | ok_traces=traces[y_filter] 17 | 18 | 19 | 20 | # some events had only one intensity>=4 trace and it was broken; remove the remaining traces of those events 21 | intensity_filter=(ok_traces["intensity"]>=4) 22 | include_intensity_4=ok_traces[intensity_filter]["EQ_ID"].unique().tolist() 23 | ok_traces_filter=(ok_traces["EQ_ID"].isin(include_intensity_4)) 24 | ok_traces=ok_traces[ok_traces_filter] 25 | 26 | # ok_traces.to_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_traces.csv",index=False) 27 | 28 | # plot intensity distribution after removing traces 29 | fig,ax=plt.subplots() 30 | ax.hist(traces["intensity"],bins=16,edgecolor="gray") 31 | ax.hist(ok_traces["intensity"],bins=16,edgecolor="gray") 32 | plt.yscale("log") 33 | 34 | # remove the same events from the event catalog as well 35 | ok_event_filter=(events["EQ_ID"].isin(include_intensity_4)) 36 | ok_events=events[ok_event_filter] 37 | # ok_events.to_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_events.csv",index=False) 38 | 39 | # plot magnitude distribution after removing events 40 | fig,ax=plt.subplots() 41 | ax.hist(events["magnitude"],bins=28,edgecolor="gray") 42 | ax.hist(ok_events["magnitude"],bins=28,edgecolor="gray") 43 | plt.yscale("log") -------------------------------------------------------------------------------- /data_preprocess/plot_event_trace_distribution.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | trace = pd.read_csv("./events_traces_catalog/1999_2019_final_traces_Vs30.csv") 6 | catalog = pd.read_csv("./events_traces_catalog/1999_2019_final_catalog.csv") 7 | 8 | fig, ax = plt.subplots(figsize=(7, 7)) 9 | ax.hist( 10 | [trace.query("year>=2009")["pga"],trace.query("year<2009")["pga"]], 11 | bins=25, 12 | edgecolor="black", 13 | stacked=True, 14 | label=["origin","increased"], 15 | ) 16 | ax.legend(loc='best') 17 | ax.set_yscale("log") 18 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 19 | pga_threshold = np.log10( 20 | [0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0,10]) 21 | ax.vlines(pga_threshold[1:-1], 0, 35000, linestyles="dotted", color="k") 22 | for i in range(len(pga_threshold) - 1): 23 | ax.text((pga_threshold[i] + pga_threshold[i + 1]) / 2, 15000, label[i]) 24 | ax.set_ylabel("number of trace") 25 | ax.set_xlabel("log(PGA (m/s2))") 26 | ax.set_title("TSMIP data PGA distribution") 27 | # fig.savefig("./events_traces_catalog/pga distribution.png",dpi=300) 28 | 29 | fig, ax = plt.subplots(figsize=(7, 7)) 30 | ax.hist( 31 | [catalog.query("year>=2009")["magnitude"],catalog.query("year<2009")["magnitude"]], 32 | bins=25, 33 | edgecolor="black", 34 | stacked=True, 35 | label=["origin","increased"], 36 | ) 37 | ax.legend(loc='best') 38 | ax.set_yscale("log") 39 | ax.set_ylabel("number of event") 40 | ax.set_xlabel("magnitude") 41 | ax.set_title("TSMIP data magnitude distribution") 42 | # fig.savefig("./events_traces_catalog/magnitude distribution.png",dpi=300) -------------------------------------------------------------------------------- /data_preprocess/4_data_selection.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | start_year = 1999 4 | end_year = 2008 5 | intensity_threshold = 4 6 | magnitude_thrshold = 5.5 7 | 8 | 9 | Afile_path = "../data/Afile" 10 | sta_path = "../data/station_information" 11 | traces = pd.read_csv( 12 | f"{Afile_path}/1991-2020 traces (no broken data, double event).csv" 13 | ) 14 | catalog = pd.read_csv(f"{Afile_path}/1991-2020 catalog.csv") 15 | 16 | # traces station location doesn't exist 17 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 18 | sta_filter = traces["station_name"].isin(station_info["location_code"]) 19 | traces_exist_sta = traces[sta_filter] 20 | 21 | # find Earthquake that at least 1 trace intensity > 4 & magnitude >=3.5 22 | target_traces = traces_exist_sta.query(f"year>={start_year} & year<={end_year}") 23 | EQ_ID = ( 24 | target_traces.query(f"intensity >= {intensity_threshold}")["EQ_ID"] 25 | .unique() 26 | .tolist() 27 | ) 28 | output_catalog = catalog.query(f"EQ_ID in {EQ_ID} & magnitude >= {magnitude_thrshold}") 29 | output_traces = target_traces.copy() 30 | EQ_ID = output_catalog["EQ_ID"].tolist() 31 | output_traces = output_traces.query(f"EQ_ID in {EQ_ID}") 32 | 33 | # check nan 34 | output_traces.isnull().sum(axis=0) 35 | output_catalog.isnull().sum(axis=0) 36 | # plot magnitude hist & check intensity 37 | output_catalog["magnitude"].hist(bins=16) 38 | output_traces["intensity"].hist(bins=20) 39 | output_traces["intensity"].value_counts() 40 | 41 | # output_catalog.to_csv(f"events_traces_catalog/{start_year}_{end_year}_target_catalog.csv", index=False) 42 | # output_traces.to_csv(f"events_traces_catalog/{start_year}_{end_year}_target_traces.csv", index=False) 43 | 
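The selection in `4_data_selection.py` above boils down to two set operations after the station-existence and year filtering: keep events with at least one trace reaching the intensity threshold, apply the magnitude cut, then keep only traces belonging to the surviving events. A compact, hypothetical refactoring of that logic (the function name and signature are not part of the repository) could look like this:

```python
import pandas as pd


def select_events(traces: pd.DataFrame, catalog: pd.DataFrame,
                  intensity_threshold: float = 4, magnitude_threshold: float = 5.5):
    """Return (catalog, traces) restricted to events that have at least one
    trace with intensity >= intensity_threshold and magnitude >= magnitude_threshold."""
    # Events with at least one strong-motion trace.
    strong_eq_ids = traces.loc[
        traces["intensity"] >= intensity_threshold, "EQ_ID"
    ].unique()
    # Apply the magnitude cut on those events.
    selected_catalog = catalog[
        catalog["EQ_ID"].isin(strong_eq_ids)
        & (catalog["magnitude"] >= magnitude_threshold)
    ]
    # Keep only traces that belong to the selected events.
    selected_traces = traces[traces["EQ_ID"].isin(selected_catalog["EQ_ID"])]
    return selected_catalog, selected_traces
```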
-------------------------------------------------------------------------------- /data_preprocess/1_afile_to_catalog.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | 5 | from read_tsmip import * 6 | 7 | Afile_path = "../data/Afile" 8 | waveform_path = "../data/waveform" 9 | Events = [] 10 | Traces = [] 11 | for year in os.listdir(f"{waveform_path}"): 12 | for month in [ 13 | "01", 14 | "02", 15 | "03", 16 | "04", 17 | "05", 18 | "06", 19 | "07", 20 | "08", 21 | "09", 22 | "10", 23 | "11", 24 | "12", 25 | ]: 26 | trace_path = f"{waveform_path}/{year}/{month}" 27 | trace_folder = os.listdir(trace_path) 28 | 29 | afile_name = f"{year}{month}A.DAT" 30 | afile_path = f"{Afile_path}/{afile_name}" 31 | events, traces = classify_event_trace(afile_path, afile_name, trace_folder) 32 | Events.extend(events) 33 | Traces.extend(traces) 34 | 35 | # Events 36 | event_dict_inlist = [] 37 | for eq_id, event in enumerate(Events): 38 | header_info = read_header(event, EQ_ID=str(eq_id + 1)) 39 | event_dict_inlist.append(header_info) 40 | 41 | event_df = pd.DataFrame.from_dict(event_dict_inlist) 42 | # event_df.to_csv(f"{Afile_path}/1991-2020 catalog.csv", index=False) 43 | 44 | # Traces 45 | for i in range(len(Traces)): 46 | if i == 0: 47 | trace_info = read_lines(Traces[i], EQ_ID=str(i + 1)) 48 | else: 49 | tmp_trace_info = read_lines(Traces[i], EQ_ID=str(i + 1)) 50 | trace_info.extend(tmp_trace_info) 51 | 52 | trace_df = pd.DataFrame.from_dict(trace_info) 53 | # keep traces that belong to TSMIP: 54 | trace_df = trace_df[trace_df["station_name"].str.len() == 6] 55 | 56 | # trace_df.to_csv(f"{Afile_path}/1991-2020 traces.csv", index=False) 57 | -------------------------------------------------------------------------------- /data_preprocess/10_check_station_overlap.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | start_year=1999 4 | end_year=2008 5 | traces = pd.read_csv( 6 | f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime_labeled.csv" 7 | ) 8 | catalog = pd.read_csv( 9 | f"./events_traces_catalog/{start_year}_{end_year}_ok_events_p_arrival_abstime.csv" 10 | ) 11 | 12 | traces["instrument_priority"] = traces["instrument_code"].map( 13 | {" SMTA": 1, " CVA ": 2, " NANO": 3, " A900": 4, " ETNA": 5, " K2 ": 6, " REFT": 7} 14 | ) 15 | # find traces that share the same EQ_ID & station_name 16 | overlap_trace = pd.DataFrame() 17 | for eq_id in catalog["EQ_ID"]: 18 | tmp_traces = traces.query(f"EQ_ID == {eq_id}") 19 | counts = tmp_traces["station_name"].value_counts() 20 | 21 | target_station = counts[counts > 1].index.tolist() 22 | 23 | mask = tmp_traces["station_name"].isin(target_station) 24 | 25 | tmp_overlap_trace = tmp_traces[mask] 26 | 27 | overlap_trace = pd.concat([overlap_trace, tmp_overlap_trace]) 28 | 29 | # rank instrument codes to set a priority order 30 | instrument_priority = traces["instrument_code"].value_counts().index.tolist() 31 | 32 | overlap_trace_sorted = overlap_trace.sort_values("instrument_priority") 33 | chosen_trace = overlap_trace_sorted.drop_duplicates( 34 | ["station_name", "EQ_ID"], keep="first" 35 | ) 36 | chosen_trace = chosen_trace.sort_index() 37 | 38 | # take the set difference between the original df and the overlapping df, then add back the overlapping traces that were kept 39 | differ_set = pd.concat([traces, overlap_trace]).drop_duplicates( 40 | ["station_name", "EQ_ID"], keep=False 41 | ) 42 | final_trace = pd.concat([differ_set, chosen_trace]).sort_index() 43 | 44 | 45 | # final_trace.to_csv( 46 | # 
f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv", 47 | # index=False, 48 | # ) 49 | 50 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/3_waveform_after_preprocess.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import obspy 4 | import numpy as np 5 | import sys 6 | import os 7 | import matplotlib.pyplot as plt 8 | 9 | sys.path.append("../..") 10 | from data_preprocess.read_tsmip import get_peak_value 11 | 12 | data_path = "./0403asc_by_Joey" 13 | 14 | files = os.listdir(f"{data_path}") 15 | asc_files = [file for file in files if file.endswith(".asc")] 16 | output_df = {"station_code": [], "PGA": []} 17 | for i in range(len(asc_files)): 18 | data = pd.read_csv( 19 | f"{data_path}/{asc_files[i]}", sep="\s+", skiprows=[0], header=None 20 | ).to_numpy() 21 | 22 | stream = obspy.core.stream.Stream() 23 | channel = ["HLZ", "HLN", "HLE"] 24 | 25 | for j, chan in enumerate(channel): 26 | trace = obspy.core.trace.Trace(data[:, j + 1]) 27 | trace.stats.sampling_rate = 100 28 | # trace.stats.starttime = obspy.UTCDateTime(asc_files[0][:17]) 29 | stream.append(trace) 30 | stream.filter("lowpass", freq=10) 31 | # plot 32 | # fig,ax=plt.subplots(3,1) 33 | # for k in range(3): 34 | # ax[k].plot(stream[k].data) 35 | # ax[0].set_title(asc_files[i][26:30]) 36 | # ax[2].set_xlabel("time sample (100Hz)") 37 | # ax[1].set_ylabel("amplitude (gal)") 38 | # plt.close() 39 | # fig.savefig(f"{data_path}/image/{asc_files[i][26:30]}.png",dpi=300) 40 | 41 | pga, _ = get_peak_value(stream) 42 | output_df["station_code"].append(asc_files[i][26:30]) 43 | output_df["PGA"].append(pga) 44 | 45 | output_df = pd.DataFrame(output_df) 46 | 47 | station_info = pd.read_csv("../../data/station_information/TSMIPstations_new.csv") 48 | 49 | output_df = pd.merge( 50 | output_df, 51 | station_info[["station_code", "location_code"]], 52 | left_on="station_code", 53 | right_on="station_code", 54 | how="left", 55 | ) 56 | 57 | # output_df.to_csv(f"true_answer.csv", index=False) 58 | -------------------------------------------------------------------------------- /data_preprocess/7_traces_picking.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | from read_tsmip import read_tsmip 5 | from obspy.signal.trigger import ar_pick 6 | 7 | start_year=1999 8 | end_year=2008 9 | waveform_path = "../data/waveform" 10 | traces = pd.read_csv(f"./events_traces_catalog/{start_year}_{end_year}_ok_traces.csv") 11 | 12 | traces["p_pick_sec"] = 0 13 | for i in range(len(traces)): 14 | print(f"{i}/{len(traces)}") 15 | EQ_ID = str(traces["EQ_ID"][i]) 16 | year = str(traces["year"][i]) 17 | month = str(traces["month"][i]) 18 | day = str(traces["day"][i]) 19 | hour = str(traces["hour"][i]) 20 | minute = str(traces["minute"][i]) 21 | second = str(traces["second"][i]) 22 | intensity = str(traces["intensity"][i]) 23 | station_name = traces["station_name"][i] 24 | epdis = str(traces["epdis (km)"][i]) 25 | file_name = traces["file_name"][i].strip() 26 | if len(month) < 2: 27 | month = "0" + month 28 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 29 | # picking 30 | p_pick, _ = ar_pick( 31 | waveform[0], 32 | waveform[1], 33 | waveform[2], 34 | samp_rate=waveform[0].stats.sampling_rate, 35 | f1=1, # Frequency of the lower bandpass window 36 
| f2=20, # Frequency of the upper bandpass window 37 | lta_p=1, # Length of LTA for the P arrival in seconds 38 | sta_p=0.1, # Length of STA for the P arrival in seconds 39 | lta_s=4.0, # Length of LTA for the S arrival in seconds 40 | sta_s=1.0, # Length of STA for the P arrival in seconds 41 | m_p=2, # Number of AR coefficients for the P arrival 42 | m_s=8, # Number of AR coefficients for the S arrival 43 | l_p=0.1, 44 | l_s=0.2, 45 | s_pick=False, 46 | ) 47 | traces.loc[i, "p_pick_sec"] = p_pick 48 | 49 | # traces.to_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_picked_traces.csv",index=False) 50 | 51 | 52 | -------------------------------------------------------------------------------- /data_preprocess/8_shift_picking_by_velocity_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | #before run this script, you need to go to "tracer_demo/": 5 | #use "input_file.py" to create input data 6 | # calculate p wave arrival by velocity model(Huang et al., 2014) 7 | #paper link: https://www.sciencedirect.com/science/article/pii/S0012821X14000995 8 | 9 | start_year=1999 10 | end_year=2008 11 | traces = pd.read_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_picked_traces.csv") 12 | 13 | EQ_ID = os.listdir(f"./tracer_demo/{start_year}_{end_year}_output") 14 | 15 | traces["p_arrival_abs_time"] = pd.to_datetime( 16 | traces[["year", "month", "day", "hour", "minute", "second"]] 17 | ) 18 | 19 | colnames = [ 20 | "evt_lon", 21 | "evt_lat", 22 | "evt_depth", 23 | "sta_lon", 24 | "sta_lat", 25 | "sta_elev", 26 | "p_arrival", 27 | "s_arrival", 28 | ] 29 | for eq in EQ_ID: 30 | event_file_path = f"./tracer_demo/{start_year}_{end_year}_output/{eq}/output.table" 31 | tracer_output = pd.read_csv( 32 | event_file_path, sep=r"\s+", names=colnames, header=None 33 | ) 34 | trace_index = traces[traces["EQ_ID"] == int(eq)].index 35 | p_arrival = pd.to_timedelta(tracer_output["p_arrival"], unit="s") 36 | p_arrival.index = trace_index 37 | traces.loc[trace_index, "p_arrival_abs_time"] = ( 38 | traces.loc[trace_index, "p_arrival_abs_time"] + p_arrival 39 | ) 40 | # traces 和 event 須將 eq_id: 29363 剔除 (velocity model calculate out of range) 41 | final_traces = traces[traces["EQ_ID"] != 29363] 42 | event = pd.read_csv(f"./events_traces_catalog/{start_year}_{end_year}_ok_events.csv") 43 | final_event = event[event["EQ_ID"] != 29363] 44 | # save catalog 45 | # final_traces.to_csv( 46 | # f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime.csv", index=False 47 | # ) 48 | # final_event.to_csv( 49 | # f"./events_traces_catalog/{start_year}_{end_year}_ok_events_p_arrival_abstime.csv", index=False 50 | # ) -------------------------------------------------------------------------------- /data_preprocess/plot_cut_traces.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from tqdm import tqdm 3 | 4 | import obspy 5 | import matplotlib.pyplot as plt 6 | from read_tsmip import cut_traces 7 | 8 | start_year = 1999 9 | end_year = 2019 10 | Afile_path = "data/Afile" 11 | sta_path = "../data/station_information" 12 | waveform_path = "../data/waveform" 13 | catalog = pd.read_csv( 14 | f"./events_traces_catalog/{start_year}_{end_year}_final_catalog.csv" 15 | ) 16 | traces = pd.read_csv( 17 | f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv" 18 | ) 19 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 20 | 
traces.loc[traces.index, "p_pick_sec"] = pd.to_timedelta( 21 | traces["p_pick_sec"], unit="sec" 22 | ) 23 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 24 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 25 | ) 26 | 27 | for eq_id in tqdm(catalog["EQ_ID"]): 28 | tmp_traces, traces_info = cut_traces(traces, eq_id, waveform_path, waveform_type="acc") 29 | for i,chan in enumerate(["HLZ","HLN","HLE"]): 30 | stream = obspy.core.stream.Stream() 31 | for j in range(len(traces_info["traces"])): 32 | trace = obspy.core.trace.Trace(data=traces_info["traces"][j][:, i]) 33 | trace.stats.id = eq_id 34 | trace.stats.station = tmp_traces["station_name"][j] 35 | trace.stats.channel = chan 36 | trace.stats.distance = tmp_traces["epdis (km)"][j] * 1000 37 | trace.stats.starttime = traces_info["start_time"][j] 38 | trace.stats.sampling_rate = 200 39 | 40 | stream.append(trace) 41 | fig, ax = plt.subplots() 42 | stream.plot(type="section",fig=fig) 43 | 44 | magnitude = catalog[catalog["EQ_ID"] == eq_id]["magnitude"].values[0] 45 | 46 | ax.set_title( 47 | f"EQ ID:{eq_id}, Magnitude: {magnitude}, start time: {traces_info['start_time'][j]}" 48 | ) 49 | # fig.savefig(f"cut event figure/{eq_id}_{trace.stats.channel}.png",dpi=300) 50 | plt.close() -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/4_plot_intensity_map.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | 4 | sys.path.append("..") 5 | from analysis import Intensity_Plotter 6 | 7 | mask_sec = 3 8 | event_lon = 121.66 9 | event_lat = 23.77 10 | magnitude = 7.2 11 | answer = pd.read_csv(f"true_answer.csv") 12 | 13 | # merge 3 5 7 10 sec to find maximum predicted pga 14 | prediction_3 = pd.read_csv(f"no_include_broken_data_prediction/3_sec_prediction.csv") 15 | prediction_5 = pd.read_csv(f"no_include_broken_data_prediction/5_sec_prediction.csv") 16 | prediction_7 = pd.read_csv(f"no_include_broken_data_prediction/7_sec_prediction.csv") 17 | prediction_10 = pd.read_csv(f"no_include_broken_data_prediction/10_sec_prediction.csv") 18 | 19 | max_prediction = pd.concat( 20 | [ 21 | prediction_3, 22 | prediction_5["predict"], 23 | prediction_7["predict"], 24 | prediction_10["predict"], 25 | ], 26 | axis=1, 27 | ) 28 | 29 | max_prediction.columns = [ 30 | "3_predict", 31 | "station_name", 32 | "latitude", 33 | "longitude", 34 | "elevation", 35 | "5_predict", 36 | "7_predict", 37 | "10_predict", 38 | ] 39 | max_prediction["max_predict"] = max_prediction.apply( 40 | lambda row: max( 41 | row["3_predict"], row["5_predict"], row["7_predict"], row["10_predict"] 42 | ), 43 | axis=1, 44 | ) 45 | 46 | max_prediction = pd.merge( 47 | answer, max_prediction, how="left", left_on="location_code", right_on="station_name" 48 | ) 49 | max_prediction.dropna(inplace=True) 50 | 51 | eventmeta = pd.DataFrame( 52 | {"longitude": [event_lon], "latitude": [event_lat], "magnitude": [magnitude]} 53 | ) 54 | 55 | Intensity_Plotter.plot_intensity_map( 56 | trace_info=max_prediction, 57 | eventmeta=eventmeta, 58 | label_type="pga", 59 | true_label=max_prediction["PGA"], 60 | pred_label=max_prediction[f"{mask_sec}_predict"], 61 | sec=mask_sec, 62 | min_epdis=10.87177078, # 0.1087度轉成km 63 | EQ_ID=None, 64 | grid_method="linear", 65 | pad=100, 66 | title=f"{mask_sec} sec intensity Map", 67 | ) 68 | # fig.savefig(f"true_intensity_map_without_broken_data/{mask_sec}_sec.png",dpi=300) 69 | 
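In `4_plot_intensity_map.py` above, the maximum predicted PGA across the 3/5/7/10-second windows is computed with `apply` and a lambda. A vectorized equivalent (shown only as an alternative sketch, not a change to the script) is:

```python
# Row-wise maximum over the four prediction columns, without a Python-level loop.
predict_columns = ["3_predict", "5_predict", "7_predict", "10_predict"]
max_prediction["max_predict"] = max_prediction[predict_columns].max(axis=1)
```

Both forms produce the same `max_predict` column.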
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /__pycache__ 3 | .vscode 4 | 5 | #waveform and catalog 6 | data/* 7 | !data/*.py 8 | /paper image 9 | *.zip 10 | 11 | #mlflow 12 | /mlruns 13 | 14 | #training 15 | model/* 16 | !model/*.py 17 | /predict 18 | 19 | /multi_input_predict_pga_and_pgv/__pycache__ 20 | /multi_input_predict_pga_and_pgv/mlruns 21 | /multi_input_predict_pga_and_pgv/model 22 | 23 | 24 | #data preprocess 25 | data_preprocess/* 26 | !data_preprocess/images 27 | !data_preprocess/*.py 28 | !data_preprocess/README.md 29 | !data_preprocess/tracer_demo 30 | data_preprocess/tracer_demo/1999_2008_output 31 | data_preprocess/tracer_demo/2009_2019_output 32 | data_preprocess/tracer_demo/2023_output 33 | data_preprocess/tracer_demo/model_image 34 | data_preprocess/tracer_demo/create_input_file/* 35 | !data_preprocess/tracer_demo/create_input_file/*.py 36 | !data_preprocess/0918_M6.8_1319_1330/ 37 | data_preprocess/0918_M6.8_1319_1330/cut trace 38 | data_preprocess/0918_M6.8_1319_1330/*.csv 39 | 40 | #feature map correlation 41 | feature_map_correlation/__pycache__ 42 | 43 | model_performance_analysis/__pycache__ 44 | 45 | #0403 Hualien_earthquake 46 | /model_performance_analysis/0403_Hualien_Earthquake/0403waveform_image 47 | /model_performance_analysis/0403_Hualien_Earthquake/113019_TSMIP_SAC 48 | /model_performance_analysis/0403_Hualien_Earthquake/model_input 49 | /model_performance_analysis/0403_Hualien_Earthquake/true_intensity_map_with_broken_data 50 | /model_performance_analysis/0403_Hualien_Earthquake/true_intensity_map_without_broken_data 51 | /model_performance_analysis/0403_Hualien_Earthquake/include_broken_data_prediction 52 | /model_performance_analysis/0403_Hualien_Earthquake/no_include_broken_data_prediction 53 | /model_performance_analysis/0403_Hualien_Earthquake/underestimation_problem 54 | /model_performance_analysis/0403_Hualien_Earthquake/model_input_waveform_image 55 | /model_performance_analysis/0403_Hualien_Earthquake/0403asc_by_Joey 56 | /model_performance_analysis/0403_Hualien_Earthquake/triggered_station 57 | /model_performance_analysis/0403_Hualien_Earthquake/*.zip 58 | /model_performance_analysis/0403_Hualien_Earthquake/*.csv 59 | /model_performance_analysis/0403_Hualien_Earthquake/*.7z 60 | 61 | #CWA eew 62 | /CWA_EEW_report -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/create_input_file/input_file.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | start_year=1999 5 | end_year=2008 6 | event = pd.read_csv(f"../../events_traces_catalog/{start_year}_{end_year}_ok_events.csv") 7 | sta_path = "D:/TEAM_TSMIP/data/station_information" 8 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 9 | traces = pd.read_csv(f"../../events_traces_catalog/{start_year}_{end_year}_ok_picked_traces.csv") 10 | 11 | event["longitude"] = event["lon"] + (event["lon_minute"] / 60) 12 | event["latitude"] = event["lat"] + (event["lat_minute"] / 60) 13 | 14 | for eq_id in event["EQ_ID"]: 15 | # 16 | if eq_id==29363: #event location out of velocity model range 17 | continue 18 | tmp_event = event[event["EQ_ID"] == eq_id].reset_index() 19 | tmp_trace = traces[traces["EQ_ID"] == eq_id] 20 | tmp_trace_sta_lon_lat = pd.merge( 21 | tmp_trace, 22 | station_info[["location_code", "latitude", "longitude", 
"elevation (m)"]], 23 | left_on="station_name", 24 | right_on="location_code", 25 | how="left", 26 | ) 27 | # 28 | drop_station_name = "KNM003" #station_location velocity model range 29 | tmp_trace_sta_lon_lat = tmp_trace_sta_lon_lat[(tmp_trace_sta_lon_lat["station_name"] != drop_station_name)].reset_index() 30 | tmp_trace_sta_lon_lat.rename(columns={'elevation (m)': 'elevation_m'}, inplace=True) 31 | folder_path = f"{start_year}_{end_year}_input/{eq_id}/" 32 | 33 | if not os.path.exists(folder_path): 34 | os.mkdir(folder_path) 35 | 36 | event_file_path = folder_path + "event_input.evt" 37 | with open(event_file_path, "w") as file: 38 | file.write( 39 | f"{round(tmp_event.longitude[0],3)} {round(tmp_event.latitude[0],3)} {tmp_event.depth[0]}\n" 40 | ) 41 | 42 | station_file_path = folder_path + "station_input.sta" 43 | with open(station_file_path, "w") as file: 44 | for i in range(len(tmp_trace_sta_lon_lat)): 45 | file.write( 46 | f"{round(tmp_trace_sta_lon_lat.longitude[i],3)} {round(tmp_trace_sta_lon_lat.latitude[i],4)} {round(tmp_trace_sta_lon_lat.elevation_m[i],3)} {tmp_trace_sta_lon_lat.station_name[i]}\n" 47 | ) 48 | 49 | -------------------------------------------------------------------------------- /data_preprocess/9_label.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from read_tsmip import read_tsmip, get_peak_value, get_integrated_stream 5 | 6 | # read traces catalog 7 | start_year=1999 8 | end_year=2008 9 | waveform_path = "../data/waveform" 10 | traces = pd.read_csv( 11 | f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime.csv" 12 | ) 13 | 14 | sampling_rate = 200 15 | for i in range(len(traces)): 16 | print(f"{i}/{len(traces)}") 17 | EQ_ID = str(traces["EQ_ID"][i]) 18 | year = str(traces["year"][i]) 19 | month = str(traces["month"][i]) 20 | day = str(traces["day"][i]) 21 | hour = str(traces["hour"][i]) 22 | minute = str(traces["minute"][i]) 23 | second = str(traces["second"][i]) 24 | intensity = str(traces["intensity"][i]) 25 | station_name = traces["station_name"][i] 26 | epdis = str(traces["epdis (km)"][i]) 27 | file_name = traces["file_name"][i].strip() 28 | if len(month) < 2: 29 | month = "0" + month 30 | # read waveform 31 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 32 | # resample to 200Hz 33 | if waveform[0].stats.sampling_rate != sampling_rate: 34 | waveform.resample(sampling_rate, window="hann") 35 | 36 | # detrend 37 | waveform.detrend(type="demean") 38 | # lowpass filter 39 | waveform.filter("lowpass", freq=10) # filter 40 | # get pga 41 | pick_point = int(np.round(traces["p_pick_sec"][i] * sampling_rate, 0)) 42 | pga, pga_time = get_peak_value(waveform, pick_point=pick_point) 43 | # waveform taper 44 | waveform.taper(max_percentage=0.05, type="cosine") 45 | # integrate 46 | vel_waveform = get_integrated_stream(waveform) 47 | # bandpass filter 48 | vel_waveform.filter("bandpass", freqmin=0.075, freqmax=10) 49 | # get pgv 50 | pgv, pgv_time = get_peak_value(vel_waveform, pick_point=pick_point) 51 | # input to df 52 | traces.loc[i, "pga"] = pga 53 | traces.loc[i, "pga_time"] = pga_time 54 | traces.loc[i, "pgv"] = pgv 55 | traces.loc[i, "pgv_time"] = pgv_time 56 | 57 | # traces.to_csv( 58 | # f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime_labeled.csv", 59 | # index=False, 60 | # ) 61 | -------------------------------------------------------------------------------- 
/data_preprocess/tracer_demo/chichi.sta: -------------------------------------------------------------------------------- 1 | 120.805 23.5103 0 ALS 2 | 121.365 23.0992 0 CHK 3 | 120.412 23.7192 0 CHY002 4 | 120.172 23.6013 0 CHY004 5 | 120.552 23.5815 0 CHY006 6 | 120.269 23.4853 0 CHY008 7 | 120.544 23.4653 0 CHY010 8 | 120.152 23.3328 0 CHY012 9 | 120.583 23.2963 0 CHY014 10 | 120.405 23.355 0 CHY015 11 | 120.153 23.2212 0 CHY016 12 | 120.268 23.2147 0 CHY017 13 | 120.478 23.1795 0 CHY019 14 | 120.462 23.0457 0 CHY022 15 | 120.28 22.9655 0 CHY023 16 | 120.606 23.757 0 CHY024 17 | 120.514 23.7795 0 CHY025 18 | 120.411 23.7987 0 CHY026 19 | 120.247 23.752 0 CHY027 20 | 120.605 23.632 0 CHY028 21 | 120.528 23.6135 0 CHY029 22 | 120.294 23.5799 0 CHY032 23 | 120.215 23.5407 0 CHY033 24 | 120.544 23.5212 0 CHY034 25 | 120.584 23.52 0 CHY035 26 | 120.479 23.6073 0 CHY036 27 | 120.344 23.5207 0 CHY039 28 | 120.596 23.4388 0 CHY041 29 | 120.583 23.3583 0 CHY042 30 | 120.163 23.3832 0 CHY044 31 | 120.463 23.4765 0 CHY046 32 | 120.447 23.4938 0 CHY047 33 | 120.408 23.2803 0 CHY050 34 | 120.501 23.2878 0 CHY052 35 | 120.31 23.3077 0 CHY054 36 | 120.271 23.2698 0 CHY055 37 | 120.41 23.1495 0 CHY057 38 | 120.319 23.1725 0 CHY058 39 | 120.103 23.184 0 CHY059 40 | 120.239 23.1243 0 CHY060 41 | 120.511 23.0768 0 CHY061 42 | 120.45 23.1213 0 CHY062 43 | 120.34 23.027 0 CHY063 44 | 120.345 22.906 0 CHY065 45 | 120.208 22.9205 0 CHY066 46 | 120.184 22.999 0 CHY067 47 | 120.182 22.9737 0 CHY069 48 | 120.229 22.9651 0 CHY070 49 | 120.164 23.0648 0 CHY071 50 | 120.805 23.5103 0 CHY074 51 | 119.555 23.5672 0 CHY075 52 | 120.222 23.638 0 CHY076 53 | 120.228 23.0402 0 CHY078 54 | 120.528 23.1848 0 CHY079 55 | 120.678 23.5972 0 CHY080 56 | 120.496 23.2703 0 CHY081 57 | 120.298 23.7237 0 CHY082 58 | 120.593 23.351 0 CHY086 59 | 120.519 23.3845 0 CHY087 60 | 120.429 23.3462 0 CHY088 61 | 120.216 23.2673 0 CHY090 62 | 120.478 23.7913 0 CHY092 63 | 120.147 23.6538 0 CHY093 64 | 120.321 23.7935 0 CHY094 65 | 120.233 22.983 0 CHY096 66 | 120.28 23.1373 0 CHY099 67 | 120.342 23.2272 0 CHY100 68 | 120.562 23.6862 0 CHY101 69 | 120.614 23.2455 0 CHY102 70 | 120.465 23.6695 0 CHY104 71 | 120.29 23.2988 0 CHY107 72 | 120.53 23.2517 0 CHY109 73 | 120.53 23.2517 0 CHY110 74 | 120.227 23.7912 0 CHY111 75 | 120.183 23.7035 0 CHY112 76 | 120.119 23.0372 0 CHY114 77 | 120.097 23.1543 0 CHY115 78 | 120.108 23.0775 0 CHY116 79 | 120.424 23.4977 0 CHY 80 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_CWA_TTSAM_intensity_comparision.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from analysis import Intensity_Plotter 3 | 4 | eqid = 24784 5 | data_folder = "../data/station_information" 6 | multi_station = pd.read_csv(f"{data_folder}/multi-station.txt", sep=" ") 7 | station_dataset = pd.read_csv(f"{data_folder}/TSMIPstations_new.csv") 8 | cwa_event = pd.read_csv(f"{data_folder}/cwa_test_eew_events.csv") 9 | cwa_traces = pd.read_csv(f"{data_folder}/cwa_test_eew_traces.csv") 10 | process_time = int(cwa_event.query(f"eqid=={eqid}")["eew_time"].values[0]) 11 | event_lat = cwa_event.query(f"eqid=={eqid}")["catalog_lat"].values[0] 12 | event_lon = cwa_event.query(f"eqid=={eqid}")["catalog_lon"].values[0] 13 | mag = cwa_event.query(f"eqid=={eqid}")["catalog_mag"].values[0] 14 | merge_data = pd.merge( 15 | multi_station, 16 | station_dataset[["station_code", "location_code"]], 17 | left_on="TSMIP", 18 | 
right_on="station_code", 19 | how="left", 20 | ) 21 | 22 | 23 | cwa_merge_data = pd.merge( 24 | cwa_traces[ 25 | ["eqid", "predict_pga", "station_code", "sta_lat_pre", "sta_lon_pre", "PGA"] 26 | ], 27 | merge_data[["CWASN", "location_code"]], 28 | left_on="station_code", 29 | right_on="CWASN", 30 | how="inner", 31 | ) 32 | 33 | tt_sam = pd.read_csv( 34 | "../predict/station_blind_Vs30_bias2closed_station_2016/7 sec model11 with all info.csv" 35 | ) 36 | 37 | ttsam_merge_data = pd.merge( 38 | tt_sam[["EQ_ID", "predict", "answer", "latitude", "longitude", "station_name"]], 39 | merge_data[["CWASN", "location_code"]], 40 | left_on="station_name", 41 | right_on="location_code", 42 | how="inner", 43 | ) 44 | ttsam_merge_data["sta_lon_pre"] = ttsam_merge_data["longitude"] 45 | ttsam_merge_data["sta_lat_pre"] = ttsam_merge_data["latitude"] 46 | ttsam_merge_data["predict_pga"] = (10 ** ttsam_merge_data["predict"]) * 100 47 | ttsam_merge_data["observed_pga"] = (10 ** ttsam_merge_data["answer"]) * 100 48 | ttsam_merge_data["eqid"] = ttsam_merge_data["EQ_ID"] 49 | # ============================== 50 | 51 | # change "ttsam_merge_data" or "cwa_merge_data" to plot each system intensity map 52 | event = ttsam_merge_data.query(f"eqid=={eqid}") 53 | 54 | fig, ax = Intensity_Plotter.plot_intensity_scatter_map( 55 | event, 56 | event_lon, 57 | event_lat, 58 | mag, 59 | pga_column="observed_pga", 60 | title="Observed intensity", 61 | ) 62 | # fig.savefig(f"../CWA_EEW_report/eqid_{eqid}_intensity.png", dpi=300) -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/1_merge_sta_info.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | station_info = pd.read_csv("../../data/station_information/TSMIPstations_new.csv") 4 | traces_info_with_vs30 = pd.read_csv( 5 | "../events_traces_catalog/1999_2019_final_traces_Vs30.csv" 6 | ) 7 | 8 | pick_result = pd.read_csv("../../data/0918_M6.8_1319_1330/result.csv", header=None) 9 | pick_result.drop(0, axis=1, inplace=True) 10 | pick_result.columns = ["file_name", "pick_result"] 11 | 12 | ok_traces = pick_result.query("pick_result=='y'") 13 | 14 | ok_traces["station_code"] = ok_traces["file_name"].str[3:7] 15 | 16 | ok_traces = pd.merge( 17 | ok_traces, 18 | station_info[["station_code", "location_code"]], 19 | on="station_code", 20 | how="left", 21 | ) 22 | 23 | 24 | for i in ok_traces.index: 25 | if pd.isna(ok_traces["location_code"][i]): 26 | if ok_traces["station_code"][i][0] == "A": 27 | ok_traces["location_code"][i] = "TAP" + ok_traces["station_code"][i][1:] 28 | if ok_traces["station_code"][i][0] == "B": 29 | ok_traces["location_code"][i] = "TCU" + ok_traces["station_code"][i][1:] 30 | if ok_traces["station_code"][i][0] == "C": 31 | ok_traces["location_code"][i] = "CHY" + ok_traces["station_code"][i][1:] 32 | if ok_traces["station_code"][i][0] == "D": 33 | ok_traces["location_code"][i] = "KAU" + ok_traces["station_code"][i][1:] 34 | if ok_traces["station_code"][i][0] == "E": 35 | ok_traces["location_code"][i] = "ILA" + ok_traces["station_code"][i][1:] 36 | if ok_traces["station_code"][i][0] == "F": 37 | ok_traces["location_code"][i] = "HWA" + ok_traces["station_code"][i][1:] 38 | if ok_traces["station_code"][i][0] == "G": 39 | ok_traces["location_code"][i] = "TTN" + ok_traces["station_code"][i][1:] 40 | 41 | ok_traces = pd.merge( 42 | ok_traces, 43 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 44 | 
on="location_code", 45 | how="left", 46 | ) 47 | 48 | ok_traces = pd.merge( 49 | ok_traces, 50 | traces_info_with_vs30[["station_name", "Vs30"]].drop_duplicates( 51 | subset="station_name" 52 | ), 53 | left_on="location_code", 54 | right_on="station_name", 55 | how="left", 56 | ) 57 | 58 | ok_traces.dropna(inplace=True) 59 | ok_traces.drop(["station_code","location_code","pick_result"],axis=1,inplace=True) 60 | 61 | ok_traces.to_csv("../0918_M6.8_1319_1330/traces_catalog.csv",index=None) 62 | -------------------------------------------------------------------------------- /data_preprocess/plot_picking_waveform.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from read_tsmip import read_tsmip 3 | import matplotlib.pyplot as plt 4 | from obspy.signal.trigger import ar_pick 5 | 6 | ok_waveform_file = "1999_2019_final_traces_Vs30.csv" 7 | year = 1999 8 | traces = pd.read_csv(f"events_traces_catalog/{ok_waveform_file}") 9 | 10 | waveform_path = "../data/waveform" 11 | for i in traces.index: 12 | print(f"{i}/{len(traces)}") 13 | EQ_ID = str(traces["EQ_ID"][i]) 14 | year = str(traces["year"][i]) 15 | month = str(traces["month"][i]) 16 | day = str(traces["day"][i]) 17 | hour = str(traces["hour"][i]) 18 | minute = str(traces["minute"][i]) 19 | second = str(traces["second"][i]) 20 | intensity = str(traces["intensity"][i]) 21 | station_name = traces["station_name"][i] 22 | epdis = str(traces["epdis (km)"][i]) 23 | file_name = traces["file_name"][i].strip() 24 | if len(month) < 2: 25 | month = "0" + month 26 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 27 | # p_pick=traces["p_pick_sec"][i]*waveform[0].stats.sampling_rate 28 | p_pick, _ = ar_pick( 29 | waveform[0], 30 | waveform[1], 31 | waveform[2], 32 | samp_rate=waveform[0].stats.sampling_rate, 33 | f1=1, # Frequency of the lower bandpass window 34 | f2=20, # Frequency of the upper bandpass window 35 | lta_p=1, # Length of LTA for the P arrival in seconds 36 | sta_p=0.1, # Length of STA for the P arrival in seconds 37 | lta_s=4.0, # Length of LTA for the S arrival in seconds 38 | sta_s=1.0, # Length of STA for the P arrival in seconds 39 | m_p=2, # Number of AR coefficients for the P arrival 40 | m_s=8, # Number of AR coefficients for the S arrival 41 | l_p=0.1, 42 | l_s=0.2, 43 | s_pick=False, 44 | ) 45 | p_pick = p_pick * waveform[0].stats.sampling_rate 46 | fig, ax = plt.subplots(3, 1) 47 | for j in range(len(ax)): 48 | ax[j].plot( 49 | waveform[j].data[int(p_pick - 5 * 200) : int(p_pick + 30 * 200)], "k" 50 | ) 51 | ax[j].axvline(x=5 * 200, color="r", linestyle="-") 52 | ax[0].set_xticks([]) 53 | ax[1].set_xticks([]) 54 | ax[1].set_ylabel(f"Amplitude (gal)") 55 | ax[2].set_xlabel(f"Time Sample (200Hz)") 56 | ax[0].set_title(f"EQ_ID:{EQ_ID},year:{year},month:{month},file_name:{file_name}") 57 | # fig.savefig(f"pick_result/EQ_ID_{EQ_ID}_{year}_{month}_{file_name}.png",dpi=300) 58 | plt.close() 59 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/2_TTSAM_0403.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import torch 4 | import sys 5 | 6 | sys.path.append("../..") 7 | from model.CNN_Transformer_Mixtureoutput_TEAM import ( 8 | CNN, 9 | MDN, 10 | MLP, 11 | PositionEmbedding_Vs30, 12 | TransformerEncoder, 13 | full_model, 14 | ) 15 | 16 | mask_after_sec = 3 17 | num = 11 18 | device = 
torch.device("cuda") 19 | path = f"../../model/model{num}.pt" 20 | emb_dim = 150 21 | mlp_dims = (150, 100, 50, 30, 10) 22 | CNN_model = CNN(mlp_input=5665).cuda() 23 | pos_emb_model = PositionEmbedding_Vs30(emb_dim=emb_dim).cuda() 24 | transformer_model = TransformerEncoder() 25 | mlp_model = MLP(input_shape=(emb_dim,), dims=mlp_dims).cuda() 26 | mdn_model = MDN(input_shape=(mlp_dims[-1],)).cuda() 27 | full_Model = full_model( 28 | CNN_model, 29 | pos_emb_model, 30 | transformer_model, 31 | mlp_model, 32 | mdn_model, 33 | pga_targets=25, 34 | data_length=3000, 35 | ).to(device) 36 | full_Model.load_state_dict(torch.load(path)) 37 | 38 | Lat = [] 39 | Lon = [] 40 | Elev = [] 41 | Mixture_mu = [] 42 | station_name = [] 43 | for i in range(1, 15): 44 | print(i) 45 | with open( 46 | f"model_input/{mask_after_sec}_sec_without_broken_data/{i}.json", "r" 47 | ) as json_file: 48 | data = json.load(json_file) 49 | 50 | waveform = torch.tensor(data["waveform"]).to(torch.double).unsqueeze(0) 51 | 52 | input_station = torch.tensor(data["sta"]).to(torch.double).unsqueeze(0) 53 | 54 | target_station = torch.tensor(data["target"]).to(torch.double).unsqueeze(0) 55 | true_target_num = torch.sum(torch.all(target_station != 0, dim=-1)).item() 56 | sample = {"waveform": waveform, "sta": input_station, "target": target_station} 57 | 58 | lat = sample["target"][:, :, 0].flatten().tolist() 59 | lon = sample["target"][:, :, 1].flatten().tolist() 60 | elev = sample["target"][:, :, 2].flatten().tolist() 61 | Lat.extend(lat) 62 | Lon.extend(lon) 63 | Elev.extend(elev) 64 | weight, sigma, mu = full_Model(sample) 65 | Mixture_mu.append( 66 | torch.sum(weight * mu, dim=2).cpu().detach().numpy().flatten().tolist() 67 | ) 68 | station_name += data["station_name"] 69 | Mixture_mu = [item for sublist in Mixture_mu for item in sublist] 70 | output = { 71 | "predict": Mixture_mu, 72 | "station_name": station_name, 73 | "latitude": Lat, 74 | "longitude": Lon, 75 | "elevation": Elev, 76 | } 77 | 78 | output_df = pd.DataFrame(output) 79 | 80 | # output_df.to_csv( 81 | # f"no_include_broken_data_prediction/{mask_after_sec}_sec_prediction.csv", index=False 82 | # ) 83 | -------------------------------------------------------------------------------- /data_preprocess/2_catalog_records_cleaning.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | Afile_path = "../data/Afile" 5 | ##############clean broken data and small data############## 6 | 7 | traces_catalog = pd.read_csv(f"{Afile_path}/1991-2020 traces.csv") 8 | acc_filter = ( 9 | (traces_catalog["pga_z"] == 0) 10 | | (traces_catalog["pga_ns"] == 0) 11 | | (traces_catalog["pga_ew"] == 0) 12 | | (traces_catalog["pga_z"] < 2.5) 13 | | (traces_catalog["pga_ns"] < 2.5) 14 | | (traces_catalog["pga_ew"] < 2.5) 15 | | (traces_catalog["pga_z"] > 1300) 16 | | (traces_catalog["pga_ns"] > 1300) 17 | | (traces_catalog["pga_ew"] > 1300) 18 | ) 19 | broken_traces = traces_catalog[acc_filter] 20 | # broken_traces.to_csv(f"{Afile_path}/1991-2020 broken traces.csv", index=False) 21 | 22 | traces_catalog.drop(traces_catalog[acc_filter].index, inplace=True) 23 | # traces_catalog.to_csv(f"{Afile_path}/1991-2020 traces no broken data.csv", index=False) 24 | 25 | ##############find double event traces############## 26 | catalog = pd.read_csv(f"{Afile_path}/1991-2020 catalog.csv") 27 | traces_ljoin_catalog = pd.merge( 28 | catalog[["EQ_ID", "year", "month", "day", "hour", "minute", "second"]], 29 | 
traces_catalog, 30 | on="EQ_ID", 31 | ) 32 | 33 | double_traces_catalog = pd.DataFrame() 34 | for year in range(1991, 2021): 35 | for month in range(1, 13): 36 | time_filter = (traces_ljoin_catalog["year"] == year) & ( 37 | traces_ljoin_catalog["month"] == month 38 | ) 39 | tmp_catalog = traces_ljoin_catalog[time_filter] 40 | file_name_num = tmp_catalog["file_name"].value_counts() 41 | double_event = file_name_num[file_name_num > 1] 42 | same_filename_filter = tmp_catalog["file_name"].isin(double_event.index) 43 | double_traces = tmp_catalog[same_filename_filter] 44 | double_traces_catalog = pd.concat([double_traces_catalog, double_traces]) 45 | # double_traces_catalog.to_csv(f"{Afile_path}/1991-2020 double traces.csv", index=False) 46 | 47 | # clean trace double event 48 | traces_catalog = pd.read_csv(f"{Afile_path}/1991-2020 traces no broken data.csv") 49 | catalog = pd.read_csv(f"{Afile_path}/1991-2020 catalog.csv") 50 | traces_catalog_merge = pd.merge( 51 | catalog[["EQ_ID", "year", "month", "day", "hour", "minute", "second"]], 52 | traces_catalog, 53 | on="EQ_ID", 54 | ) 55 | 56 | double_event = pd.read_csv(f"{Afile_path}/1991-2020 double traces.csv") 57 | 58 | final_traces_catalog = pd.concat( 59 | [traces_catalog_merge, double_event, double_event] 60 | ).drop_duplicates(keep=False) 61 | # final_traces_catalog.to_csv( 62 | # f"{Afile_path}/1991-2020 traces (no broken data, double event).csv", index=False 63 | # ) 64 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_residual.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | from analysis import Residual_Plotter 5 | 6 | mask_after_sec = 7 7 | test_year = 2016 8 | path = f"../predict/station_blind_Vs30_bias2closed_station_{test_year}" 9 | output_path = f"{path}/{mask_after_sec} sec residual plots" 10 | prediction_with_info = pd.read_csv( 11 | f"{path}/{mask_after_sec} sec model11 with all info.csv" 12 | ) 13 | 14 | miss_alarm = (prediction_with_info["predict"] < np.log10(0.25)) & ( 15 | prediction_with_info["answer"] >= np.log10(0.25) 16 | ) 17 | false_alarm = (prediction_with_info["predict"] >= np.log10(0.25)) & ( 18 | prediction_with_info["answer"] < np.log10(0.25) 19 | ) 20 | wrong_predict = prediction_with_info[miss_alarm | false_alarm] 21 | 22 | for column in prediction_with_info.columns: 23 | fig, ax = Residual_Plotter.residual_with_attribute( 24 | prediction_with_info=prediction_with_info, 25 | column=column, 26 | single_case_check=24784.0, 27 | wrong_predict=wrong_predict, 28 | test_year=test_year, 29 | ) 30 | if not os.path.isdir(output_path): 31 | os.mkdir(output_path) 32 | # fig.savefig( 33 | # f"{output_path}/{column}.png", 34 | # dpi=300, 35 | # ) 36 | 37 | # plot event residual on map 38 | fig, ax = Residual_Plotter.single_event_residual_map( 39 | prediction_with_info=prediction_with_info, 40 | eq_id=24784.0, 41 | title=f"{mask_after_sec} sec 2016 Meinong earthquake residual in prediction", 42 | ) 43 | # fig.savefig( 44 | # f"{output_path}/{mask_after_sec} sec 2016 Meinong earthquake residual map.png", 45 | # dpi=300, 46 | # ) 47 | 48 | # plot all prediction residual on map 49 | prediction_with_info["predict_residual"] = ( 50 | prediction_with_info["predict"] - prediction_with_info["answer"] 51 | ) 52 | grouby_sta = prediction_with_info.groupby("station_name").agg( 53 | {"longitude": "first", "latitude": "first", "predict_residual": ["mean", "std"]} 54 | ) 55 
| # when a station has only one sample, std is NaN; drop those stations 56 | grouby_sta = grouby_sta[~grouby_sta["predict_residual", f"std"].isna()] 57 | # grouby_sta.to_csv(f"{mask_after_sec}_sec_station_correction.csv") 58 | 59 | max_abs_difference = abs(grouby_sta["predict_residual", "mean"]).max() 60 | negative_max_difference = -max_abs_difference 61 | 62 | fig, ax = Residual_Plotter.events_station_map( 63 | grouby_sta=grouby_sta, 64 | column="mean", 65 | cmap="seismic", 66 | title=f"{mask_after_sec} sec residual mean in 2016 prediction", 67 | ) 68 | fig, ax = Residual_Plotter.events_station_map( 69 | grouby_sta=grouby_sta, 70 | column="std", 71 | cmap="Reds", 72 | title=f"{mask_after_sec} sec residual std in 2016 prediction", 73 | ) 74 | # fig.savefig( 75 | # f"{output_path}/{mask_after_sec} sec residual {column} map.png", 76 | # dpi=300, 77 | # ) 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Taiwan Transformer Shaking Alert Model (TT-SAM) 2 | [![License](https://img.shields.io/badge/License-GPLv3-orange)](https://www.gnu.org/licenses/gpl-3.0.html) 3 | 4 | This study builds on the Transformer Earthquake Alerting Model (TEAM), a deep-learning earthquake early warning (EEW) framework. We optimized the model with seismic data from Taiwan to develop the Taiwan Transformer Shaking Alert Model (TT-SAM), which rapidly estimates seismic intensity to provide longer warning times. 5 | 6 | 7 | ## Data Preprocess 8 | 9 | ![image](data_preprocess/images/workflow.png) 10 | 11 | ## Model architecture 12 | ![image](images/TEAM-Taiwan_model_architecture.png) 13 | 14 | ## Model Performance 15 | 16 | We use 2016 seismic data to evaluate model performance. 17 | 18 | Seismic intensity thresholds follow the Central Weather Administration. 19 | 20 | Background color represents the model-predicted intensity. 21 | 22 | ### 2016 Meinong Earthquake 23 | 24 | ![image](images/Meinong_event.gif) 25 | 26 | ### 2016 Taitung Offshore Earthquake 27 | ![image](images/Taitung_offshore_event.gif) 28 | 29 | ## References 30 | Münchmeyer, J., Bindi, D., Leser, U., & Tilmann, F. (2021). The transformer earthquake 31 | alerting model: A new versatile approach to earthquake early warning. Geophysical Journal 32 | International, 225(1), 646-656. 33 | (https://academic.oup.com/gji/article/225/1/646/6047414) 34 | 35 | Liu, Kun-Sung, Tzay-Chyn Shin, and Yi-Ben Tsai. (1999). A free-field strong motion 36 | network in Taiwan: TSMIP. Terrestrial, Atmospheric and Oceanic Sciences, 10(2), 377-396. 37 | (http://tao.cgu.org.tw/index.php/articles/archive/geophysics/item/308) 38 | 39 | Akazawa, T. (2004, August). A technique for automatic detection of onset time of P- and S-phases 40 | in strong motion records. In Proc. of the 13th world conf. on earthquake engineering 41 | (Vol. 786, p. 786). Vancouver, Canada. 42 | (https://www.iitk.ac.in/nicee/wcee/article/13_786.pdf) 43 | 44 | Kuo, C. H., Wen, K. L., Hsieh, H. H., Lin, C. M., Chang, T. M., & Kuo, K. W. (2012). Site 45 | classification and Vs30 estimation of free-field TSMIP stations using the logging data of 46 | EGDT. Engineering Geology, 129, 68-75. 47 | (https://www.sciencedirect.com/science/article/pii/S0013795212000397) 48 | 49 | Lee, C. T., & Tsai, B. R. (2008). Mapping Vs30 in Taiwan. TAO: Terrestrial, Atmospheric 50 | and Oceanic Sciences, 19(6), 6. 
51 | (https://www.researchgate.net/profile/Chyi-Tyi-Lee-2/publication/250211755_Mapping_Vs30_in_Taiwan/links/557fa82608aeb61eae262086/Mapping-Vs30-in-Taiwan.pdf) 52 | 53 | Huang, H. H., Wu, Y. M., Song, X., Chang, C. H., Lee, S. J., Chang, T. M., & Hsieh, H. H. 54 | (2014). Joint Vp and Vs tomography of Taiwan: Implications for subduction-collision 55 | orogeny. Earth and Planetary Science Letters, 392, 177-191. 56 | (https://www.sciencedirect.com/science/article/pii/S0012821X14000995) 57 | 58 | -------------------------------------------------------------------------------- /data_preprocess/plot_data_distribution.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import numpy as np 5 | 6 | Afile_path = "./events_traces_catalog" 7 | 8 | def plot_event_distribution(catalog, output_path=None): 9 | fig, ax = plt.subplots(figsize=(7, 7)) 10 | sns.histplot(catalog, x="magnitude", hue="from", alpha=1, ax=ax) 11 | ax.set_title("Events Catalog", fontsize=20) 12 | ax.set_yscale("log") 13 | ax.set_xlabel("Magnitude", fontsize=13) 14 | ax.set_ylabel("Number of events", fontsize=13) 15 | if output_path: 16 | fig.savefig(f"{output_path}/event_distribution.png", dpi=300) 17 | return fig, ax 18 | 19 | def plot_trace_distribution(trace, output_path=None): 20 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 21 | pga_threshold = np.log10([0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]) 22 | fig, ax = plt.subplots(figsize=(7, 7)) 23 | sns.histplot(trace, x="pga", hue="from", alpha=1, ax=ax, bins=32) 24 | for i in range(len(pga_threshold) - 1): 25 | ax.text((pga_threshold[i] + pga_threshold[i + 1]) / 2, 10000, label[i]) 26 | ax.vlines(pga_threshold[1:-1], 0, 40000, linestyles="dotted", color="k") 27 | ax.set_title("Traces catalog", fontsize=20) 28 | ax.set_yscale("log") 29 | ax.set_xlabel("PGA log(m/s^2)", fontsize=13) 30 | ax.set_ylabel("number of traces", fontsize=13) 31 | if output_path: 32 | fig.savefig(f"{output_path}/traces_distribution.png", dpi=300) 33 | return fig, ax 34 | 35 | before_catalog = pd.read_csv(f"{Afile_path}/2009_2019_ok_events_p_arrival_abstime.csv") 36 | after_catalog = pd.read_csv(f"{Afile_path}/1999_2019_final_catalog.csv") 37 | 38 | before_catalog["from"] = "2009~2019 M>=3.5" 39 | after_catalog["from"] = "1999~2008 M>=5.5" 40 | 41 | catalog = pd.concat([before_catalog, after_catalog]) 42 | catalog.reset_index(inplace=True, drop=True) 43 | 44 | fig, ax = plot_event_distribution(catalog, output_path=None) 45 | 46 | ###### trace 47 | 48 | before_trace = pd.read_csv( 49 | f"{Afile_path}/2009_2019_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 50 | ) 51 | after_trace = pd.read_csv(f"{Afile_path}/1999_2019_final_traces.csv") 52 | 53 | before_trace["from"] = "2009~2019 M>=3.5" 54 | after_trace["from"] = "1999~2008 M>=5.5" 55 | 56 | trace = pd.concat([before_trace, after_trace]) 57 | trace.reset_index(inplace=True, drop=True) 58 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 59 | pga_threshold = np.log10([0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]) 60 | fig, ax = plot_trace_distribution(trace, output_path=None) 61 | 62 | print("high_intensity_rate") 63 | print( 64 | "2009~2019:", 65 | len(before_trace.query(f"pga >={pga_threshold[2]}")) / len(before_trace), 66 | ) 67 | print( 68 | "1999~2019:", len(after_trace.query(f"pga >={pga_threshold[2]}")) / len(after_trace) 69 | ) 70 | 
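# --- a minimal sketch, not part of the original script ---
# The two print blocks above compute the share of traces at or above
# pga_threshold[2] (intensity 4). A small helper makes the same check
# reusable for any traces table whose "pga" column is log10(m/s^2):
def high_intensity_rate(trace_df, threshold=pga_threshold[2]):
    return len(trace_df.query(f"pga >= {threshold}")) / len(trace_df)
# e.g. high_intensity_rate(before_trace) and high_intensity_rate(after_trace)
# reproduce the two ratios printed above.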
-------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/5_plot_confusion_matrix.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import numpy as np 4 | from sklearn.metrics import confusion_matrix 5 | 6 | 7 | sys.path.append("..") 8 | from analysis import Precision_Recall_Factory 9 | 10 | mask_sec = 3 11 | event_lon = 121.66 12 | event_lat = 23.77 13 | magnitude = 7.2 14 | answer = pd.read_csv(f"true_answer.csv") 15 | 16 | # merge 3 5 7 10 sec to find maximum predicted pga 17 | prediction_3 = pd.read_csv(f"no_include_broken_data_prediction/3_sec_prediction.csv") 18 | prediction_5 = pd.read_csv(f"no_include_broken_data_prediction/5_sec_prediction.csv") 19 | prediction_7 = pd.read_csv(f"no_include_broken_data_prediction/7_sec_prediction.csv") 20 | prediction_10 = pd.read_csv(f"no_include_broken_data_prediction/10_sec_prediction.csv") 21 | 22 | max_prediction = pd.concat( 23 | [ 24 | prediction_3, 25 | prediction_5["predict"], 26 | prediction_7["predict"], 27 | prediction_10["predict"], 28 | ], 29 | axis=1, 30 | ) 31 | 32 | max_prediction.columns = [ 33 | "3_predict", 34 | "station_name", 35 | "latitude", 36 | "longitude", 37 | "elevation", 38 | "5_predict", 39 | "7_predict", 40 | "10_predict", 41 | ] 42 | max_prediction["max_predict"] = max_prediction.apply( 43 | lambda row: max( 44 | row["3_predict"], row["5_predict"], row["7_predict"], row["10_predict"] 45 | ), 46 | axis=1, 47 | ) 48 | 49 | max_prediction = pd.merge( 50 | answer, max_prediction, how="left", left_on="location_code", right_on="station_name" 51 | ) 52 | max_prediction.dropna(inplace=True) 53 | 54 | ################# 55 | label_threshold = np.log10(np.array([0.25])) 56 | predict_label = np.array(max_prediction[f"max_predict"]) 57 | real_label = np.array(max_prediction["PGA"]) 58 | predict_logic = np.where(predict_label > label_threshold, 1, 0) 59 | real_logic = np.where(real_label > label_threshold, 1, 0) 60 | matrix = confusion_matrix(real_logic, predict_logic, labels=[1, 0]) 61 | accuracy = np.sum(np.diag(matrix)) / np.sum(matrix) # (TP+TN)/all 62 | precision = matrix[0][0] / np.sum(matrix, axis=0)[0] # TP/(TP+FP) 63 | recall = matrix[0][0] / np.sum(matrix, axis=1)[0] 64 | 65 | intensity = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 66 | max_prediction["predicted_intensity"] = max_prediction["max_predict"].apply( 67 | Precision_Recall_Factory.pga_to_intensity 68 | ) 69 | max_prediction["answer_intensity"] = max_prediction["PGA"].apply(Precision_Recall_Factory.pga_to_intensity) 70 | 71 | intensity_confusion_matrix = confusion_matrix( 72 | max_prediction["answer_intensity"], 73 | max_prediction["predicted_intensity"], 74 | labels=intensity, 75 | ) 76 | 77 | fig, ax = Precision_Recall_Factory.plot_intensity_confusion_matrix( 78 | intensity_confusion_matrix 79 | ) 80 | # fig.savefig("confusion_matrix.png", dpi=300) 81 | -------------------------------------------------------------------------------- /data/data_visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from visualize import Plot_Train_Test_Data, Increase_High_Data_Test 4 | 5 | 6 | data_path = "../data_preprocess/events_traces_catalog" 7 | origin_catalog = pd.read_csv(f"{data_path}/1999_2019_final_catalog.csv") 8 | traces_catalog = pd.read_csv(f"{data_path}/1999_2019_final_traces_Vs30.csv") 9 | test_year = 2016 10 
| train_catalog = origin_catalog.query(f"year!={test_year}") 11 | test_catalog = origin_catalog.query(f"year=={test_year}") 12 | # events histogram 13 | fig, ax = Plot_Train_Test_Data.event_histogram( 14 | train_catalog, test_catalog, key="magnitude", xlabel="magnitude" 15 | ) 16 | # fig.savefig(f"paper image/event depth distribution.png",dpi=300) 17 | # fig.savefig(f"paper image/event depth distribution.pdf",dpi=300) 18 | 19 | # event distribution in map 20 | fig, ax = Plot_Train_Test_Data.event_map(train_catalog, test_catalog) 21 | # fig.savefig(f"paper image/event distribution map.png",dpi=300) 22 | # fig.savefig(f"paper image/event distribution map.pdf",dpi=300) 23 | 24 | # traces pga histogram 25 | fig, ax = Plot_Train_Test_Data.pga_histogram(traces_catalog, test_year=test_year) 26 | # fig.savefig(f"paper image/trace pga distribution.png",dpi=300) 27 | # fig.savefig(f"paper image/trace pga distribution.pdf",dpi=300) 28 | 29 | 30 | # test oversampling method 31 | data_path = "./TSMIP_1999_2019_Vs30.hdf5" 32 | origin_PGA = Increase_High_Data_Test.load_dataset_into_list( 33 | data_path, oversample_rate=1, bias_to_close_station=False 34 | ) 35 | oversampled_PGA = Increase_High_Data_Test.load_dataset_into_list( 36 | data_path, oversample_rate=1.5, bias_to_close_station=False 37 | ) 38 | 39 | bias_closed_sta_PGA = Increase_High_Data_Test.load_dataset_into_list( 40 | data_path, oversample_rate=1.5, bias_to_close_station=True 41 | ) 42 | 43 | origin_PGA_array = np.array(origin_PGA) 44 | origin_high_intensity_rate = np.sum(origin_PGA_array > np.log10(0.250)) / len( 45 | origin_PGA_array 46 | ) 47 | print(f"origin rate:{origin_high_intensity_rate}") 48 | 49 | oversampled_PGA_array = np.array(oversampled_PGA) 50 | oversampled_high_intensity_rate = np.sum(oversampled_PGA_array > np.log10(0.250)) / len( 51 | oversampled_PGA_array 52 | ) 53 | print(f"oversampled rate:{oversampled_high_intensity_rate}") 54 | 55 | bias_closed_sta_PGA_array = np.array(bias_closed_sta_PGA) 56 | bias_closed_sta_high_intensity_rate = np.sum( 57 | bias_closed_sta_PGA_array > np.log10(0.250) 58 | ) / len(bias_closed_sta_PGA_array) 59 | print(f"bias_closed_sta rate:{bias_closed_sta_high_intensity_rate}") 60 | 61 | fig, ax = Increase_High_Data_Test.plot_pga_histogram( 62 | bias_closed_sta_PGA, 63 | oversampled_PGA, 64 | origin_PGA, 65 | origin_high_intensity_rate, 66 | oversampled_high_intensity_rate, 67 | bias_closed_sta_high_intensity_rate, 68 | ) 69 | # fig.savefig("PGA distribution.png", dpi=300,bbox_inches='tight') 70 | -------------------------------------------------------------------------------- /data_preprocess/3_station_location_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from read_tsmip import * 5 | 6 | sta_path = "../data/station_information" 7 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations.csv") 8 | station_code = station_info["station"].str.extract(r"(.*?)[(]") 9 | location_code = station_info["station"].str.extract(r"[(](.*?)[)]") 10 | station_info.insert(1, "station_code", station_code.values) 11 | station_info.insert(2, "location_code", location_code.values) 12 | station_info.drop(["station"], axis=1, inplace=True) 13 | 14 | # merge data from JC 15 | for sta in ["CHY", "HWA", "ILA", "KAU", "TAP", "TCU", "TTN"]: 16 | tmp_info = pd.read_csv(f"{sta_path}/{sta}.csv", encoding="unicode_escape") 17 | tmp_info.columns = [ 18 | "location_code", 19 | "station_location", 20 | "county", 21 | "district", 22 
| "net", 23 | "longitude", 24 | "latitude", 25 | "elevation", 26 | "stamp code", 27 | "address", 28 | ] 29 | sta_filter = tmp_info["location_code"].isin(station_info["location_code"]) 30 | add_df = tmp_info[~sta_filter][ 31 | ["location_code", "latitude", "longitude", "elevation"] 32 | ] 33 | add_df.rename(columns={"elevation": "elevation (m)"}, inplace=True) 34 | add_df.insert(0, "network", "TSMIP") 35 | add_df.insert(1, "station_code", np.nan) 36 | station_info = pd.concat([station_info, add_df]) 37 | 38 | # merge data fron MH 39 | # data1 40 | station_code1 = pd.read_csv(f"{sta_path}/station_code.csv") 41 | station_code2 = pd.read_csv(f"{sta_path}/tsmip_factor.csv") 42 | merged_station_code = pd.merge( 43 | station_code1, station_code2, left_on="Station_Code", right_on="station_code" 44 | ) 45 | sta_filter = merged_station_code["TSMIP_code"].isin(station_info["location_code"]) 46 | add_df = merged_station_code[~sta_filter][ 47 | ["TSMIP_code", "Ins_longitude", "Ins_latitude", "Ins_elevation", "TSMIP_short_code"] 48 | ] 49 | add_df.columns = [ 50 | "location_code", 51 | "longitude", 52 | "latitude", 53 | "elevation (m)", 54 | "station_code", 55 | ] 56 | 57 | save_index = [] 58 | for sta_code in add_df["location_code"].unique(): 59 | save_index.append( 60 | add_df[add_df["location_code"] == sta_code]["location_code"].index[-1] 61 | ) 62 | uniqued_add_df = add_df.loc[save_index] 63 | uniqued_add_df.insert(0, "network", "TSMIP") 64 | station_info = pd.concat([station_info, uniqued_add_df]) 65 | 66 | # data2 67 | CWBstation = pd.read_csv(f"{sta_path}/CWBstation.log", sep="\s+", header=None) 68 | CWBstation.columns = [ 69 | "location_code", 70 | "longitude", 71 | "latitude", 72 | "elevation (m)", 73 | "starttime", 74 | "endtime", 75 | ] 76 | sta_filter = CWBstation["location_code"].isin(station_info["location_code"]) 77 | add_df = CWBstation[~sta_filter][ 78 | ["location_code", "longitude", "latitude", "elevation (m)"] 79 | ] 80 | add_df.insert(0, "network", "TSMIP") 81 | add_df.insert(1, "station_code", np.nan) 82 | station_info = pd.concat([station_info, add_df]) 83 | station_info.sort_values(by=["location_code"], inplace=True) 84 | # station_info.to_csv(f"{sta_path}/TSMIPstations_new.csv", index=False) 85 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/read_velocity_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from cartopy.mpl import ticker 4 | import cartopy.crs as ccrs 5 | 6 | with open("vel3d.mod","r") as file: 7 | lines = file.readlines() 8 | 9 | lon_coor=[] 10 | lon_coor.append([float(x) for x in lines[1].split()]) 11 | lon_coor=np.array(lon_coor[0]) 12 | 13 | lat_coor=[] 14 | lat_coor.append([float(x) for x in lines[2].split()]) 15 | lat_coor=np.array(lat_coor[0]) 16 | 17 | dep_coor=[] 18 | dep_coor.append([float(x) for x in lines[3].split()]) 19 | dep_coor=np.array(dep_coor[0]) 20 | 21 | array=[] 22 | data=lines[4:] 23 | for i,line in enumerate(data): 24 | # 使用指定分隔符(例如,空格)拆分每一行 25 | elements = line.split() 26 | 27 | # 获取第5列以后的数据 28 | 29 | # 添加到结果列表 30 | array.append([float(x) for x in elements]) 31 | 32 | array=np.array(array) 33 | reshape_array=array.reshape(2,27,61,76)#(model,depth,lat,lon) 34 | 35 | 36 | #plot velocity model 37 | X, Y = np.meshgrid(lon_coor, lat_coor) 38 | for model_index,model_name in enumerate(["Vp model","Vs model"]): 39 | vmax=reshape_array[model_index,:,:].max() 40 | 
vmin=reshape_array[model_index,:,:].min() 41 | for dep_index in range(0,len(dep_coor)): 42 | fig,ax=plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()}) 43 | ax.coastlines() 44 | cp = ax.contourf(X, Y, reshape_array[model_index,dep_index,:,:],transform=ccrs.PlateCarree()) 45 | xticks = ticker.LongitudeLocator(nbins=125-119)._raw_ticks(119, 125) 46 | yticks = ticker.LatitudeLocator(nbins=26-20)._raw_ticks(20, 26) 47 | 48 | ax.set_xticks(xticks, crs=ccrs.PlateCarree()) 49 | ax.set_yticks(yticks, crs=ccrs.PlateCarree()) 50 | cbar = fig.colorbar(cp) 51 | cbar.set_label(f'{model_name[:2]} (km/s)') 52 | ax.set_xlabel("longitude") 53 | ax.set_ylabel("latitude") 54 | ax.set_title(f"Depth: {dep_coor[dep_index]}km") 55 | fig.savefig(f"model_image/{model_name}_depth_{int(dep_coor[dep_index]*1000)}m.png",dpi=300) 56 | 57 | #origin 76 61 27 58 | #after 89 66 27 59 | start=125.08 60 | end=126.5 61 | increased_lon=np.round(np.arange(start, end + 0.08, 0.08),2) 62 | increased_lon_to_str = " ".join("{:.2f}".format(num) for num in increased_lon) 63 | 64 | start=26.18 65 | end=26.8 66 | increased_lat=np.round(np.arange(start, end + 0.08, 0.08),2) 67 | increased_lat_to_str = " ".join("{:.2f}".format(num) for num in increased_lat) 68 | 69 | 70 | with open("vel3d_new.mod","w") as file: 71 | points_info=lines[0] 72 | updated_points_info=points_info.replace("76",f"{76+len(increased_lon)}").replace("61",f"{61+len(increased_lat)}") 73 | file.write(updated_points_info) 74 | updated_lon=lines[1].rstrip("\n")+" "+increased_lon_to_str+"\n" 75 | file.write(updated_lon) 76 | updated_lat=lines[2].rstrip("\n")+" "+increased_lat_to_str+"\n" 77 | file.write(updated_lat) 78 | file.write(lines[3]) 79 | new_lines=[] 80 | for i in range(4,len(lines)): 81 | 82 | formatted_number= "{:.3f}".format(array[i-4][-1]) 83 | result_string=(" "+str(formatted_number))*len(increased_lon) 84 | new_line=lines[i].rstrip("\n")+result_string+"\n" 85 | 86 | if (i-4)%61==60: 87 | for j in range(len(increased_lat)): 88 | file.write(new_line) 89 | file.write(new_line) 90 | 91 | #記得手動再增加一個深度 92 | 93 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_event_input_waveforms.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() 4 | import numpy as np 5 | import pandas as pd 6 | from torch.utils.data import DataLoader 7 | from tqdm import tqdm 8 | import sys 9 | sys.path.append("..") 10 | from data.multiple_sta_dataset import multiple_station_dataset 11 | from analysis import Triggered_Map 12 | 13 | 14 | mask_after_sec = 10 15 | label = "pga" 16 | eq_id = 25900 17 | data = multiple_station_dataset( 18 | "../data/TSMIP_1999_2019_Vs30.hdf5", 19 | mode="test", 20 | mask_waveform_sec=mask_after_sec, 21 | test_year=2016, 22 | label_key=label, 23 | input_type="acc", 24 | data_length_sec=15, 25 | ) 26 | record_prediction = pd.read_csv( 27 | f"../predict/station_blind_noVs30_bias2closed_station_2016/{mask_after_sec} sec ensemble 510 with all info.csv" 28 | ) 29 | record_prediction = record_prediction[record_prediction["EQ_ID"] == eq_id].reset_index( 30 | drop=True 31 | ) 32 | # ========================= 33 | loader = DataLoader(dataset=data, batch_size=1) 34 | 35 | Mixture_mu = [] 36 | Label = [] 37 | P_picks = [] 38 | EQ_ID = [] 39 | Label_time = [] 40 | Sta_name = [] 41 | Lat = [] 42 | Lon = [] 43 | Elev = [] 44 | 45 | for j, sample in tqdm(enumerate(loader)): 46 | picks = 
sample["p_picks"].flatten().numpy().tolist() 47 | label_time = sample[f"{label}_time"].flatten().numpy().tolist() 48 | lat = sample["target"][:, :, 0].flatten().tolist() 49 | lon = sample["target"][:, :, 1].flatten().tolist() 50 | elev = sample["target"][:, :, 2].flatten().tolist() 51 | P_picks.extend(picks) 52 | P_picks.extend([np.nan] * (25 - len(picks))) 53 | Label_time.extend(label_time) 54 | Label_time.extend([np.nan] * (25 - len(label_time))) 55 | Lat.extend(lat) 56 | Lon.extend(lon) 57 | Elev.extend(elev) 58 | 59 | eq_id_list = sample["EQ_ID"][:, :, 0].flatten().numpy().tolist() 60 | EQ_ID.extend(eq_id_list) 61 | EQ_ID.extend([np.nan] * (25 - len(eq_id_list))) 62 | if eq_id_list[0] == eq_id: 63 | waveform = sample["waveform"].numpy().reshape(25, 3000, 3) 64 | waveforms_fig,waveforms_ax=Triggered_Map.plot_model_waveforms_input(waveform,picks,record_prediction,mask_after_sec) 65 | 66 | waveform_num = len( 67 | np.where(np.array(picks) <= picks[0] + (mask_after_sec * 200))[0] 68 | ) 69 | for i in range(waveform_num): 70 | station_name = record_prediction["station_name"][i] 71 | answer = np.round(100 * (10 ** record_prediction["answer"][i]), 2) 72 | waveform_fig, waveform_ax = plt.subplots(3, 1, figsize=(7, 7)) 73 | for j in range(3): 74 | waveform_ax[j].plot(waveform[i, :, j]) 75 | waveform_ax[j].axvline(x=picks[i], c="r") 76 | waveform_ax[0].set_title(f"acc waveform, PGA: {answer} gal", size=20) 77 | # waveform_fig.savefig(f"./predict/acc predict pga 1999_2019/model 2 meinong intensity map/index{i}_{station_name}_acc_input.png") 78 | waveforms_ax[0].set_title( 79 | f"EQID:{eq_id} {mask_after_sec} sec acc records, Z component", size=20 80 | ) 81 | waveforms_ax[-1].set_xlabel( 82 | "Time sample (200Hz)",size=15 83 | ) 84 | # waveforms_fig.savefig(f"paper image/eqid{eq_id}_{mask_after_sec}_sec_Z_acc.png",bbox_inches='tight',dpi=300) 85 | break 86 | -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/3_label.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import sys 4 | import os 5 | import obspy 6 | import re 7 | from obspy.geodetics import gps2dist_azimuth 8 | 9 | sys.path.append("../..") 10 | from read_tsmip import read_tsmip, get_peak_value, get_integrated_stream 11 | 12 | # read traces catalog 13 | waveform_path = "../../data/0918_M6.8_1319_1330/ascii" 14 | traces = pd.read_csv("traces_catalog.csv") 15 | # delete broken waveform 16 | traces = traces.query("quality_control=='y'").reset_index(drop=True) 17 | 18 | sampling_rate = 200 19 | for i in range(len(traces)): 20 | print(f"{i}/{len(traces)}") 21 | file_name = traces["file_name"][i].strip() 22 | # read waveform 23 | data = pd.read_csv( 24 | f"{waveform_path}/{file_name}.asc", sep="\s+", skiprows=1, header=None 25 | ).to_numpy() 26 | 27 | with open(f"{waveform_path}/{file_name}.asc", "r") as f: 28 | picks = f.readlines()[0] 29 | picks = re.findall(r"\d+\.\d+", picks) 30 | picks = [np.round(float(number), 2) for number in picks] 31 | 32 | waveform = obspy.core.stream.Stream() 33 | channel = ["HLZ", "HLN", "HLE"] 34 | for j, chan in enumerate(channel): 35 | start = np.where(data == picks[2])[0][0] 36 | end = np.where(data == picks[3])[0][0] 37 | trace = obspy.core.trace.Trace(data[start:end, j + 1]) 38 | 39 | trace.stats.network = "TW" 40 | # trace.stats.station = header[0][14:20] 41 | trace.stats.channel = chan 42 | 43 | trace.stats.sampling_rate = int(1 / abs(data[0, 0] - data[1, 
0])) 44 | 45 | waveform.append(trace) 46 | # resample to 200Hz 47 | if waveform[0].stats.sampling_rate != sampling_rate: 48 | waveform.resample(sampling_rate, window="hann") 49 | 50 | # detrend 51 | waveform.detrend(type="demean") 52 | # lowpass filter 53 | waveform.filter("lowpass", freq=10) # filter 54 | # get pga 55 | pick_point = int(np.round(traces["p_pick_sec"][i] * sampling_rate, 0)) 56 | pga, pga_time = get_peak_value(waveform, pick_point=pick_point) 57 | # waveform taper 58 | waveform.taper(max_percentage=0.05, type="cosine") 59 | # integrate 60 | vel_waveform = get_integrated_stream(waveform) 61 | # bandpass filter 62 | vel_waveform.filter("bandpass", freqmin=0.075, freqmax=10) 63 | # get pgv 64 | pgv, pgv_time = get_peak_value(vel_waveform, pick_point=pick_point) 65 | # input to df 66 | traces.loc[i, "pga"] = pga 67 | traces.loc[i, "pga_time"] = pga_time 68 | traces.loc[i, "pgv"] = pgv 69 | traces.loc[i, "pgv_time"] = pgv_time 70 | 71 | 72 | #calculate epicentral distance 73 | 74 | catalog = pd.read_csv("event_catalog.csv") 75 | traces["epdis (km)"]=0 76 | 77 | eq_latitude = catalog["lat"][0] + catalog["lat_minute"][0] / 60 78 | eq_longitude = catalog["lon"][0] + catalog["lon_minute"][0] / 60 79 | eq_depth = catalog["depth"][0] 80 | for i in range(len(traces)): 81 | station_latitude = traces["latitude"][i] 82 | station_longitude = traces["longitude"][i] 83 | station_elevation = traces["elevation (m)"][i] / 1000 84 | epi_dis, azimuth, _ = gps2dist_azimuth( 85 | eq_latitude, eq_longitude, station_latitude, station_longitude 86 | ) 87 | epi_dis=((epi_dis/1000)**2 + (eq_depth - station_elevation)**2)**0.5 # gps2dist_azimuth returns meters; convert to km before combining with depth (km) 88 | traces.loc[i,"epdis (km)"]=epi_dis 89 | 90 | traces.to_csv(f"traces_catalog.csv", index=False) -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/README: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------- 2 | This is a script to calculate the P- and S-wave travel time between any two 3 | points in a 3D velocity model. 4 | 5 | For references, please cite the papers below: 6 | 1) For ray-tracing method: 7 | Koketsu and Sekine (1998), Pseudo-bending method for three-dimensional 8 | seismic ray tracing in a spherical earth with discontinuities, Geophys. 9 | J. Int., 132, 339-346. 10 | Huang et al. (2013), First Local Seismic Tomography for Red River Shear 11 | Zone, northern Vietnam: Stepwise inversion employing crustal P and Pn 12 | waves, Tectonophysics, 584, 230-239. 13 | 14 | 2) For velocity model of Taiwan region: 15 | Huang et al. (2014), Joint Vp and Vs tomography of Taiwan: Implications 16 | for subduction-collision orogeny, Earth. Planet. Sci. Lett., 392, 17 | 177-191. 18 | 19 | --------------------------------------------------------------------------- 20 | Last update: JUL 7, 2014 by Hsin-Hua Huang 21 | 22 | SUBDIRECTORIES: 23 | 1) src - source codes 24 | 2) mod - place to put collected velocity models 25 | 26 | 27 | 28 | RUNNING THE SCRIPT, SIMPLY TYPE './tracer'! 29 | To do it correctly, you need a velocity model named 'vel3d.mod' under the main 30 | directory for the script to read. For the detailed format, please refer to the content 31 | of the files. 32 | 33 | 34 | 35 | RAY PATH OUTPUT: 36 | If you select 1 for ray path output, two files named P_path.txt and S_path.txt 37 | will be generated. The ray paths in each file are separated by "X". 
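A minimal Python sketch for splitting P_path.txt or S_path.txt into individual ray paths (an assumption-level illustration: it presumes each path is a block of whitespace-separated coordinate lines and that a line containing only "X" separates the paths; check the exact column layout against the generated files):

def read_ray_paths(filename):
    paths, current = [], []
    with open(filename) as f:
        for line in f:
            if line.strip() == "X":  # "X" marks the end of one ray path
                if current:
                    paths.append(current)
                current = []
            else:
                current.append([float(v) for v in line.split()])
    if current:  # keep the last path if the file does not end with "X"
        paths.append(current)
    return paths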
38 | 39 | 40 | 41 | INPUT DATA FORMAT: 42 | You will see the instructions after typing './tracer': 43 | 1) Two-point mode - All you need is simply to input the coordinates of any 44 | two points. Then it will give you P- and S-wave 45 | calculated travel times on screen. This mode provides 46 | an option to output the traced ray paths for P- and 47 | S-wave in the files "P_path.txt" and "S_path.txt", 48 | respectively. 49 | 50 | 2) Two-file mode - In this mode you need to input two file names for 51 | sources (usually earthquakes) and receivers (usually 52 | stations), respectively. Both of them require the 53 | source and receiver coordinates in three columns 54 | (lon, lat, dep), and can be plural. See chichi.evt 55 | and chichi.sta for examples. An easier way is to 56 | modify the input file and type './tracer < input' 57 | to run. 58 | 59 | OUTPUT FORMAT: 60 | In two-file mode, you can further choose the format 61 | of the output file. 62 | 1) ascii 63 | It will output an ascii file 'tt.table', containing 64 | eight columns. Columns 1,2,3 and columns 4,5,6 are 65 | the source and receiver coordinates, and the last two 66 | columns are the calculated P- and S-wave travel times 67 | , respectively. 68 | 69 | 2) binary 70 | It will output a binary file 'tt.bin', which follows 71 | the ascii format. 72 | 73 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/residual_map.py: -------------------------------------------------------------------------------- 1 | import cartopy.crs as ccrs 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import math 5 | import sys 6 | from obspy.taup.taup_geo import calc_dist 7 | from obspy.geodetics import degrees2kilometers 8 | 9 | sys.path.append("..") 10 | from analysis import Consider_Angle 11 | 12 | 13 | def calculate_angle(x1, y1, x2, y2): 14 | # compute the slope between the two points 15 | delta_x = x2 - x1 16 | delta_y = y2 - y1 17 | slope = delta_y / delta_x 18 | 19 | # use the arctangent to get the angle (in radians) 20 | angle_radians = math.atan(slope) 21 | 22 | # convert radians to degrees 23 | angle_degrees = math.degrees(angle_radians) 24 | 25 | # adjust the angle into the 0-360 degree range 26 | if delta_x < 0: 27 | angle_degrees += 180 28 | elif delta_x >= 0 and delta_y < 0: 29 | angle_degrees += 360 30 | 31 | return angle_degrees % 360 32 | 33 | 34 | answer = pd.read_csv(f"true_answer.csv") 35 | prediction_3 = pd.read_csv(f"no_include_broken_data_prediction/3_sec_prediction.csv") 36 | prediction_5 = pd.read_csv(f"no_include_broken_data_prediction/5_sec_prediction.csv") 37 | prediction_7 = pd.read_csv(f"no_include_broken_data_prediction/7_sec_prediction.csv") 38 | prediction_10 = pd.read_csv(f"no_include_broken_data_prediction/10_sec_prediction.csv") 39 | 40 | max_prediction = pd.concat( 41 | [ 42 | prediction_3, 43 | prediction_5["predict"], 44 | prediction_7["predict"], 45 | prediction_10["predict"], 46 | ], 47 | axis=1, 48 | ) 49 | 50 | max_prediction.columns = [ 51 | "3_predict", 52 | "station_name", 53 | "latitude", 54 | "longitude", 55 | "elevation", 56 | "5_predict", 57 | "7_predict", 58 | "10_predict", 59 | ] 60 | max_prediction["max_predict"] = max_prediction.apply( 61 | lambda row: max( 62 | row["3_predict"], row["5_predict"], row["7_predict"], row["10_predict"] 63 | ), 64 | axis=1, 65 | ) 66 | 67 | max_prediction = pd.merge( 68 | answer, max_prediction, how="left", left_on="location_code", right_on="station_name" 69 | ) 70 | max_prediction.dropna(inplace=True) 71 | 72 | init_latitude = 
max_prediction.query("location_code=='HWA074'")["latitude"].values[0] 73 | init_longitude = max_prediction.query("location_code=='HWA074'")["longitude"].values[0] 74 | event_lat = 23.77 75 | event_lon = 121.67 76 | 77 | max_prediction = max_prediction.reset_index(drop=True) 78 | flattening_of_planet = 1 / 298.257223563 79 | 80 | for i in range(len(max_prediction)): 81 | lat = max_prediction["latitude"][i] 82 | lon = max_prediction["longitude"][i] 83 | angle = calculate_angle(init_longitude, init_latitude, lon, lat) 84 | epi_dist = degrees2kilometers( 85 | calc_dist( 86 | event_lat, 87 | event_lon, 88 | lat, 89 | lon, 90 | radius_of_planet_in_km=6371.0, 91 | flattening_of_planet=flattening_of_planet, 92 | ) 93 | ) 94 | max_prediction.loc[i, "angle"] = angle 95 | max_prediction.loc[i, "dist"] = epi_dist 96 | 97 | fig, ax = Consider_Angle.plot_pga_attenuation(prediction=max_prediction) 98 | # fig.savefig("PGA_attenuation.png",dpi=300) 99 | 100 | fig, ax = Consider_Angle.angle_map( 101 | stations=max_prediction, 102 | init_sta_lat=init_latitude, 103 | init_sta_lon=init_longitude, 104 | event_lat=23.77, 105 | event_lon=121.66, 106 | ) 107 | # fig.savefig("Angle_map.png",dpi=300) -------------------------------------------------------------------------------- /data_preprocess/analyze_resample.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from scipy.interpolate import interpolate,PchipInterpolator 6 | from read_tsmip import read_tsmip, get_peak_value 7 | 8 | """ 9 | In our dataset, we have different sampling rate waveforms, most of the data is 200Hz. 10 | In this script, we checked the residual of PGA after resampling all of waveforms to 200Hz. 
11 | """ 12 | 13 | target_sampling_rate = 200 14 | waveform_path = "../data/waveform" 15 | output_path = "./traces_sampling_rate" 16 | traces = pd.read_csv(f"events_traces_catalog/1999_2019_final_traces_Vs30.csv") 17 | traces["p_arrival_abs_time"] = pd.to_datetime( 18 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 19 | ) 20 | 21 | dict = {"station_name":[],"sta_latitude":[],"sta_longitude":[],"sampling_rate": [], "origin_PGA": [], "resampled_PGA": []} 22 | for i in range(len(traces)): 23 | print(f"{i}/{len(traces)}") 24 | year = str(traces["year"][i]) 25 | month = str(traces["month"][i]) 26 | if len(month) < 2: 27 | month = "0" + month 28 | filename = traces["file_name"][i].strip() 29 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{filename}.txt") 30 | sampling_rate = waveform[0].stats.sampling_rate 31 | if sampling_rate != target_sampling_rate: 32 | dict["station_name"].append(traces["station_name"][i]) 33 | dict["sta_latitude"].append(traces["latitude"][i]) 34 | dict["sta_longitude"].append(traces["longitude"][i]) 35 | dict["sampling_rate"].append(sampling_rate) 36 | 37 | 38 | pick_point = int(np.round(traces["p_pick_sec"][i] * sampling_rate, 0)) 39 | waveform.detrend(type="demean") 40 | waveform.filter("lowpass", freq=10) 41 | origin_pga = 10 ** get_peak_value(waveform, pick_point=pick_point)[0] * 100 42 | dict["origin_PGA"].append(origin_pga) 43 | 44 | 45 | for channel in range(len(waveform)): 46 | print(max(waveform[channel].data)) 47 | duration=len(waveform[channel].data)/sampling_rate 48 | origin_x=np.linspace(0,duration,int(len(waveform[channel].data))) 49 | resample_x=np.linspace(0,duration,int(target_sampling_rate*duration)) 50 | interpolater= PchipInterpolator(origin_x, waveform[channel].data) 51 | resample_waveform = interpolater(resample_x) 52 | 53 | # fig,ax=plt.subplots(2,1) 54 | # ax[0].plot(origin_x, waveform[channel].data) 55 | # ax[0].axvline(traces["p_pick_sec"][i],c="r") 56 | # ax[1].plot(resample_x, resample_waveform) 57 | # ax[1].axvline(traces["p_pick_sec"][i],c="r") 58 | waveform[channel].data=resample_waveform 59 | waveform[channel].stats.sampling_rate=target_sampling_rate 60 | print(max(waveform[channel].data)) 61 | 62 | pick_point = int(np.round(traces["p_pick_sec"][i] * target_sampling_rate, 0)) 63 | resample_pga = 10 ** get_peak_value(waveform, pick_point=pick_point)[0] * 100 64 | dict["resampled_PGA"].append(resample_pga) 65 | break 66 | 67 | 68 | output = pd.DataFrame(dict) 69 | output["residual"] = output["origin_PGA"] - output["resampled_PGA"] 70 | # output.to_csv(f"{output_path}/statistic_sampling_rate_new.csv", index=False) 71 | 72 | 73 | 74 | fig, ax = plt.subplots() 75 | ax.hist(output["residual"], bins=20, edgecolor="gray") 76 | ax.set_yscale("log") 77 | ax.set_xlabel("Residual (pga-resampled pga, unit: gal)") 78 | ax.set_ylabel("Number of traces") 79 | # fig.savefig(f"{output_path}/pga residual after resampling.png",dpi=300) 80 | -------------------------------------------------------------------------------- /model_performance_analysis/warning_time_maximize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from analysis import Rolling_Warning, Warning_Time_Plotter 4 | 5 | eq_id = 24784.0 6 | label_type = "pga" 7 | if label_type == "pga": 8 | label_threshold = np.log10(0.25) 9 | intensity = "IV" 10 | if label_type == "pgv": 11 | label_threshold = np.log10(0.15) 12 | intensity = "V" 13 | 14 | path = 
"../predict/station_blind_Vs30_bias2closed_station_2016" 15 | output_path = f"{path}/mag bigger 5.5 predict" 16 | 17 | prediction3_with_info = pd.read_csv(f"{path}/3 sec model11 with all info.csv") 18 | prediction5_with_info = pd.read_csv(f"{path}/5 sec model11 with all info.csv") 19 | prediction7_with_info = pd.read_csv(f"{path}/7 sec model11 with all info.csv") 20 | prediction10_with_info = pd.read_csv(f"{path}/10 sec model11 with all info.csv") 21 | 22 | rw_instance = Rolling_Warning(label_type="pga") 23 | warning_df_with_station_info = ( 24 | rw_instance.calculate_warning_time_at_different_issue_timing( 25 | prediction_in_different_timing=[ 26 | prediction3_with_info, 27 | prediction5_with_info, 28 | prediction7_with_info, 29 | prediction10_with_info, 30 | ], 31 | time_list=[3, 5, 7, 10], 32 | event_filter="magnitude>=5", 33 | ) 34 | ) 35 | 36 | fig, ax = rw_instance.plot_maximum_warning_time( 37 | warning_df_with_station_info=warning_df_with_station_info, 38 | time_list=["3 second", "5 second", "7 second", "10 second"], 39 | ) 40 | # fig.savefig(f"{path}/update warning_epi_vs_lead_time_mag_bigger_than_5.png",dpi=300) 41 | 42 | event_info = warning_df_with_station_info[ 43 | warning_df_with_station_info["EQ_ID"] == eq_id 44 | ] 45 | fig,ax=rw_instance.plot_event_warning_time_with_distance_range( 46 | event_info=event_info, distance_range=[20, 60], event_loc=[120.543833333333, 22.922] 47 | ) 48 | 49 | maximum_warning_time = warning_df_with_station_info["max_warning_time"] 50 | maximum_warning_time = maximum_warning_time[maximum_warning_time > 0] 51 | describe = maximum_warning_time.describe() 52 | count = int(describe["count"]) 53 | mean = np.round(describe["mean"], 2) 54 | std = np.round(describe["std"], 2) 55 | median = np.round(describe["50%"], 2) 56 | max = np.round(describe["max"], 2) 57 | statistical_dict = rw_instance.calculate_statistical_value(warning_df_with_station_info) 58 | 59 | fig, ax = rw_instance.plot_maximum_warning_time_histogram( 60 | warning_df_with_station_info, 61 | statistical_dict, 62 | title="Warning time in 2016 events magnitude >=5", 63 | ) 64 | # fig.savefig(f"{output_path}/maximum warning time, magnitude bigger than 5.png",dpi=300) 65 | 66 | single_event_statistical_dict = rw_instance.calculate_statistical_value( 67 | warning_df_with_station_info, filter=f"EQ_ID=={eq_id}" 68 | ) 69 | 70 | fig, ax = rw_instance.plot_maximum_warning_time_histogram( 71 | warning_df_with_station_info, 72 | single_event_statistical_dict, 73 | filter=f"EQ_ID=={eq_id}", 74 | title=f"EQ ID: {eq_id}, Maximum warning time", 75 | ) 76 | 77 | # fig.savefig(f"{output_path}/EQ ID{eq_id}, maximum warning time.png", dpi=300) 78 | for sec, events_prediction in zip( 79 | [3, 5, 7, 10], 80 | [ 81 | prediction3_with_info, 82 | prediction5_with_info, 83 | prediction7_with_info, 84 | prediction10_with_info, 85 | ], 86 | ): 87 | single_event_prediction = events_prediction.query(f"EQ_ID=={eq_id}") 88 | fig, ax = Warning_Time_Plotter.p_wave_pga_travel_time( 89 | event_prediction=single_event_prediction, 90 | title=f"EQ ID: {eq_id} {sec} sec prediction with p-wave and pga travel time", 91 | ) 92 | -------------------------------------------------------------------------------- /feature_map_correlation/plot_single_event_feature_map.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import DataLoader 8 | from tqdm import 
tqdm 9 | import os 10 | import sys 11 | 12 | sys.path.append("..") 13 | from model.CNN_Transformer_Mixtureoutput_TEAM import CNN_feature_map 14 | from data.multiple_sta_dataset import multiple_station_dataset 15 | 16 | mask_after_sec = 5 17 | sample_rate = 200 18 | eq_id = 24784 19 | label = "pga" 20 | data = multiple_station_dataset( 21 | "D:/TEAM_TSMIP/data/TSMIP_1999_2019_Vs30.hdf5", 22 | mode="test", 23 | mask_waveform_sec=mask_after_sec, 24 | test_year=2016, 25 | label_key=label, 26 | mag_threshold=0, 27 | input_type="acc", 28 | data_length_sec=15, 29 | ) 30 | # need station name 31 | data_path = "../predict/station_blind_Vs30_bias2closed_station_2016" 32 | predict = pd.read_csv(f"{data_path}/{mask_after_sec} sec model11 with all info.csv") 33 | single_event_prediction = predict.query(f"EQ_ID=={eq_id}") 34 | # ===========prepare model============== 35 | device = torch.device("cuda") 36 | num = 11 37 | model_path = f"../model/model{num}.pt" 38 | CNN_model = CNN_feature_map(mlp_input=5665).cuda() 39 | 40 | # ===========load CNN parameter============== 41 | full_model_parameter = torch.load(model_path) 42 | CNN_parameter = {} 43 | for name, param in full_model_parameter.items(): 44 | if ( 45 | "model_CNN" in name 46 | ): # model_CNN.conv2d1.0.weight : conv2d1.0.weight didn't match 47 | name = name.replace("model_CNN.", "") 48 | CNN_parameter[name] = param 49 | CNN_model.load_state_dict(CNN_parameter) 50 | 51 | # find specific eq_id 52 | loader = DataLoader(dataset=data, batch_size=1) 53 | for j, sample in tqdm(enumerate(loader)): 54 | if sample["EQ_ID"][:, :, 0].flatten().numpy().tolist()[0] == eq_id: 55 | break 56 | 57 | # waveform average 58 | waveform = sample["waveform"].numpy().reshape(25, 3000, 3) 59 | average_waveform = np.mean(waveform, axis=2) 60 | # station_name_list 61 | not_padding_station_number = (sample["sta"].reshape(25, 4) != 0).all(dim=1).sum().item() 62 | input_station_list = single_event_prediction["station_name"][ 63 | :not_padding_station_number 64 | ].tolist() 65 | if len(input_station_list) < 25: 66 | input_station_list += [np.nan] * (25 - len(input_station_list)) 67 | # input trace trigger time 68 | p_picks = sample["p_picks"].flatten().tolist() 69 | 70 | cnn_input = torch.DoubleTensor(sample["waveform"].reshape(-1, 3000, 3)).float().cuda() 71 | cnn_output, layer_output = CNN_model(cnn_input) 72 | 73 | # plot convolution layer feature map (each layer) 74 | for layer_num, tensor in enumerate(layer_output): # convolution layer number 75 | output_path = f"../predict/station_blind_Vs30_bias2closed_station_2016/{mask_after_sec} sec cnn feature map//layer {layer_num+1}" 76 | if not os.path.isdir(output_path): 77 | os.mkdir(output_path) 78 | print("layer_number", layer_num) 79 | numeric_array = np.array(tensor.detach().cpu(), dtype=np.float32) 80 | feature_map = np.mean(numeric_array, axis=1) 81 | if len(feature_map.shape) == 3: 82 | feature_map = np.mean(feature_map, axis=2) 83 | print(feature_map.shape) 84 | fig, ax = plt.subplots(figsize=(10, 10)) 85 | image = ax.imshow(feature_map, cmap="Reds", aspect="auto") 86 | ax.set_yticks(np.arange(0 - 0.5, feature_map.shape[0] + 0.5, 1), minor=True) 87 | ax.grid(axis="y", linestyle="--", c="red", which="minor") 88 | colorbar = plt.colorbar(image, ax=ax) 89 | ax.set_title(f"Conv layer {layer_num+1}") 90 | # fig.savefig( 91 | # f"{output_path}/Conv layer {layer_num+1}, average feature map.png", dpi=300 92 | # ) 93 | -------------------------------------------------------------------------------- 
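plot_single_event_feature_map.py above relies on a dedicated CNN_feature_map variant of the network to expose the per-layer activations it averages and plots. A forward-hook-based alternative can collect the same kind of intermediate outputs from an unmodified model; the sketch below is only an illustration and assumes nothing beyond the model being a torch.nn.Module containing Conv2d layers (it is not the repository's implementation).

import torch

def collect_conv_outputs(model, sample_input):
    # register a hook on every Conv2d layer, run one forward pass, then clean up
    outputs, hooks = [], []
    for module in model.modules():
        if isinstance(module, torch.nn.Conv2d):
            hooks.append(
                module.register_forward_hook(
                    lambda m, inp, out, store=outputs: store.append(out.detach().cpu())
                )
            )
    with torch.no_grad():
        model(sample_input)
    for h in hooks:
        h.remove()  # release the hooks so later forward passes are unaffected
    return outputs

Each collected tensor can then be averaged over the channel axis and drawn with imshow, exactly as the script does with layer_output.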
/model_performance_analysis/plot_intensity_map_and_warning_time.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | from analysis import Intensity_Plotter,Warning_Time_Plotter 5 | 6 | 7 | mask_after_sec = 10 8 | label_type = "pga" 9 | if label_type == "pga": 10 | label_threshold = np.log10(0.25) 11 | intensity = "IV" 12 | if label_type == "pgv": 13 | label_threshold = np.log10(0.15) 14 | intensity = "V" 15 | 16 | path = "../predict/station_blind_Vs30_bias2closed_station_2016" 17 | output_path = f"{path}/mag bigger 5.5 predict" 18 | if not os.path.isdir(output_path): 19 | os.mkdir(output_path) 20 | Afile_path = "../data_preprocess/events_traces_catalog" 21 | 22 | catalog = pd.read_csv(f"{Afile_path}/1999_2019_final_catalog.csv") 23 | traces_info = pd.read_csv( 24 | f"{Afile_path}/2009_2019_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 25 | ) 26 | prediction_with_info = pd.read_csv( 27 | f"{path}/{mask_after_sec} sec model11 with all info.csv" 28 | ) 29 | 30 | # for EQ_ID in catalog.query("year==2016 & magnitude>=5.5")["EQ_ID"]: 31 | for EQ_ID in [24784, 25900]: 32 | event = catalog[catalog["EQ_ID"] == EQ_ID] 33 | event = event.assign( 34 | latitude=event["lat"] + event["lat_minute"] / 60, 35 | longitude=event["lon"] + event["lon_minute"] / 60, 36 | ) 37 | event_prediction = prediction_with_info.query(f"EQ_ID=={EQ_ID}") 38 | 39 | fig, ax = Intensity_Plotter.plot_intensity_map( 40 | trace_info=event_prediction, 41 | eventmeta=event, 42 | label_type=label_type, 43 | true_label=event_prediction["answer"], 44 | pred_label=event_prediction["predict"], 45 | sec=mask_after_sec, 46 | EQ_ID=EQ_ID, 47 | grid_method="linear", 48 | pad=100, 49 | title=f"{mask_after_sec} sec intensity Map", 50 | ) 51 | # fig.savefig( 52 | # f"../paper image/{EQ_ID}_{mask_after_sec}sec PGA intensity Map.png", dpi=600, bbox_inches="tight" 53 | # ) 54 | fig, ax = Intensity_Plotter.plot_true_predicted( 55 | y_true=event_prediction["answer"], 56 | y_pred=event_prediction["predict"], 57 | quantile=False, 58 | agg="point", 59 | point_size=70, 60 | target=label_type, 61 | title=f"EQID: {EQ_ID}, mag: {event['magnitude'].values[0]}, {mask_after_sec} sec true and predict", 62 | ) 63 | # fig.savefig( 64 | # f"{output_path}/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec}sec true predict plot.png", dpi=450, bbox_inches="tight" 65 | # ) 66 | try: 67 | fig, ax = Warning_Time_Plotter.warning_map( 68 | trace_info=event_prediction, 69 | eventmeta=event, 70 | label_type=label_type, 71 | intensity=intensity, 72 | EQ_ID=EQ_ID, 73 | sec=mask_after_sec, 74 | label_threshold=label_threshold, 75 | ) 76 | 77 | # fig.savefig(f"../paper image/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec} sec warning map.png", 78 | # dpi=600) 79 | fig, ax = Warning_Time_Plotter.correct_warning_with_epidist( 80 | event_prediction=event_prediction, 81 | label_threshold=label_threshold, 82 | label_type=label_type, 83 | mask_after_sec=mask_after_sec, 84 | ) 85 | # fig.savefig(f"{output_path}/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec} sec epidist vs time.png", 86 | # dpi=300) 87 | fig, ax = Warning_Time_Plotter.warning_time_hist( 88 | event_prediction, 89 | catalog, 90 | EQ_ID=EQ_ID, 91 | mask_after_sec=mask_after_sec, 92 | warning_mag_threshold=4, 93 | label_threshold=label_threshold, 94 | label_type=label_type, 95 | bins=14, 96 | ) 97 | # fig.savefig( 98 | # 
f"{output_path}/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec} sec warning stations hist.png", 99 | # dpi=300, 100 | # bbox_inches="tight", 101 | # ) 102 | except Exception as e: 103 | print(EQ_ID) 104 | continue 105 | -------------------------------------------------------------------------------- /data_preprocess/plot_double_event.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from obspy.signal.trigger import ar_pick 4 | 5 | from read_tsmip import read_tsmip, trace_pick_plot 6 | 7 | 8 | Afile_path = "../data/Afile" 9 | double_event = pd.read_csv(f"{Afile_path}/1991-2020 double traces.csv") 10 | double_event=double_event.query("year==2018") 11 | counts_file_times=double_event[["file_name","year","month"]].value_counts() 12 | error_file={"year":[],"month":[],"file":[],"eq_num":[],"reason":[]} 13 | for (file_name,year,month),eq_num in zip(counts_file_times.index,counts_file_times): 14 | if len(str(month))<2: 15 | month="0"+str(month) 16 | path=f"../data/waveform/{year}/{month}" 17 | file_name=file_name.strip() 18 | try: 19 | trace=read_tsmip(f"{path}/{file_name}.txt") 20 | print("read_file ok") 21 | 22 | except Exception as reason: 23 | print(file_name,f"year:{year},month:{month}, {reason}") 24 | error_file["year"].append(year) 25 | error_file["month"].append(month) 26 | error_file["file"].append(file_name) 27 | error_file["reason"].append(reason) 28 | error_file["eq_num"].append(eq_num) 29 | continue 30 | sampling_rate=trace[0].stats.sampling_rate 31 | try: 32 | p_pick,s_pick=ar_pick(trace[0],trace[1],trace[2], 33 | samp_rate=sampling_rate, 34 | f1=1, #Frequency of the lower bandpass window 35 | f2=20, #Frequency of the upper bandpass window 36 | lta_p=1, #Length of LTA for the P arrival in seconds 37 | sta_p=0.1, #Length of STA for the P arrival in seconds 38 | lta_s=4.0, #Length of LTA for the S arrival in seconds 39 | sta_s=1.0, #Length of STA for the P arrival in seconds 40 | m_p=2, #Number of AR coefficients for the P arrival 41 | m_s=8, #Number of AR coefficients for the S arrival 42 | l_p=0.1, 43 | l_s=0.2, 44 | s_pick=True) 45 | except Exception as reason: 46 | print(file_name,f"year:{year},month:{month}, {reason}") 47 | error_file["year"].append(year) 48 | error_file["month"].append(month) 49 | error_file["file"].append(file_name) 50 | error_file["reason"].append(reason) 51 | error_file["eq_num"].append(eq_num) 52 | continue 53 | fig,ax=plt.subplots(3,1) 54 | ax[0].set_title(f"station: {trace[0].stats.station}, start time: {trace[0].stats.starttime}") 55 | ax[1].set_title(f"number of events: {eq_num}") 56 | for component in range(len(trace)): 57 | ax[component].plot(trace[component],"k") 58 | ymin,ymax=ax[component].get_ylim() 59 | ax[component].vlines(p_pick*sampling_rate,ymin,ymax,"r",label="P pick") 60 | ax[component].vlines(s_pick*sampling_rate,ymin,ymax,"g",label="S pick") 61 | ax[0].set_xticks([]) 62 | ax[1].set_xticks([]) 63 | ax[1].set_ylabel(f"Amplitude (gal)") 64 | ax[2].set_xlabel(f"Time Sample (200Hz)") 65 | ax[0].legend() 66 | fig.tight_layout() 67 | output_path="../data/double event picking" 68 | fig.savefig(f"{output_path}/{file_name}.png",dpi=300) 69 | plt.close() 70 | 71 | error_file_df=pd.DataFrame(error_file) 72 | # error_file_df.to_csv(f"{Afile_path}/double event error.csv",index=False) 73 | 74 | #pick again error file 75 | error_file_df=pd.read_csv(f"{Afile_path}/double event error_new.csv") 76 | cant_picking_filter=((error_file_df["year"]!=2020) 
& (error_file_df["month"]!="07") & (error_file_df["month"]!="08") & (error_file_df["month"]!="09")) 77 | cant_picking_file=error_file_df[cant_picking_filter].reset_index(drop=True) 78 | 79 | for i in range(len(cant_picking_file)): 80 | year=cant_picking_file["year"][i] 81 | month=cant_picking_file["month"][i] 82 | if len(str(month))<2: 83 | month="0"+str(month) 84 | file_name=cant_picking_file["file"][i] 85 | eq_num=cant_picking_file["eq_num"][i] 86 | 87 | path=f"data/waveform/{year}/{month}" 88 | 89 | trace=read_tsmip(f"{path}/{file_name}.txt") 90 | trace_pick_plot(trace,file_name,eq_num=eq_num,output_path="../data/waveform/double event picking") -------------------------------------------------------------------------------- /data_preprocess/Vs30_preprocess.py: -------------------------------------------------------------------------------- 1 | import pygmt 2 | import numpy as np 3 | from numpy import sin, cos, tan, radians 4 | import math 5 | from scipy.spatial import distance 6 | 7 | def grd_to_xyz(input_grd, output_xyz): 8 | with pygmt.clib.Session() as session: 9 | # 使用pygmt.grd2xyz進行轉換 10 | session.call_module("grd2xyz", f"{input_grd} > {output_xyz}") 11 | 12 | def twd67_to_97(x, y): 13 | """_summary_ 14 | 15 | 16 | Parameters 17 | ---------- 18 | x : float 19 | x in TWD67 system 20 | y : float 21 | x in TWD67 system 22 | 23 | Returns 24 | ------- 25 | x and y in TWD97 system 26 | """ 27 | A = 0.00001549 28 | B = 0.000006521 29 | 30 | x_97 = x + 807.8 + A * x + B * y 31 | y_97 = y - 248.6 + A * y + B * x 32 | return x_97, y_97 33 | 34 | 35 | def twd97_to_lonlat(x=174458.0, y=2525824.0): 36 | """ 37 | Parameters 38 | ---------- 39 | x : float 40 | TWD97 coord system. The default is 174458.0. 41 | y : float 42 | TWD97 coord system. The default is 2525824.0. 
43 | Returns 44 | ------- 45 | list 46 | [longitude, latitude] 47 | """ 48 | 49 | a = 6378137 50 | b = 6356752.314245 51 | long_0 = 121 * math.pi / 180.0 52 | k0 = 0.9999 53 | dx = 250000 54 | dy = 0 55 | 56 | e = math.pow((1 - math.pow(b, 2) / math.pow(a, 2)), 0.5) 57 | 58 | x -= dx 59 | y -= dy 60 | 61 | M = y / k0 62 | 63 | mu = M / ( 64 | a 65 | * (1 - math.pow(e, 2) / 4 - 3 * math.pow(e, 4) / 64 - 5 * math.pow(e, 6) / 256) 66 | ) 67 | e1 = (1.0 - pow((1 - pow(e, 2)), 0.5)) / ( 68 | 1.0 + math.pow((1.0 - math.pow(e, 2)), 0.5) 69 | ) 70 | 71 | j1 = 3 * e1 / 2 - 27 * math.pow(e1, 3) / 32 72 | j2 = 21 * math.pow(e1, 2) / 16 - 55 * math.pow(e1, 4) / 32 73 | j3 = 151 * math.pow(e1, 3) / 96 74 | j4 = 1097 * math.pow(e1, 4) / 512 75 | 76 | fp = ( 77 | mu 78 | + j1 * math.sin(2 * mu) 79 | + j2 * math.sin(4 * mu) 80 | + j3 * math.sin(6 * mu) 81 | + j4 * math.sin(8 * mu) 82 | ) 83 | 84 | e2 = math.pow((e * a / b), 2) 85 | c1 = math.pow(e2 * math.cos(fp), 2) 86 | t1 = math.pow(math.tan(fp), 2) 87 | r1 = ( 88 | a 89 | * (1 - math.pow(e, 2)) 90 | / math.pow((1 - math.pow(e, 2) * math.pow(math.sin(fp), 2)), (3 / 2)) 91 | ) 92 | n1 = a / math.pow((1 - math.pow(e, 2) * math.pow(math.sin(fp), 2)), 0.5) 93 | d = x / (n1 * k0) 94 | 95 | q1 = n1 * math.tan(fp) / r1 96 | q2 = math.pow(d, 2) / 2 97 | q3 = (5 + 3 * t1 + 10 * c1 - 4 * math.pow(c1, 2) - 9 * e2) * math.pow(d, 4) / 24 98 | q4 = ( 99 | ( 100 | 61 101 | + 90 * t1 102 | + 298 * c1 103 | + 45 * math.pow(t1, 2) 104 | - 3 * math.pow(c1, 2) 105 | - 252 * e2 106 | ) 107 | * math.pow(d, 6) 108 | / 720 109 | ) 110 | lat = fp - q1 * (q2 - q3 + q4) 111 | 112 | q5 = d 113 | q6 = (1 + 2 * t1 + c1) * math.pow(d, 3) / 6 114 | q7 = ( 115 | (5 - 2 * c1 + 28 * t1 - 3 * math.pow(c1, 2) + 8 * e2 + 24 * math.pow(t1, 2)) 116 | * math.pow(d, 5) 117 | / 120 118 | ) 119 | lon = long_0 + (q5 - q6 + q7) / math.cos(fp) 120 | 121 | lat = (lat * 180) / math.pi 122 | lon = (lon * 180) / math.pi 123 | return [lon, lat] 124 | 125 | 126 | def find_nearest_point(target_point, points): 127 | """ 128 | 找到點集points中距離目標點target_point最近的點。 129 | 130 | 參數: 131 | target_point: 目標點的坐標,一個包含兩個元素的列表或元組,例如 [x, y]。 132 | points: 點集,一個包含多個點坐標的二維數組,每個點為一個包含兩個元素的列表或元組,例如 [[x1, y1], [x2, y2], ...]。 133 | 134 | 返回值: 135 | nearest_point: 距離目標點最近的點的坐標,一個包含兩個元素的列表,例如 [x_nearest, y_nearest]。 136 | """ 137 | target_point = np.array(target_point) 138 | points = np.array(points) 139 | 140 | # 使用cdist計算距離矩陣 141 | distances = distance.cdist([target_point], points) 142 | 143 | # 找到距離最小的點的索引 144 | nearest_index = np.argmin(distances) 145 | 146 | # 返回距離最小的點的坐標 147 | nearest_point = points[nearest_index] 148 | 149 | return nearest_index, nearest_point 150 | 151 | 152 | def get_unique_with_other_columns(group): 153 | # 獲取唯一值 154 | unique_value = group["station_name"].unique()[0] 155 | # 獲取其他columns的值 156 | other_columns_values = group.drop_duplicates(subset=["station_name"]) 157 | return other_columns_values 158 | -------------------------------------------------------------------------------- /data_preprocess/13_cut_waveform_to_hdf5.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | from read_tsmip import cut_traces 7 | start_year = 1999 8 | end_year = 2019 9 | sta_path = "../data/station_information" 10 | waveform_path = "../data/waveform" 11 | catalog = pd.read_csv( 12 | f"./events_traces_catalog/{start_year}_{end_year}_final_catalog.csv" 13 | ) 14 | traces = pd.read_csv( 15 | 
f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv" 16 | ) 17 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 18 | traces.loc[traces.index, "p_pick_sec"] = pd.to_timedelta( 19 | traces["p_pick_sec"], unit="sec" 20 | ) 21 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 22 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 23 | ) 24 | 25 | # into hdf5 26 | output = f"../data/TSMIP_{start_year}_{end_year}_Vs30.hdf5" 27 | error_event = {"EQ_ID": [], "reason": []} 28 | with h5py.File(output, "w") as file: 29 | data = file.create_group("data") 30 | meta = file.create_group("metadata") 31 | for eq_id in tqdm(catalog["EQ_ID"]): 32 | # for eq_id in [247]: 33 | try: 34 | tmp_traces, traces_info = cut_traces( 35 | traces, eq_id, waveform_path, waveform_type="acc" 36 | ) 37 | _, vel_info = cut_traces(traces, eq_id, waveform_path, waveform_type="vel") 38 | _, dis_info = cut_traces(traces, eq_id, waveform_path, waveform_type="dis") 39 | traces_info["vel"] = vel_info["traces"] 40 | traces_info["dis"] = dis_info["traces"] 41 | # fig=plot_cutting_event(tmp_traces,traces_info) 42 | start_time_str_arr = np.array(traces_info["start_time"], dtype="S30") 43 | station_name_str_arr = np.array(tmp_traces["station_name"], dtype="S30") 44 | tmp_station_info = pd.merge( 45 | tmp_traces[["station_name","Vs30"]], 46 | station_info[ 47 | ["location_code", "latitude", "longitude", "elevation (m)"] 48 | ], 49 | how="left", 50 | left_on="station_name", 51 | right_on="location_code", 52 | ) 53 | location_array = np.array( 54 | tmp_station_info[["latitude", "longitude", "elevation (m)"]] 55 | ) 56 | Vs30_array=np.array(tmp_traces["Vs30"]) 57 | if np.isnan(location_array).any(): 58 | print("The location array contain NaN values") 59 | continue 60 | event = data.create_group(f"{eq_id}") 61 | event.create_dataset( 62 | "acc_traces", data=traces_info["traces"], dtype=np.float64 63 | ) 64 | event.create_dataset( 65 | "vel_traces", data=traces_info["vel"], dtype=np.float64 66 | ) 67 | event.create_dataset( 68 | "dis_traces", data=traces_info["dis"], dtype=np.float64 69 | ) 70 | event.create_dataset("p_picks", data=traces_info["p_picks"], dtype=np.int64) 71 | event.create_dataset("pga", data=traces_info["pga"], dtype=np.float64) 72 | event.create_dataset("pgv", data=traces_info["pgv"], dtype=np.float64) 73 | event.create_dataset( 74 | "start_time", data=start_time_str_arr, maxshape=(None), chunks=True 75 | ) 76 | event.create_dataset( 77 | "pga_time", data=traces_info["pga_time"], dtype=np.int64 78 | ) 79 | event.create_dataset( 80 | "pgv_time", data=traces_info["pgv_time"], dtype=np.int64 81 | ) 82 | event.create_dataset( 83 | "station_name", data=station_name_str_arr, maxshape=(None), chunks=True 84 | ) 85 | event.create_dataset( 86 | "station_location", data=location_array, dtype=np.float64 87 | ) 88 | event.create_dataset( 89 | "Vs30", data=Vs30_array, dtype=np.float64 90 | ) 91 | except Exception as reason: 92 | print(f"EQ_ID:{eq_id}, {reason}") 93 | error_event["EQ_ID"].append(eq_id) 94 | error_event["reason"].append(reason) 95 | continue 96 | # fig.savefig(f"data/cutting waveform image/{eq_id}.png") 97 | error_event_df = pd.DataFrame(error_event) 98 | error_event_df.to_csv( 99 | "./events_traces_catalog/load into hdf5 error event.csv", index=False 100 | ) 101 | 102 | catalog.to_hdf(output, key="metadata/event_metadata", mode="a", format="table") 103 | traces.to_hdf(output, key="metadata/traces_metadata", mode="a", format="table") 104 | 
-------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/tracer.f90: -------------------------------------------------------------------------------- 1 | ! This is a script to calculate the travel time between two points in 3D velocity model 2 | program tracer 3 | implicit real*8 (a-h,o-z) 4 | include 'setup.inc' 5 | integer msg,imode,ibin,ipath 6 | parameter (msg=16384) 7 | real*8 w(3,msg+1),tp,ts 8 | real*8 evlo,evla,evla2,evdp 9 | real*8 stlo,stla,stla2,stel 10 | character*32 evt_fn,sta_fn,mod_fn 11 | 12 | ! default file names of 3D velocity model and output travel time table 13 | mod_fn='vel3d.mod' 14 | 15 | ! read 3D velocity model 16 | print*,'read default velocity model [vel3d.mod]' 17 | call input_vel(mod_fn) 18 | 19 | 666 print*,'Mode option: (1) input two points' 20 | print*,' (2) input two files' 21 | read(*,*)imode 22 | 23 | ! two-point mode 24 | if (imode.eq.1) then 25 | print*,'Input point 1 (lon, lat, dep):' 26 | read(*,*)evlo,evla,evdp 27 | evla2=geog_to_geoc(evla) 28 | print*,'Input point 2 (lon, lat, dep):' 29 | read(*,*)stlo,stla,stel 30 | stla2=geog_to_geoc(stla) 31 | 32 | print*,'Set 1 to output ray path, otherwise 0:' 33 | read(*,*)ipath 34 | 35 | !-P-wave 36 | ips=1 37 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,tp) 38 | if (ipath.eq.1) then 39 | open(11,file='P_path.txt',status='unknown') 40 | do i=1,np 41 | write(11,'(3f10.3)')w(3,i),w(2,i),w(1,i) 42 | enddo 43 | close(11) 44 | endif 45 | !-S-wave 46 | ips=2 47 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,ts) 48 | if (ipath.eq.1) then 49 | open(11,file='S_path.txt',status='unknown') 50 | do i=1,np 51 | write(11,'(3f10.3)')w(3,i),w(2,i),w(1,i) 52 | enddo 53 | close(11) 54 | endif 55 | print*,'P- and S-wave ray path were outputed!' 56 | print*,'' 57 | 58 | print*,'--------------------------------------------' 59 | print*,'P-wave travel time (sec):',tp 60 | print*,'S-wave travel time (sec):',ts 61 | 62 | 63 | ! two-file mode 64 | elseif (imode.eq.2) then 65 | print*,'Input source file:' 66 | read(*,*)evt_fn 67 | print*,'Input receiver file:' 68 | read(*,*)sta_fn 69 | 70 | print*,'Set 1 to output ray path, otherwise 0:' 71 | read(*,*)ipath 72 | 73 | 777 print*,'Format of output table: 1) ascii, 2) binary' 74 | read(*,*)ibin 75 | 76 | open(1,file=evt_fn,status='old') 77 | open(2,file=sta_fn,status='old') 78 | if (ibin.eq.1) then 79 | open(3,file='tt.table',status='unknown') 80 | elseif (ibin.eq.2) then 81 | open(3,file='tt.bin',status='unknown',form='unformatted') 82 | else 83 | print*,'Option can not be recognized! Please re-try!' 84 | goto 777 85 | endif 86 | ibyte=0 87 | if (ipath.eq.1) then 88 | open(10,file='P_path.txt',status='unknown') 89 | open(11,file='S_path.txt',status='unknown') 90 | endif 91 | do 92 | read(1,*,iostat=ierr1)evlo,evla,evdp 93 | if (ierr1.lt.0) exit 94 | evla2=geog_to_geoc(evla) 95 | do 96 | read(2,*,iostat=ierr2)stlo,stla,stel 97 | if (ierr2.lt.0) exit 98 | stla2=geog_to_geoc(stla) 99 | stel=-stel/1000. 
100 | 101 | !-P-wave 102 | ips=1 103 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,tp) 104 | !-output P-wave path 105 | if (ipath.eq.1) then 106 | do i=1,np 107 | write(10,'(3f10.3)')w(3,i),w(2,i),w(1,i) 108 | enddo 109 | write(10,'(a1)')"X" 110 | endif 111 | !-S-wave 112 | ips=2 113 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,ts) 114 | !-output S-wave path 115 | if (ipath.eq.1) then 116 | do i=1,np 117 | write(11,'(3f10.3)')w(3,i),w(2,i),w(1,i) 118 | enddo 119 | write(11,'(a1)')"X" 120 | endif 121 | 122 | !-output calculated travel time table 123 | if (ibin.eq.1) write(3,'(2(2f8.3,f7.3,1x),2f10.3)')evlo,evla,evdp,stlo,stla,stel,tp,ts 124 | if (ibin.eq.2) write(3)evlo,evla,evdp,stlo,stla,stel,tp,ts 125 | enddo 126 | rewind(2) 127 | enddo 128 | close(10) 129 | close(11) 130 | print*,'--------------------------------------------' 131 | if (ibin.eq.1) print*,'Ascii table [tt.table] completed!' 132 | if (ibin.eq.2) print*,'Binary table [tt.bin] completed!' 133 | 134 | else 135 | print*,'Option can not be recognized! Please re-try!' 136 | goto 666 137 | endif 138 | close(1) 139 | close(2) 140 | close(3) 141 | end 142 | -------------------------------------------------------------------------------- /model_performance_analysis/confusion_matrix_multi_station.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.metrics import confusion_matrix 5 | import os 6 | from analysis import Precision_Recall_Factory 7 | 8 | path = "../predict/station_blind_Vs30_bias2closed_station_2016" 9 | output_path = f"{path}/model11 confusion matrix" 10 | if not os.path.isdir(output_path): 11 | os.mkdir(output_path) 12 | 13 | label = "pga" 14 | unit = "m/s^2" 15 | 16 | #形成 warning threshold array 其中包含對應的4~5級標準 17 | target_value = np.log10(0.8) 18 | 19 | # 生成一個包含目標值的數組 20 | score_curve_threshold = np.linspace(np.log10(0.025), np.log10(1.4), 100) 21 | 22 | # 檢查最接近的值 23 | closest_value = min(score_curve_threshold, key=lambda x: abs(x - target_value)) 24 | 25 | # 調整num參數以確保包含目標值 26 | if closest_value != target_value: 27 | num_adjusted = 100 + int(np.ceil(abs(target_value - closest_value) / np.diff(score_curve_threshold[:2]))) 28 | score_curve_threshold = np.linspace(np.log10(0.025), np.log10(1.4), num_adjusted) 29 | 30 | 31 | intensity_score_dict = {"second": [], "intensity_score": []} 32 | f1_curve_fig, f1_curve_ax = plt.subplots() 33 | precision_curve_fig, precision_curve_ax = plt.subplots() 34 | recall_curve_fig, recall_curve_ax = plt.subplots() 35 | for mask_after_sec in [3, 5, 7, 10]: 36 | data = pd.read_csv(f"{path}/{mask_after_sec} sec model11 with all info.csv") 37 | 38 | predict_label = data["predict"] 39 | real_label = data["answer"] 40 | # calculate intensity score 41 | intensity = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 42 | data["predicted_intensity"] = predict_label.apply(Precision_Recall_Factory.pga_to_intensity) 43 | data["answer_intensity"] = real_label.apply(Precision_Recall_Factory.pga_to_intensity) 44 | intensity_score = ( 45 | (data["predicted_intensity"] == data["answer_intensity"]).sum() 46 | ) / len(data) 47 | intensity_score_dict["second"].append(mask_after_sec) 48 | intensity_score_dict["intensity_score"].append(intensity_score) 49 | intensity_table = pd.DataFrame(intensity_score_dict) 50 | 51 | # intensity_table.to_csv( 52 | # f"{output_path}/intensity table.csv", 53 | # index=False, 54 | # ) 55 | # plot intensity score confusion matrix 56 | 
intensity_confusion_matrix = confusion_matrix( 57 | data["answer_intensity"], data["predicted_intensity"], labels=intensity 58 | ) 59 | fig,ax=Precision_Recall_Factory.plot_intensity_confusion_matrix(intensity_confusion_matrix,intensity_score,mask_after_sec,output_path=None) 60 | 61 | performance_score = { 62 | f"{label}_threshold ({unit})": [], 63 | "confusion matrix": [], 64 | "accuracy": [], 65 | "precision": [], 66 | "recall": [], 67 | "F1": [], 68 | } 69 | for label_threshold in score_curve_threshold: 70 | predict_logic = np.where(predict_label > label_threshold, 1, 0) 71 | real_logic = np.where(real_label > label_threshold, 1, 0) 72 | matrix = confusion_matrix(real_logic, predict_logic, labels=[1, 0]) 73 | accuracy = np.sum(np.diag(matrix)) / np.sum(matrix) # (TP+TN)/all 74 | precision = matrix[0][0] / np.sum(matrix, axis=0)[0] # TP/(TP+FP) 75 | recall = matrix[0][0] / np.sum(matrix, axis=1)[0] # TP/(TP+FP) 76 | F1_score = 2 / ((1 / precision) + (1 / recall)) 77 | performance_score[f"{label}_threshold ({unit})"].append( 78 | np.round((10**label_threshold), 3) 79 | ) # m/s^2 / 9.8 = g 80 | performance_score["confusion matrix"].append(matrix) 81 | performance_score["accuracy"].append(accuracy) 82 | performance_score["precision"].append(precision) 83 | performance_score["recall"].append(recall) 84 | performance_score["F1"].append(F1_score) 85 | 86 | f1_curve_fig, f1_curve_ax = Precision_Recall_Factory.plot_score_curve( 87 | performance_score, 88 | f1_curve_fig, 89 | f1_curve_ax, 90 | "F1", 91 | score_curve_threshold, 92 | mask_after_sec, 93 | output_path=None, 94 | ) 95 | # precision_curve_ax.set_xlim(0,90) #figure in thesis 96 | precision_curve_fig, precision_curve_ax = Precision_Recall_Factory.plot_score_curve( 97 | performance_score, 98 | precision_curve_fig, 99 | precision_curve_ax, 100 | "precision", 101 | score_curve_threshold, 102 | mask_after_sec, 103 | output_path=None, 104 | ) 105 | # precision_curve_ax.set_xlim(0,90) #figure in thesis 106 | # precision_curve_fig.savefig(f"../paper image/precision_curve.png", dpi=300) 107 | recall_curve_fig, recall_curve_ax = Precision_Recall_Factory.plot_score_curve( 108 | performance_score, 109 | recall_curve_fig, 110 | recall_curve_ax, 111 | "recall", 112 | score_curve_threshold, 113 | mask_after_sec, 114 | output_path=None, 115 | ) 116 | # recall_curve_ax.set_xlim(0,90) #figure in thesis 117 | # recall_curve_fig.savefig(f"../paper image/recall_curve.png", dpi=300) 118 | 119 | predict_table = pd.DataFrame(performance_score) 120 | # predict_table.to_csv( 121 | # f"{output_path}/{mask_after_sec} sec confusion matrix table.csv", 122 | # index=False, 123 | # ) 124 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/tt.table: -------------------------------------------------------------------------------- 1 | 120.816 23.853 7.500 120.805 23.510 -0.000 7.775 13.288 2 | 120.816 23.853 7.500 121.365 23.099 -0.000 18.814 33.033 3 | 120.816 23.853 7.500 120.412 23.719 -0.000 9.346 16.527 4 | 120.816 23.853 7.500 120.172 23.601 -0.000 13.254 23.098 5 | 120.816 23.853 7.500 120.552 23.581 -0.000 8.739 15.026 6 | 120.816 23.853 7.500 120.269 23.485 -0.000 13.276 23.241 7 | 120.816 23.853 7.500 120.544 23.465 -0.000 10.449 17.891 8 | 120.816 23.853 7.500 120.152 23.333 -0.000 16.196 28.654 9 | 120.816 23.853 7.500 120.583 23.296 -0.000 12.711 22.111 10 | 120.816 23.853 7.500 120.405 23.355 -0.000 13.499 23.428 11 | 120.816 23.853 7.500 120.153 23.221 -0.000 17.698 31.583 12 | 120.816 23.853 
7.500 120.268 23.215 -0.000 16.828 29.655 13 | 120.816 23.853 7.500 120.478 23.180 -0.000 15.269 26.681 14 | 120.816 23.853 7.500 120.462 23.046 -0.000 17.891 31.529 15 | 120.816 23.853 7.500 120.280 22.965 -0.000 20.569 36.566 16 | 120.816 23.853 7.500 120.606 23.757 -0.000 5.982 10.513 17 | 120.816 23.853 7.500 120.514 23.779 -0.000 7.312 12.998 18 | 120.816 23.853 7.500 120.411 23.799 -0.000 8.928 15.790 19 | 120.816 23.853 7.500 120.247 23.752 -0.000 11.424 20.035 20 | 120.816 23.853 7.500 120.605 23.632 -0.000 7.320 12.607 21 | 120.816 23.853 7.500 120.528 23.613 -0.000 8.671 15.115 22 | 120.816 23.853 7.500 120.294 23.580 -0.000 11.958 20.900 23 | 120.816 23.853 7.500 120.215 23.541 -0.000 13.308 23.309 24 | 120.816 23.853 7.500 120.544 23.521 -0.000 9.665 16.530 25 | 120.816 23.853 7.500 120.584 23.520 -0.000 9.173 15.720 26 | 120.816 23.853 7.500 120.479 23.607 -0.000 9.393 16.690 27 | 120.816 23.853 7.500 120.344 23.521 -0.000 11.961 20.928 28 | 120.816 23.853 7.500 120.596 23.439 -0.000 10.387 17.869 29 | 120.816 23.853 7.500 120.583 23.358 -0.000 11.839 20.570 30 | 120.816 23.853 7.500 120.163 23.383 -0.000 15.530 27.379 31 | 120.816 23.853 7.500 120.463 23.477 -0.000 11.378 19.692 32 | 120.816 23.853 7.500 120.447 23.494 -0.000 11.369 19.981 33 | 120.816 23.853 7.500 120.408 23.280 -0.000 14.536 25.278 34 | 120.816 23.853 7.500 120.501 23.288 -0.000 13.458 23.378 35 | 120.816 23.853 7.500 120.310 23.308 -0.000 15.091 26.621 36 | 120.816 23.853 7.500 120.271 23.270 -0.000 16.031 28.307 37 | 120.816 23.853 7.500 120.410 23.149 -0.000 16.447 28.993 38 | 120.816 23.853 7.500 120.319 23.172 -0.000 16.931 29.752 39 | 120.816 23.853 7.500 120.103 23.184 -0.000 18.527 32.838 40 | 120.816 23.853 7.500 120.239 23.124 -0.000 18.363 32.707 41 | 120.816 23.853 7.500 120.511 23.077 -0.000 16.838 29.451 42 | 120.816 23.853 7.500 120.450 23.121 -0.000 16.569 29.137 43 | 120.816 23.853 7.500 120.340 23.027 -0.000 19.316 34.432 44 | 120.816 23.853 7.500 120.345 22.906 -0.000 21.063 37.970 45 | 120.816 23.853 7.500 120.208 22.921 -0.000 21.445 37.939 46 | 120.816 23.853 7.500 120.184 22.999 -0.000 20.714 37.091 47 | 120.816 23.853 7.500 120.182 22.974 -0.000 20.914 37.582 48 | 120.816 23.853 7.500 120.229 22.965 -0.000 20.913 37.361 49 | 120.816 23.853 7.500 120.164 23.065 -0.000 19.890 35.615 50 | 120.816 23.853 7.500 120.805 23.510 -0.000 7.775 13.288 51 | 120.816 23.853 7.500 119.555 23.567 -0.000 22.200 38.366 52 | 120.816 23.853 7.500 120.222 23.638 -0.000 12.358 21.596 53 | 120.816 23.853 7.500 120.228 23.040 -0.000 19.869 35.383 54 | 120.816 23.853 7.500 120.528 23.185 -0.000 14.736 25.631 55 | 120.816 23.853 7.500 120.678 23.597 -0.000 6.833 11.810 56 | 120.816 23.853 7.500 120.496 23.270 -0.000 13.734 23.881 57 | 120.816 23.853 7.500 120.298 23.724 -0.000 10.964 19.389 58 | 120.816 23.853 7.500 120.593 23.351 -0.000 11.879 20.659 59 | 120.816 23.853 7.500 120.519 23.384 -0.000 11.966 20.596 60 | 120.816 23.853 7.500 120.429 23.346 -0.000 13.418 23.195 61 | 120.816 23.853 7.500 120.216 23.267 -0.000 16.496 29.114 62 | 120.816 23.853 7.500 120.478 23.791 -0.000 7.844 13.928 63 | 120.816 23.853 7.500 120.147 23.654 -0.000 13.239 22.939 64 | 120.816 23.853 7.500 120.321 23.794 -0.000 10.330 18.324 65 | 120.816 23.853 7.500 120.233 22.983 -0.000 20.662 36.910 66 | 120.816 23.853 7.500 120.280 23.137 -0.000 17.877 31.911 67 | 120.816 23.853 7.500 120.342 23.227 -0.000 15.923 27.745 68 | 120.816 23.853 7.500 120.562 23.686 -0.000 7.360 12.923 69 | 120.816 23.853 7.500 120.614 23.245 -0.000 
13.273 22.993 70 | 120.816 23.853 7.500 120.465 23.669 -0.000 8.919 15.996 71 | 120.816 23.853 7.500 120.290 23.299 -0.000 15.436 27.358 72 | 120.816 23.853 7.500 120.530 23.252 -0.000 13.672 23.754 73 | 120.816 23.853 7.500 120.530 23.252 -0.000 13.672 23.754 74 | 120.816 23.853 7.500 120.227 23.791 -0.000 11.575 20.291 75 | 120.816 23.853 7.500 120.183 23.703 -0.000 12.497 21.708 76 | 120.816 23.853 7.500 120.119 23.037 -0.000 20.554 36.901 77 | 120.816 23.853 7.500 120.097 23.154 -0.000 18.946 33.610 78 | 120.816 23.853 7.500 120.108 23.078 -0.000 20.030 35.820 79 | 120.816 23.853 7.500 120.424 23.498 -0.000 11.548 20.462 80 | -------------------------------------------------------------------------------- /data_preprocess/5_check_waveform.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | import os 4 | 5 | sys.path.append("..") 6 | from read_tsmip import read_tsmip 7 | from obspy.signal.trigger import ar_pick 8 | import matplotlib.pyplot as plt 9 | import tkinter as tk 10 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 11 | 12 | start_year=1999 13 | end_year=2008 14 | start_index = 14031 15 | Afile_path = "../data/Afile" 16 | sta_path = "../data/station information" 17 | waveform_path = "../data/waveform" 18 | output_path="events_traces_catalog" 19 | traces_file_name=f"{start_year}_{end_year}_target_traces.csv" 20 | error_file_name=f"{start_year}_{end_year}_error_traces_file.csv" 21 | traces = pd.read_csv(f"{output_path}/{traces_file_name}") 22 | catalog = pd.read_csv(f"{output_path}/{start_year}_{end_year}_target_catalog.csv") 23 | 24 | 25 | def ok_traces(traces=None, index=None): 26 | traces.loc[index, "quality_control"] = "y" 27 | win.destroy() 28 | 29 | 30 | def broken_traces(traces=None, index=None): 31 | traces.loc[index, "quality_control"] = "n" 32 | win.destroy() 33 | def quit(running): 34 | running.set(False) 35 | win.destroy() 36 | 37 | 38 | if "quality_control" not in traces.columns: 39 | traces["quality_control"] = "TBD" 40 | if os.path.isfile(f"{error_file_name}"): 41 | error_file=pd.read_csv(f"{error_file_name}") 42 | else: 43 | error_file = pd.DataFrame({'index':[]}) 44 | error_file.to_csv(f"{error_file_name}", index=False) 45 | for i in range(start_index,len(traces)): 46 | print(f"{i}/{len(traces)}") 47 | try: 48 | EQ_ID = str(traces["EQ_ID"][i]) 49 | year = str(traces["year"][i]) 50 | month = str(traces["month"][i]) 51 | day = str(traces["day"][i]) 52 | hour = str(traces["hour"][i]) 53 | minute = str(traces["minute"][i]) 54 | second = str(traces["second"][i]) 55 | intensity = str(traces["intensity"][i]) 56 | station_name= traces["station_name"][i] 57 | epdis=str(traces["epdis (km)"][i]) 58 | file_name = traces["file_name"][i].strip() 59 | magnitude = catalog.query(f"EQ_ID=={EQ_ID}")["magnitude"].tolist()[0] 60 | if len(month) < 2: 61 | month = "0" + month 62 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 63 | # picking 64 | if i==8319: #1999~2008 index 8319 can't pick, kernel crushed 65 | continue 66 | p_pick,_ = ar_pick( 67 | waveform[0], 68 | waveform[1], 69 | waveform[2], 70 | samp_rate=waveform[0].stats.sampling_rate, 71 | f1=1, # Frequency of the lower bandpass window 72 | f2=20, # Frequency of the upper bandpass window 73 | lta_p=1, # Length of LTA for the P arrival in seconds 74 | sta_p=0.1, # Length of STA for the P arrival in seconds 75 | lta_s=4.0, # Length of LTA for the S arrival in seconds 76 | sta_s=1.0, # Length of STA for the P 
arrival in seconds 77 | m_p=2, # Number of AR coefficients for the P arrival 78 | m_s=8, # Number of AR coefficients for the S arrival 79 | l_p=0.1, 80 | l_s=0.2, 81 | s_pick=False, 82 | ) 83 | if (p_pick-3)>0: 84 | start_time=int((p_pick-3)*waveform[0].stats.sampling_rate) 85 | else: 86 | start_time=0 87 | # plot 88 | fig, ax = plt.subplots(3, 1) 89 | fig.subplots_adjust(hspace=0.4) 90 | for j in range(len(ax)): 91 | # start_time=4000 92 | if (p_pick+30)*waveform[0].stats.sampling_rate", lambda event: ok_traces(traces=traces, index=i)) 121 | win.bind("", lambda event: broken_traces(traces=traces, index=i)) 122 | running = tk.BooleanVar(value=True) 123 | win.bind("", lambda event: quit(running) if event.keysym == "Escape" else None) 124 | win.mainloop() 125 | if running.get(): 126 | pass 127 | else: 128 | print(f"stop at index:{i}") 129 | break 130 | except Exception as reason: 131 | print(file_name, f"year:{year},month:{month}, {reason}") 132 | row={"index":i,"year":int(year), "month":month, "file":file_name,"reason":reason} 133 | if i not in error_file["index"].values: 134 | error_file= pd.concat([error_file,pd.DataFrame(row, index=[0])],ignore_index=True) 135 | traces.loc[i, "quality_control"] = "n" 136 | continue 137 | 138 | # traces.to_csv(f"{output_path}/{traces_file_name}", index=False) 139 | # error_file.to_csv(f"{output_path}/{error_file_name}", index=False) 140 | print("data saved") 141 | -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/4_cut_waveform.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | import pandas as pd 5 | from tqdm import tqdm 6 | import sys 7 | import obspy 8 | import matplotlib.pyplot as plt 9 | 10 | sys.path.append("../..") 11 | from read_tsmip import cut_traces 12 | 13 | sta_path = "../../data/station_information" 14 | waveform_path = "../../data/0918_M6.8_1319_1330/ascii" 15 | traces = pd.read_csv("traces_catalog.csv") 16 | catalog = pd.read_csv("event_catalog.csv") 17 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 18 | traces.loc[traces.index, "p_pick_sec"] = pd.to_timedelta( 19 | traces["p_pick_sec"], unit="sec" 20 | ) 21 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 22 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 23 | ) 24 | 25 | output = f"../../data/TSMIP_0918_M6.8_1319_1330.hdf5" 26 | error_event = {"EQ_ID": [], "reason": []} 27 | with h5py.File(output, "w") as file: 28 | data = file.create_group("data") 29 | meta = file.create_group("metadata") 30 | for eq_id in tqdm(catalog["EQ_ID"]): 31 | # for eq_id in [247]: 32 | # try: 33 | tmp_traces, traces_info = cut_traces( 34 | traces, eq_id, waveform_path, waveform_type="acc" 35 | ) 36 | _, vel_info = cut_traces(traces, eq_id, waveform_path, waveform_type="vel") 37 | _, dis_info = cut_traces(traces, eq_id, waveform_path, waveform_type="dis") 38 | traces_info["vel"] = vel_info["traces"] 39 | traces_info["dis"] = dis_info["traces"] 40 | # fig=plot_cutting_event(tmp_traces,traces_info) 41 | start_time_str_arr = np.array(traces_info["start_time"], dtype="S30") 42 | station_name_str_arr = np.array(tmp_traces["station_name"], dtype="S30") 43 | tmp_station_info = pd.merge( 44 | tmp_traces[["station_name","Vs30"]], 45 | station_info[ 46 | ["location_code", "latitude", "longitude", "elevation (m)"] 47 | ], 48 | how="left", 49 | left_on="station_name", 50 | right_on="location_code", 51 | ) 52 | 
location_array = np.array( 53 | tmp_station_info[["latitude", "longitude", "elevation (m)"]] 54 | ) 55 | Vs30_array=np.array(tmp_traces["Vs30"]) 56 | if np.isnan(location_array).any(): 57 | print("The location array contain NaN values") 58 | continue 59 | event = data.create_group(f"{eq_id}") 60 | event.create_dataset( 61 | "acc_traces", data=traces_info["traces"], dtype=np.float64 62 | ) 63 | event.create_dataset( 64 | "vel_traces", data=traces_info["vel"], dtype=np.float64 65 | ) 66 | event.create_dataset( 67 | "dis_traces", data=traces_info["dis"], dtype=np.float64 68 | ) 69 | event.create_dataset("p_picks", data=traces_info["p_picks"], dtype=np.int64) 70 | event.create_dataset("pga", data=traces_info["pga"], dtype=np.float64) 71 | event.create_dataset("pgv", data=traces_info["pgv"], dtype=np.float64) 72 | event.create_dataset( 73 | "start_time", data=start_time_str_arr, maxshape=(None), chunks=True 74 | ) 75 | event.create_dataset( 76 | "pga_time", data=traces_info["pga_time"], dtype=np.int64 77 | ) 78 | event.create_dataset( 79 | "pgv_time", data=traces_info["pgv_time"], dtype=np.int64 80 | ) 81 | event.create_dataset( 82 | "station_name", data=station_name_str_arr, maxshape=(None), chunks=True 83 | ) 84 | event.create_dataset( 85 | "station_location", data=location_array, dtype=np.float64 86 | ) 87 | event.create_dataset( 88 | "Vs30", data=Vs30_array, dtype=np.float64 89 | ) 90 | # except Exception as reason: 91 | # print(f"EQ_ID:{eq_id}, {reason}") 92 | # error_event["EQ_ID"].append(eq_id) 93 | # error_event["reason"].append(reason) 94 | # continue 95 | # fig.savefig(f"data/cutting waveform image/{eq_id}.png") 96 | error_event_df = pd.DataFrame(error_event) 97 | error_event_df.to_csv( 98 | "./load into hdf5 error event.csv", index=False 99 | ) 100 | 101 | catalog.to_hdf(output, key="metadata/event_metadata", mode="a", format="table") 102 | traces.to_hdf(output, key="metadata/traces_metadata", mode="a", format="table") 103 | 104 | # plot records section 105 | for eq_id in tqdm(catalog["EQ_ID"]): 106 | tmp_traces, traces_info = cut_traces(traces, eq_id, waveform_path, waveform_type="acc") 107 | for i,chan in enumerate(["HLZ","HLN","HLE"]): 108 | stream = obspy.core.stream.Stream() 109 | for j in range(len(traces_info["traces"])): 110 | trace = obspy.core.trace.Trace(data=traces_info["traces"][j][:, i]) 111 | trace.stats.id = eq_id 112 | trace.stats.station = tmp_traces["station_name"][j] 113 | trace.stats.channel = chan 114 | trace.stats.distance = tmp_traces["epdis (km)"][j] * 1000 115 | trace.stats.starttime = traces_info["start_time"][j] 116 | trace.stats.sampling_rate = 200 117 | 118 | stream.append(trace) 119 | fig, ax = plt.subplots() 120 | stream.plot(type="section",fig=fig) 121 | 122 | magnitude = catalog[catalog["EQ_ID"] == eq_id]["magnitude"].values[0] 123 | 124 | ax.set_title( 125 | f"EQ ID:{eq_id}, Magnitude: {magnitude}, start time: {traces_info['start_time'][j]}" 126 | ) 127 | fig.savefig(f"cut trace/{eq_id}_{trace.stats.channel}.png") 128 | plt.close() 129 | 130 | for i in range(len(traces_info["traces"])): 131 | file_name=tmp_traces["file_name"][i] 132 | station_name=tmp_traces["station_name"][i] 133 | p_pick=traces_info["p_picks"][i] 134 | fig,ax=plt.subplots(3,1,figsize=(14,7)) 135 | for j in range(len(ax)): 136 | ax[j].plot(traces_info["traces"][i][:,j]) 137 | ax[j].axvline(x=p_pick, color="r", linestyle="-") 138 | ax[0].set_title(f"EQ_ID:{eq_id},station_name: {station_name},cut from file_name:{file_name}") 139 | fig.savefig(f"cut 
trace/EQ_ID_{eq_id}_{station_name}.png",dpi=300) 140 | plt.close() -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/2_picking.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | import os 4 | import numpy as np 5 | 6 | sys.path.append("../..") 7 | from obspy.signal.trigger import ar_pick 8 | import matplotlib.pyplot as plt 9 | import obspy 10 | import tkinter as tk 11 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 12 | import re 13 | 14 | 15 | start_index = 0 16 | 17 | 18 | # =================== 19 | def ok_traces(traces=None, index=None): 20 | traces.loc[index, "quality_control"] = "y" 21 | win.destroy() 22 | 23 | 24 | def broken_traces(traces=None, index=None): 25 | traces.loc[index, "quality_control"] = "n" 26 | win.destroy() 27 | 28 | 29 | def quit(running): 30 | running.set(False) 31 | win.destroy() 32 | 33 | 34 | trace_catalog = pd.read_csv("traces_catalog.csv") 35 | 36 | for k in range(start_index, len(trace_catalog["file_name"])): 37 | file_name = trace_catalog["file_name"][k] 38 | print(f"{k}/{len(trace_catalog)}") 39 | try: 40 | txt = f"../../data/0918_M6.8_1319_1330/ascii/{file_name}.asc" 41 | data = pd.read_csv(txt, sep="\s+", skiprows=1, header=None).to_numpy() 42 | 43 | with open(txt, "r") as f: 44 | picks = f.readlines()[0] 45 | picks = re.findall(r"\d+\.\d+", picks) 46 | picks = [np.round(float(number), 2) for number in picks] 47 | 48 | waveform = obspy.core.stream.Stream() 49 | channel = ["HLZ", "HLN", "HLE"] 50 | for i, chan in enumerate(channel): 51 | start = np.where(data == picks[2])[0][0] 52 | end = np.where(data == picks[3])[0][0] 53 | trace = obspy.core.trace.Trace(data[start:end, i + 1]) 54 | 55 | trace.stats.network = "TW" 56 | # trace.stats.station = header[0][14:20] 57 | trace.stats.channel = chan 58 | 59 | trace.stats.sampling_rate = int(1 / abs(data[0, 0] - data[1, 0])) 60 | 61 | waveform.append(trace) 62 | 63 | p_pick, _ = ar_pick( 64 | waveform[0], 65 | waveform[1], 66 | waveform[2], 67 | samp_rate=waveform[0].stats.sampling_rate, 68 | f1=1, # Frequency of the lower bandpass window 69 | f2=20, # Frequency of the upper bandpass window 70 | lta_p=1, # Length of LTA for the P arrival in seconds 71 | sta_p=0.1, # Length of STA for the P arrival in seconds 72 | lta_s=4.0, # Length of LTA for the S arrival in seconds 73 | sta_s=1.0, # Length of STA for the P arrival in seconds 74 | m_p=2, # Number of AR coefficients for the P arrival 75 | m_s=8, # Number of AR coefficients for the S arrival 76 | l_p=0.1, 77 | l_s=0.2, 78 | s_pick=False, 79 | ) 80 | trace_catalog.loc[k, "p_pick_sec"] = p_pick 81 | if (p_pick - 3) > 0: 82 | start_time = int((p_pick - 3) * waveform[0].stats.sampling_rate) 83 | else: 84 | start_time = 0 85 | # plot 86 | fig, ax = plt.subplots(3, 1) 87 | fig.subplots_adjust(hspace=0.4) 88 | for j in range(len(ax)): 89 | # start_time=4000 90 | if (p_pick + 30) * waveform[0].stats.sampling_rate < len(waveform[0].data): 91 | endtime = int((p_pick + 30) * waveform[0].stats.sampling_rate) 92 | # endtime=4600 93 | ax[j].plot( 94 | waveform[j].times()[start_time:], waveform[j].data[start_time:], "k" 95 | ) 96 | ax[j].axvline(x=p_pick, color="r", linestyle="-") 97 | else: 98 | ax[j].plot( 99 | waveform[j].times()[start_time:], waveform[j].data[start_time:], "k" 100 | ) 101 | ax[j].axvline(x=p_pick, color="r", linestyle="-") 102 | ax[0].set_title(f"{file_name}") 103 | ax[1].set_ylabel("gal") 104 | 
ax[-1].set_xlabel("time (sec)") 105 | plt.close() 106 | 107 | win = tk.Tk() 108 | win.attributes("-topmost", True) 109 | win.after(1, lambda: win.focus_force()) 110 | win.title("check waveform") 111 | win.geometry("700x650+10+10") 112 | win.maxsize(1000, 700) 113 | canvas = FigureCanvasTkAgg(fig, win) 114 | canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1) 115 | 116 | label = tk.Label(win, text="Press ESC to quit") 117 | label.pack() 118 | win.bind("", lambda event: ok_traces(traces=trace_catalog, index=k)) 119 | win.bind("", lambda event: broken_traces(traces=trace_catalog, index=k)) 120 | running = tk.BooleanVar(value=True) 121 | win.bind( 122 | "", lambda event: quit(running) if event.keysym == "Escape" else None 123 | ) 124 | win.mainloop() 125 | if running.get(): 126 | pass 127 | else: 128 | print(f"stop at index:{k}") 129 | break 130 | except Exception as reason: 131 | print(file_name, f"{reason}") 132 | row = {"index": i, "file": file_name, "reason": reason} 133 | if i not in error_file["index"].values: 134 | error_file = pd.concat( 135 | [error_file, pd.DataFrame(row, index=[0])], ignore_index=True 136 | ) 137 | trace_catalog.loc[i, "quality_control"] = "n" 138 | continue 139 | trace_catalog.to_csv(f"traces_catalog.csv", index=False) 140 | 141 | # ========shift p_picking by velocity model to correct absolute time====== 142 | traces = pd.read_csv("traces_catalog.csv") 143 | catalog = pd.read_csv("event_catalog.csv") 144 | 145 | EQ_ID = os.listdir(f"../tracer_demo/2023_output") 146 | 147 | traces.insert(0, "EQ_ID", 30792) 148 | 149 | traces=pd.merge( 150 | catalog[["EQ_ID", "year", "month", "day", "hour", "minute", "second"]], 151 | traces, 152 | how="right", 153 | on="EQ_ID", 154 | ) 155 | traces["p_arrival_abs_time"] = pd.to_datetime( 156 | traces[["year", "month", "day", "hour", "minute", "second"]] 157 | ) 158 | 159 | colnames = [ 160 | "evt_lon", 161 | "evt_lat", 162 | "evt_depth", 163 | "sta_lon", 164 | "sta_lat", 165 | "sta_elev", 166 | "p_arrival", 167 | "s_arrival", 168 | ] 169 | for eq in EQ_ID: 170 | event_file_path = f"../tracer_demo/2023_output/{eq}/output.table" 171 | tracer_output = pd.read_csv( 172 | event_file_path, sep=r"\s+", names=colnames, header=None 173 | ) 174 | trace_index = traces[traces["EQ_ID"] == int(eq)].index 175 | p_arrival = pd.to_timedelta(tracer_output["p_arrival"], unit="s") 176 | p_arrival.index = trace_index 177 | traces.loc[trace_index, "p_arrival_abs_time"] = ( 178 | traces.loc[trace_index, "p_arrival_abs_time"] + p_arrival 179 | ) 180 | traces.to_csv(f"traces_catalog.csv", index=False) -------------------------------------------------------------------------------- /model_train_predict/predict_ensemble_merge_info.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib.pyplot as plt 3 | 4 | plt.subplots() 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | from torch.utils.data import DataLoader 9 | from tqdm import tqdm 10 | import sys 11 | sys.path.append("..") 12 | from model.CNN_Transformer_Mixtureoutput_TEAM import ( 13 | CNN, 14 | MDN, 15 | MLP, 16 | PositionEmbedding_Vs30, 17 | TransformerEncoder, 18 | full_model, 19 | ) 20 | from data.multiple_sta_dataset import multiple_station_dataset 21 | from model_performance_analysis.analysis import Intensity_Plotter 22 | 23 | mask_after_sec = 7 24 | label = "pga" 25 | data = multiple_station_dataset( 26 | "../data/TSMIP_1999_2019_Vs30.hdf5", 27 | mode="test", 28 | mask_waveform_sec=mask_after_sec, 29 
| test_year=2016, 30 | label_key=label, 31 | mag_threshold=0, 32 | input_type="acc", 33 | data_length_sec=15, 34 | ) 35 | # ===========predict============== 36 | device = torch.device("cuda") 37 | for num in [11]: 38 | path = f"../model/model{num}.pt" 39 | emb_dim = 150 40 | mlp_dims = (150, 100, 50, 30, 10) 41 | CNN_model = CNN(mlp_input=5665).cuda() 42 | pos_emb_model = PositionEmbedding_Vs30(emb_dim=emb_dim).cuda() 43 | transformer_model = TransformerEncoder() 44 | mlp_model = MLP(input_shape=(emb_dim,), dims=mlp_dims).cuda() 45 | mdn_model = MDN(input_shape=(mlp_dims[-1],)).cuda() 46 | full_Model = full_model( 47 | CNN_model, 48 | pos_emb_model, 49 | transformer_model, 50 | mlp_model, 51 | mdn_model, 52 | pga_targets=25, 53 | data_length=3000, 54 | ).to(device) 55 | full_Model.load_state_dict(torch.load(path)) 56 | loader = DataLoader(dataset=data, batch_size=1) 57 | 58 | Mixture_mu = [] 59 | Label = [] 60 | P_picks = [] 61 | EQ_ID = [] 62 | Label_time = [] 63 | Sta_name = [] 64 | Lat = [] 65 | Lon = [] 66 | Elev = [] 67 | for j, sample in tqdm(enumerate(loader)): 68 | picks = sample["p_picks"].flatten().numpy().tolist() 69 | label_time = sample[f"{label}_time"].flatten().numpy().tolist() 70 | lat = sample["target"][:, :, 0].flatten().tolist() 71 | lon = sample["target"][:, :, 1].flatten().tolist() 72 | elev = sample["target"][:, :, 2].flatten().tolist() 73 | P_picks.extend(picks) 74 | P_picks.extend([np.nan] * (25 - len(picks))) 75 | Label_time.extend(label_time) 76 | Label_time.extend([np.nan] * (25 - len(label_time))) 77 | Lat.extend(lat) 78 | Lon.extend(lon) 79 | Elev.extend(elev) 80 | 81 | eq_id = sample["EQ_ID"][:, :, 0].flatten().numpy().tolist() 82 | EQ_ID.extend(eq_id) 83 | EQ_ID.extend([np.nan] * (25 - len(eq_id))) 84 | weight, sigma, mu = full_Model(sample) 85 | 86 | weight = weight.cpu() 87 | sigma = sigma.cpu() 88 | mu = mu.cpu() 89 | if j == 0: 90 | Mixture_mu = torch.sum(weight * mu, dim=2).cpu().detach().numpy() 91 | Label = sample["label"].cpu().detach().numpy() 92 | else: 93 | Mixture_mu = np.concatenate( 94 | [Mixture_mu, torch.sum(weight * mu, dim=2).cpu().detach().numpy()], 95 | axis=1, 96 | ) 97 | Label = np.concatenate( 98 | [Label, sample["label"].cpu().detach().numpy()], axis=1 99 | ) 100 | Label = Label.flatten() 101 | Mixture_mu = Mixture_mu.flatten() 102 | 103 | output = { 104 | "EQ_ID": EQ_ID, 105 | "p_picks": P_picks, 106 | f"{label}_time": Label_time, 107 | "predict": Mixture_mu, 108 | "answer": Label, 109 | "latitude": Lat, 110 | "longitude": Lon, 111 | "elevation": Elev, 112 | } 113 | output_df = pd.DataFrame(output) 114 | output_df = output_df[output_df["answer"] != 0] 115 | # output_df.to_csv( 116 | # f"./predict/model {num} {mask_after_sec} sec prediction.csv", index=False 117 | # ) 118 | fig, ax = Intensity_Plotter.true_predicted( 119 | y_true=output_df["answer"], 120 | y_pred=output_df["predict"], 121 | quantile=False, 122 | agg="point", 123 | point_size=12, 124 | target=label, 125 | ) 126 | eq_id = 24784 127 | ax.scatter( 128 | output_df["answer"][output_df["EQ_ID"] == eq_id], 129 | output_df["predict"][output_df["EQ_ID"] == eq_id], 130 | c="r", 131 | ) 132 | magnitude = data.event_metadata[data.event_metadata["EQ_ID"] == eq_id][ 133 | "magnitude" 134 | ].values[0] 135 | ax.set_title( 136 | f"{mask_after_sec}s True Predict Plot, 2016 data", 137 | fontsize=20, 138 | ) 139 | 140 | # fig.savefig(f"../predict/model {num} {mask_after_sec} sec.png") 141 | 142 | # ===========merge info============== 143 | Afile_path = 
"../data_preprocess/events_traces_catalog" 144 | output_path = "../predict/station_blind_Vs30_bias2closed_station_2016" 145 | catalog = pd.read_csv(f"{Afile_path}/1999_2019_final_catalog.csv") 146 | traces_info = pd.read_csv(f"{Afile_path}/1999_2019_final_traces_Vs30.csv") 147 | ensemble_predict = pd.read_csv( 148 | f"{output_path}/model 11 {mask_after_sec} sec prediction.csv" 149 | ) 150 | trace_merge_catalog = pd.merge( 151 | traces_info, 152 | catalog[ 153 | [ 154 | "EQ_ID", 155 | "lat", 156 | "lat_minute", 157 | "lon", 158 | "lon_minute", 159 | "depth", 160 | "magnitude", 161 | "nsta", 162 | "nearest_sta_dist (km)", 163 | ] 164 | ], 165 | on="EQ_ID", 166 | how="left", 167 | ) 168 | trace_merge_catalog["event_lat"] = ( 169 | trace_merge_catalog["lat"] + trace_merge_catalog["lat_minute"] / 60 170 | ) 171 | 172 | trace_merge_catalog["event_lon"] = ( 173 | trace_merge_catalog["lon"] + trace_merge_catalog["lon_minute"] / 60 174 | ) 175 | trace_merge_catalog.drop( 176 | ["lat", "lat_minute", "lon", "lon_minute"], axis=1, inplace=True 177 | ) 178 | trace_merge_catalog.rename(columns={"elevation (m)": "elevation"}, inplace=True) 179 | 180 | 181 | data_path = "D:/TEAM_TSMIP/data/TSMIP_1999_2019.hdf5" 182 | dataset = h5py.File(data_path, "r") 183 | for eq_id in ensemble_predict["EQ_ID"].unique(): 184 | eq_id = int(eq_id) 185 | station_name = dataset["data"][str(eq_id)]["station_name"][:].tolist() 186 | 187 | ensemble_predict.loc[ 188 | ensemble_predict.query(f"EQ_ID=={eq_id}").index, "station_name" 189 | ] = station_name 190 | 191 | ensemble_predict["station_name"] = ensemble_predict["station_name"].str.decode("utf-8") 192 | 193 | 194 | prediction_with_info = pd.merge( 195 | ensemble_predict, 196 | trace_merge_catalog.drop( 197 | [ 198 | "latitude", 199 | "longitude", 200 | "elevation", 201 | ], 202 | axis=1, 203 | ), 204 | on=["EQ_ID", "station_name"], 205 | how="left", 206 | suffixes=["_window", "_file"], 207 | ) 208 | # prediction_with_info.to_csv( 209 | # f"{output_path}/{mask_after_sec} sec model11 with all info.csv", index=False 210 | # ) 211 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_CWA_EEW_intensity.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pandas as pd 3 | import numpy as np 4 | import re 5 | import os 6 | from sklearn.metrics import confusion_matrix 7 | from analysis import Intensity_Plotter 8 | 9 | 10 | def haversine(lat1, lon1, lat2, lon2): 11 | # 將經緯度轉換為弧度 12 | lat1 = math.radians(lat1) 13 | lon1 = math.radians(lon1) 14 | lat2 = math.radians(lat2) 15 | lon2 = math.radians(lon2) 16 | 17 | # Haversine公式 18 | dlon = lon2 - lon1 19 | dlat = lat2 - lat1 20 | a = ( 21 | math.sin(dlat / 2) ** 2 22 | + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2 23 | ) 24 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) 25 | 26 | # 地球半徑(千米) 27 | radius = 6371 28 | 29 | # 計算距離 30 | distance = radius * c 31 | 32 | return distance 33 | 34 | 35 | # EEW calculate intensity 36 | path = "../CWA_EEW_report" 37 | site_info = pd.read_excel(f"{path}/site.xlsx") 38 | catalog = pd.read_excel(f"{path}/EEW2016.xlsx") 39 | catalog.columns = [ 40 | "event_time", 41 | "catalog_lon", 42 | "catalog_lat", 43 | "catalog_mag", 44 | "catalog_dep", 45 | "eew_lon", 46 | "eew_lat", 47 | "eew_mag", 48 | "eew_dep", 49 | "eew_time", 50 | ] 51 | catalog = catalog.query("catalog_mag>=5.5") 52 | catalog["event_time"] = catalog["event_time"].astype(str) 53 | 
catalog.dropna(inplace=True) 54 | catalog.reset_index(drop=True, inplace=True) 55 | predict_dict = { 56 | "event_time": [], 57 | "sta_lat": [], 58 | "sta_lon": [], 59 | "predict_pga": [], 60 | "station_code": [], 61 | "process_time": [], 62 | } 63 | for i in range(len(catalog)): 64 | print(catalog["event_time"][i]) 65 | lat = catalog["eew_lat"][i] 66 | lon = catalog["eew_lon"][i] 67 | dep = catalog["eew_dep"][i] 68 | mag = catalog["eew_mag"][i] 69 | for j in range(len(site_info)): 70 | if dep < 40: 71 | Si = site_info["site_s"][j] 72 | hypo_dist = math.sqrt( 73 | math.pow( 74 | haversine(lat, lon, site_info["lat"][j], site_info["lon"][j]), 2 75 | ) 76 | + math.pow(dep, 2) 77 | ) 78 | pga = ( 79 | 12.44 * math.exp(1.31 * mag) * math.pow(hypo_dist, -1.837) * Si 80 | ) # 2021_0303 from Hsiao 81 | else: 82 | Si = site_info["site_d"][j] 83 | hypo_dist = math.sqrt( 84 | math.pow( 85 | haversine(lat, lon, site_info["lat"][j], site_info["lon"][j]), 2 86 | ) 87 | + math.pow(dep, 2) 88 | ) 89 | pga = ( 90 | 12.44 * math.exp(1.31 * mag) * math.pow(hypo_dist, -1.837) * Si 91 | ) # 2021_0303 from Hsiao 92 | 93 | predict_dict["event_time"].append(catalog["event_time"][i]) 94 | predict_dict["sta_lat"].append(site_info["lat"][j]) 95 | predict_dict["sta_lon"].append(site_info["lon"][j]) 96 | predict_dict["predict_pga"].append(pga) 97 | predict_dict["station_code"].append(site_info["code"][j]) 98 | predict_dict["process_time"].append(catalog["eew_time"][i]) 99 | 100 | predict_df = pd.DataFrame(predict_dict) 101 | predict_df["event_time"] = predict_df["event_time"].astype(str) 102 | # merge ground true pga 103 | 104 | 105 | pattern = r"[=,]" 106 | true_pga_dict = { 107 | "event_time": [], 108 | "station_code": [], 109 | "sta_lon": [], 110 | "sta_lat": [], 111 | "dist": [], 112 | "PGA(V)": [], 113 | "PGA(NS)": [], 114 | "PGA(EW)": [], 115 | } 116 | files = os.listdir(f"{path}/event_true_pga") 117 | for file in files: 118 | with open(f"{path}/event_true_pga/{file}", "r", encoding="iso-8859-1") as event: 119 | start_line = 5 120 | lines = event.readlines() 121 | for i in range(start_line, len(lines)): 122 | line = lines[i] 123 | result = re.split(pattern, line.strip()) 124 | true_pga_dict["event_time"].append(file.replace(".txt", "")) 125 | true_pga_dict["station_code"].append(result[1].replace(" ", "")) 126 | true_pga_dict["sta_lon"].append(float(result[5].replace(" ", ""))) 127 | true_pga_dict["sta_lat"].append(float(result[7].replace(" ", ""))) 128 | true_pga_dict["dist"].append(float(result[9].replace(" ", ""))) 129 | true_pga_dict["PGA(V)"].append(float(result[13].replace(" ", ""))) 130 | true_pga_dict["PGA(NS)"].append(float(result[15].replace(" ", ""))) 131 | true_pga_dict["PGA(EW)"].append(float(result[17].replace(" ", ""))) 132 | 133 | true_pga_df = pd.DataFrame(true_pga_dict) 134 | 135 | final_table = pd.merge( 136 | predict_df, 137 | true_pga_df, 138 | on=["event_time", "station_code"], 139 | how="left", 140 | suffixes=["_pre", "_true"], 141 | ) 142 | final_table.dropna(inplace=True) 143 | time_eqid_dict = { 144 | "eqid": [24757, 24784, 25112, 25193, 25225, 25396, 25401, 25561, 25900], 145 | "event_time": [ 146 | "201601190213026", 147 | "201602051957026", 148 | "201604110545009", 149 | "201604271517014", 150 | "201604271819006", 151 | "201605120317015", 152 | "201605120429055", 153 | "201605310523046", 154 | "201610061552000", 155 | ], 156 | } 157 | time_eqid_df = pd.DataFrame(time_eqid_dict) 158 | final_traces = pd.merge(time_eqid_df, final_table, on="event_time", how="right") 159 | final_catalog = 
pd.merge(time_eqid_df, catalog, on="event_time", how="left")
160 | # final_traces.to_csv("cwa_test_eew_events.csv",index=False)
161 | # final_traces.to_csv("cwa_test_eew_traces.csv",index=False)
162 | # =========calculate residual mean and std (PGA values are in gal; *0.01 converts to m/s^2)
163 | final_traces["PGA"] = np.sqrt(
164 |     final_traces["PGA(V)"] ** 2
165 |     + final_traces["PGA(NS)"] ** 2
166 |     + final_traces["PGA(EW)"] ** 2
167 | )
168 | residual_mean = (
169 |     np.log10(final_traces["predict_pga"] * 0.01) - np.log10(final_traces["PGA"] * 0.01)
170 | ).mean()
171 | residual_std = (
172 |     np.log10(final_traces["predict_pga"] * 0.01) - np.log10(final_traces["PGA"] * 0.01)
173 | ).std()
174 | label_threshold = np.log10(np.array([0.250])) # intensity levels 3, 4, 5
175 | predict_logic = np.where(
176 |     np.log10(final_traces["predict_pga"] * 0.01) > label_threshold[0], 1, 0
177 | )
178 | real_logic = np.where(np.log10(final_traces["PGA"] * 0.01) > label_threshold[0], 1, 0)
179 | 
180 | matrix = confusion_matrix(real_logic, predict_logic, labels=[1, 0])
181 | accuracy = np.sum(np.diag(matrix)) / np.sum(matrix) # (TP+TN)/all
182 | precision = matrix[0][0] / np.sum(matrix, axis=0)[0] # TP/(TP+FP)
183 | recall = matrix[0][0] / np.sum(matrix, axis=1)[0] # TP/(TP+FN)
184 | F1_score = 2 / ((1 / precision) + (1 / recall))
185 | 
186 | fig, ax = Intensity_Plotter.plot_true_predicted(
187 |     y_true=np.log10(final_traces["PGA"] * 0.01),
188 |     y_pred=np.log10(final_traces["predict_pga"] * 0.01),
189 |     quantile=False,
190 |     agg="point",
191 |     point_size=70,
192 |     target="pga",
193 |     title="CWA EEW prediction in 2016 M>5.5 events",
194 | )
195 | # fig.savefig("CWA EEW report/true predict plot.png",dpi=300)
196 | # =========plot intensity map
197 | 
198 | for eqid in final_catalog["eqid"]:
199 |     label_type = "pga"
200 |     fig, ax = Intensity_Plotter.plot_CWA_EEW_intensity_map(
201 |         final_traces, final_catalog, eqid, label_type
202 |     )
203 | 
204 | # fig.savefig(f"paper image/eqid_{eqid}_CWA_eew_report.pdf",dpi=300)
205 | 
-------------------------------------------------------------------------------- /data_preprocess/12_TSMIP_vs30.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from tqdm import tqdm
4 | import os
5 | import cartopy.crs as ccrs
6 | import cartopy
7 | from cartopy.mpl import ticker
8 | import matplotlib.pyplot as plt
9 | from Vs30_preprocess import *
10 | 
11 | sta_path = "../data/station_information"
12 | start_year = 1999
13 | end_year = 2019
14 | trace = pd.read_csv(f"./events_traces_catalog/{start_year}_{end_year}_final_traces.csv")
15 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv")
16 | vs30_info = pd.read_csv(f"{sta_path}/egdt_TSMIP_station_vs30.csv")
17 | 
18 | merge_traces = pd.merge(
19 |     trace,
20 |     station_info[["location_code", "latitude", "longitude", "elevation (m)"]],
21 |     how="left",
22 |     left_on="station_name",
23 |     right_on="location_code",
24 | )
25 | 
26 | merge_traces = pd.merge(
27 |     merge_traces,
28 |     vs30_info[["station_code", "Vs30"]],
29 |     how="left",
30 |     left_on="station_name",
31 |     right_on="station_code",
32 | )
33 | 
34 | 
35 | noVs30_station_value_counts = (
36 |     merge_traces[merge_traces["Vs30"].isna()]["station_name"]
37 |     .value_counts()
38 |     .rename_axis("station_name")
39 |     .reset_index(name="counts")
40 | )
41 | noVs30_station_value_counts = pd.merge(
42 |     noVs30_station_value_counts,
43 |     station_info[["location_code", "latitude", "longitude", "elevation (m)"]],
44 |     how="left",
45 |     left_on="station_name",
46 | 
right_on="location_code", 47 | ) 48 | Vs30_station_value_counts = ( 49 | merge_traces[~merge_traces["Vs30"].isna()]["station_name"] 50 | .value_counts() 51 | .rename_axis("station_name") 52 | .reset_index(name="counts") 53 | ) 54 | Vs30_station_value_counts = pd.merge( 55 | Vs30_station_value_counts, 56 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 57 | how="left", 58 | left_on="station_name", 59 | right_on="location_code", 60 | ) 61 | 62 | 63 | # plot station map with vs30 or not 64 | src_crs = ccrs.PlateCarree() 65 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 66 | ax_map.coastlines("10m") 67 | ax_map.scatter( 68 | Vs30_station_value_counts["longitude"], 69 | Vs30_station_value_counts["latitude"], 70 | edgecolors="k", 71 | linewidth=1, 72 | marker="o", 73 | s=10, 74 | zorder=3, 75 | label="include Vs30", 76 | alpha=0.5, 77 | ) 78 | ax_map.scatter( 79 | noVs30_station_value_counts["longitude"], 80 | noVs30_station_value_counts["latitude"], 81 | edgecolors="k", 82 | linewidth=1, 83 | marker="o", 84 | s=10, 85 | zorder=3, 86 | label="No Vs30", 87 | ) 88 | ax_map.set_title("Vs30 from egdt") 89 | ax_map.legend() 90 | # fig.savefig("./events_traces_catalog/Vs30 map.png",dpi=300) 91 | 92 | 93 | file_path = "../data/station_information" 94 | # transfer grd file to xyz file (run only one time) 95 | # if __name__ == "__main__": 96 | # os.getcwd() 97 | # input_grd_file = f"{file_path}/Vs30ofTaiwan.grd" # 輸入GRD檔案的路徑 98 | # output_xyz_file = f"{file_path}/Vs30ofTaiwan.xyz" # 輸出XYZ檔案的路徑 99 | 100 | # grd_to_xyz(input_grd_file, output_xyz_file) 101 | xyz_file = f"{file_path}/Vs30ofTaiwan.xyz" 102 | vs30_table = pd.read_csv(xyz_file, sep="\s+", header=None, names=["x", "y", "Vs30"]) 103 | vs30_table.dropna(inplace=True) 104 | vs30_table.reset_index(drop=True, inplace=True) 105 | 106 | # transform coordinate 107 | vs30_table["x_97"], vs30_table["y_97"] = twd67_to_97(vs30_table["x"], vs30_table["y"]) 108 | vs30_table["lon"] = 0 109 | vs30_table["lat"] = 0 110 | for i in tqdm(range(len(vs30_table))): 111 | vs30_table["lon"][i], vs30_table["lat"][i] = twd97_to_lonlat( 112 | vs30_table["x_97"][i], vs30_table["y_97"][i] 113 | ) 114 | # vs30_table.to_csv(f"{file_path}/Vs30ofTaiwan.csv",index=False) 115 | 116 | # vs30 map fill into no vs30 station 117 | vs30_table = pd.read_csv(f"{file_path}/Vs30ofTaiwan.csv") 118 | target_points = noVs30_station_value_counts[["longitude", "latitude"]].values.tolist() 119 | points = vs30_table[["lon", "lat"]].values.tolist() 120 | 121 | referenced_table = { 122 | "index": [], 123 | "Vs30 referenced lon": [], 124 | "Vs30 referenced lat": [], 125 | "Vs30": [], 126 | } 127 | 128 | for target_point in tqdm(target_points): 129 | nearest_index, nearest_point = find_nearest_point(target_point, points) 130 | 131 | referenced_table["index"].append(nearest_index) 132 | referenced_table["Vs30 referenced lon"].append(nearest_point[0]) 133 | referenced_table["Vs30 referenced lat"].append(nearest_point[1]) 134 | referenced_table["Vs30"].append(vs30_table.loc[nearest_index]["Vs30"]) 135 | 136 | for key in referenced_table.keys(): 137 | noVs30_station_value_counts[f"{key}"] = referenced_table[f"{key}"] 138 | 139 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 140 | ax_map.coastlines("10m") 141 | ax_map.scatter( 142 | noVs30_station_value_counts["Vs30 referenced lon"], 143 | noVs30_station_value_counts["Vs30 referenced lat"], 144 | edgecolors="k", 145 | linewidth=1, 146 | marker="o", 147 | s=40, 
148 | label="referenced", 149 | alpha=0.5, 150 | ) 151 | ax_map.scatter( 152 | noVs30_station_value_counts["longitude"], 153 | noVs30_station_value_counts["latitude"], 154 | edgecolors="k", 155 | linewidth=1, 156 | marker="o", 157 | s=10, 158 | label="No Vs30", 159 | ) 160 | ax_map.set_title("Vs30 filled from Lee's map") 161 | ax_map.legend() 162 | # fig.savefig("./events_traces_catalog/Vs30 filled from Lee map.png",dpi=300) 163 | 164 | # fill vs30 into traces table 165 | 166 | for index in merge_traces[merge_traces["Vs30"].isna()].index: 167 | station_name = merge_traces.iloc[index]["station_name"] 168 | vs30 = np.round( 169 | noVs30_station_value_counts.query(f"station_name=='{station_name}'")[ 170 | "Vs30" 171 | ].values[0], 172 | 2, 173 | ) 174 | merge_traces.loc[index, "Vs30"] = vs30 175 | print( 176 | station_name, 177 | noVs30_station_value_counts.query(f"station_name=='{station_name}'")[ 178 | "Vs30" 179 | ].values, 180 | ) 181 | 182 | merge_traces.drop(["location_code", "station_code"], axis=1, inplace=True) 183 | # merge_traces.to_csv( 184 | # f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv", 185 | # index=False, 186 | # ) 187 | 188 | # plot final vs30 value map 189 | trace_with_vs30 = pd.read_csv( 190 | f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv" 191 | ) 192 | vs30_table = trace_with_vs30[["station_name", "longitude", "latitude", "Vs30"]] 193 | vs30_table = ( 194 | vs30_table.groupby("station_name") 195 | .apply(get_unique_with_other_columns) 196 | .reset_index(drop=True) 197 | ) 198 | 199 | src_crs = ccrs.PlateCarree() 200 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 201 | ax_map.coastlines("10m") 202 | map = ax_map.scatter( 203 | vs30_table["longitude"], 204 | vs30_table["latitude"], 205 | linewidth=1, 206 | marker="o", 207 | s=10, 208 | c=vs30_table["Vs30"], 209 | cmap="copper_r", 210 | ) 211 | ax_map.add_feature(cartopy.feature.OCEAN, zorder=2, edgecolor="k") 212 | # ax_map.set_title("Final Vs30 Map") 213 | cbar = plt.colorbar(map, ax=ax_map) 214 | cbar.set_label("Vs30 (m/s)") 215 | xmin, xmax = ax_map.get_xlim() 216 | ymin, ymax = ax_map.get_ylim() 217 | xticks = ticker.LongitudeLocator(nbins=5)._raw_ticks(xmin, xmax) 218 | yticks = ticker.LatitudeLocator(nbins=5)._raw_ticks(ymin, ymax) 219 | 220 | ax_map.set_xticks(xticks, crs=ccrs.PlateCarree()) 221 | ax_map.set_yticks(yticks, crs=ccrs.PlateCarree()) 222 | 223 | ax_map.xaxis.set_major_formatter(ticker.LongitudeFormatter(zero_direction_label=True)) 224 | ax_map.yaxis.set_major_formatter(ticker.LatitudeFormatter()) 225 | 226 | ax_map.xaxis.set_ticks_position("both") 227 | ax_map.yaxis.set_ticks_position("both") 228 | 229 | ax_map.set_xlim(xmin, xmax) 230 | ax_map.set_ylim(ymin, ymax) 231 | # fig.savefig("./events_traces_catalog/Final Vs30 Map.png",dpi=300) 232 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/1_find_trigger_station.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import obspy 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from obspy.signal.trigger import ar_pick 7 | import json 8 | 9 | 10 | def dist(event_lat, event_lon, station_lat, station_lon): # unit: degree 11 | dist = ((event_lat - station_lat) ** 2 + (event_lon - station_lon) ** 2) ** (1 / 2) 12 | return dist 13 | 14 | 15 | mask_after_sec = 5 16 | station_info = 
pd.read_csv("../../data/station_information/TSMIPstations_new.csv") 17 | traces_info_with_vs30 = pd.read_csv( 18 | "../../data_preprocess/events_traces_catalog/1999_2019_final_traces_Vs30.csv" 19 | ) 20 | sample_rate = 200 21 | 22 | path = "./113019_TSMIP_SAC" 23 | waveform_files = os.listdir(path) 24 | 25 | stations = [] 26 | for file in waveform_files: 27 | station_name = file[:6] 28 | if station_name not in stations: 29 | stations.append(station_name) 30 | 31 | station_info = station_info[station_info["location_code"].isin(stations)] 32 | station_info = station_info.reset_index(drop=True) 33 | 34 | # event epicenter 35 | event_lat = 23.77 36 | event_lon = 121.67 37 | 38 | dist_dict = {"dist": []} 39 | for i in range(len(station_info)): 40 | station_lat = station_info["latitude"][i] 41 | station_lon = station_info["longitude"][i] 42 | dist_dict["dist"].append(dist(event_lat, event_lon, station_lat, station_lon)) 43 | station_info["dist (degree)"] = dist_dict["dist"] 44 | 45 | station_info["p_picks (sec)"] = 0 46 | check_station = ["HWA026", "HWA067", "HWA025", "TTN032", "ILA050"] 47 | # plot and picking: 48 | for i, station in enumerate(station_info["location_code"]): 49 | trace_z = obspy.read(f"{path}/{station}.Z.SAC") 50 | trace_n = obspy.read(f"{path}/{station}.N.SAC") 51 | trace_e = obspy.read(f"{path}/{station}.E.SAC") 52 | trace_z.resample(sample_rate, window="hann") 53 | trace_n.resample(sample_rate, window="hann") 54 | trace_e.resample(sample_rate, window="hann") 55 | 56 | waveforms = np.array([trace_z[0].data, trace_n[0].data, trace_e[0].data]) 57 | # fig, ax = plt.subplots(3, 1) 58 | # ax[0].plot(waveforms[0]) 59 | # ax[1].plot(waveforms[1]) 60 | # ax[2].plot(waveforms[2]) 61 | # ax[0].set_title( 62 | # f"{station}_{trace_z[0].stats.starttime}-{trace_z[0].stats.endtime}" 63 | # ) 64 | try: 65 | p_pick, _ = ar_pick( 66 | waveforms[0], 67 | waveforms[1], 68 | waveforms[2], 69 | samp_rate=200, 70 | f1=1, # Frequency of the lower bandpass window 71 | f2=20, # Frequency of the upper bandpass window 72 | lta_p=1, # Length of LTA for the P arrival in seconds 73 | sta_p=0.1, # Length of STA for the P arrival in seconds 74 | lta_s=4.0, # Length of LTA for the S arrival in seconds 75 | sta_s=1.0, # Length of STA for the P arrival in seconds 76 | m_p=2, # Number of AR coefficients for the P arrival 77 | m_s=8, # Number of AR coefficients for the S arrival 78 | l_p=0.1, 79 | l_s=0.2, 80 | s_pick=False, 81 | ) 82 | station_info.loc[i, "p_picks (sec)"] = p_pick 83 | # ax[0].axvline(x=p_pick * sample_rate, color="r", linestyle="-") 84 | # ax[1].axvline(x=p_pick * sample_rate, color="r", linestyle="-") 85 | # ax[2].axvline(x=p_pick * sample_rate, color="r", linestyle="-") 86 | except: 87 | station_info.loc[i, "p_picks (sec)"] = p_pick 88 | # fig.savefig(f"0403waveform_image/{station}.png", dpi=300) 89 | plt.close() 90 | 91 | station_info = station_info.sort_values(by="dist (degree)") 92 | station_info = station_info.reset_index(drop=True) 93 | 94 | trigger_station_info = pd.merge( 95 | station_info, 96 | traces_info_with_vs30[["station_name", "Vs30"]].drop_duplicates( 97 | subset="station_name" 98 | ), 99 | left_on="location_code", 100 | right_on="station_name", 101 | how="left", 102 | ) 103 | trigger_station_info = trigger_station_info.dropna( 104 | subset=["latitude", "longitude", "elevation (m)", "Vs30"] 105 | ) 106 | trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="HWA026"] 107 | 
trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="HWA067"] 108 | trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="HWA025"] 109 | trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="ILA050"] 110 | trigger_station_info = trigger_station_info.reset_index(drop=True) 111 | 112 | P_wave_velocity = 6.5 113 | stream = obspy.core.stream.Stream() 114 | waveforms_window = [] 115 | mask_station_index = [] 116 | target_length = 18000 117 | for i, station in enumerate(trigger_station_info["location_code"][:25]): 118 | trace_z = obspy.read(f"{path}/{station}.Z.SAC") 119 | trace_n = obspy.read(f"{path}/{station}.N.SAC") 120 | trace_e = obspy.read(f"{path}/{station}.E.SAC") 121 | # bad data padding to fit time window 122 | # HWA026 HWA067 HWA025 ILA050 123 | for trace in [trace_z, trace_n, trace_e]: 124 | trace[0].data = trace[0].data/100 #cm/s2 to m/s2 125 | if len(trace[0].data) < target_length: 126 | padding_length = target_length - len(trace[0].data) 127 | padding = np.zeros(padding_length) 128 | trace[0].data = np.concatenate((trace[0].data, padding)) 129 | trace_z.resample(200, window="hann") 130 | trace_n.resample(200, window="hann") 131 | trace_e.resample(200, window="hann") 132 | 133 | waveforms = np.array([trace_z[0].data, trace_n[0].data, trace_e[0].data]) 134 | if station == "HWA074": # first triggered station 135 | p_pick, _ = ar_pick( 136 | waveforms[0], 137 | waveforms[1], 138 | waveforms[2], 139 | samp_rate=200, 140 | f1=1, # Frequency of the lower bandpass window 141 | f2=20, # Frequency of the upper bandpass window 142 | lta_p=1, # Length of LTA for the P arrival in seconds 143 | sta_p=0.1, # Length of STA for the P arrival in seconds 144 | lta_s=4.0, # Length of LTA for the S arrival in seconds 145 | sta_s=1.0, # Length of STA for the P arrival in seconds 146 | m_p=2, # Number of AR coefficients for the P arrival 147 | m_s=8, # Number of AR coefficients for the S arrival 148 | l_p=0.1, 149 | l_s=0.2, 150 | s_pick=False, 151 | ) 152 | start_time = int((p_pick - 5) * sample_rate) 153 | end_time = int((p_pick + 10) * sample_rate) 154 | trace_z[0].data[int((p_pick) * sample_rate) + (mask_after_sec * sample_rate) :] = 0 155 | trace_n[0].data[int((p_pick) * sample_rate) + (mask_after_sec * sample_rate) :] = 0 156 | trace_e[0].data[int((p_pick) * sample_rate) + (mask_after_sec * sample_rate) :] = 0 157 | 158 | if ( 159 | trigger_station_info["dist (degree)"][i] 160 | - trigger_station_info["dist (degree)"][0] 161 | ) * 100 / P_wave_velocity > mask_after_sec: # zero padding non triggered station 162 | mask_station_index.append(i) #for mask non trigger station information 163 | trace_z[0].data[:] = 0 164 | trace_n[0].data[:] = 0 165 | trace_e[0].data[:] = 0 166 | waveforms = np.stack( 167 | ( 168 | trace_z[0].data[start_time:end_time], 169 | trace_n[0].data[start_time:end_time], 170 | trace_e[0].data[start_time:end_time], 171 | ), 172 | axis=1, 173 | ) 174 | waveforms = waveforms.reshape(3000, 3) 175 | waveforms_window.append(waveforms) 176 | 177 | fig, ax = plt.subplots(3, 1) 178 | ax[0].plot(waveforms[:, 0]) 179 | ax[1].plot(waveforms[:, 1]) 180 | ax[2].plot(waveforms[:, 2]) 181 | ax[0].set_title(f"{station}") 182 | # plt.close() 183 | # fig.savefig( 184 | # f"model_input_waveform_image/{mask_after_sec}_sec/{i}_{station}.png", dpi=300 185 | # ) 186 | 187 | waveform = np.stack(waveforms_window, axis=0).tolist() 188 | target_station_info = trigger_station_info.copy() 189 | 190 | #mask non trigger station 
information
191 | for i in mask_station_index:
192 |     trigger_station_info.loc[i, ["latitude", "longitude", "elevation (m)", "Vs30"]] = 0
193 | 
194 | input_station = (
195 |     trigger_station_info[["latitude", "longitude", "elevation (m)", "Vs30"]][:25]
196 |     .to_numpy()
197 |     .tolist()
198 | )
199 | for i in range(1, 16):
200 |     print((i - 1) * 25, i * 25)
201 |     target_station = (
202 |         target_station_info[["latitude", "longitude", "elevation (m)", "Vs30"]][
203 |             (i - 1) * 25 : i * 25
204 |         ]
205 |         .to_numpy()
206 |         .tolist()
207 |     )
208 |     station_name = target_station_info["location_code"][(i - 1) * 25 : i * 25].tolist()
209 |     output = {
210 |         "waveform": waveform,
211 |         "sta": input_station,
212 |         "target": target_station,
213 |         "station_name": station_name,
214 |     }
215 | 
216 |     # with open(f"model_input/{mask_after_sec}_sec_without_broken_data/{i}.json", "w") as json_file:
217 |     #     json.dump(output, json_file)
218 | 
-------------------------------------------------------------------------------- /feature_map_correlation/tlcc_analysis.py: --------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import seaborn as sns
4 | from scipy.stats import pearsonr
5 | 
6 | 
7 | class Calculator:
8 | 
9 |     def first_occurrences_indices(b):
10 |         first_indices = {}  # dict storing the first-occurrence index of each distinct value
11 | 
12 |         for i, item in enumerate(b):
13 |             if item not in first_indices:
14 |                 first_indices[item] = i  # record the index where this value first appears
15 | 
16 |         return first_indices
17 | 
18 | 
19 |     def normalize_to_zero_one(arr):
20 |         # find the minimum and maximum of the array
21 |         min_val = arr.min()
22 |         max_val = arr.max()
23 | 
24 |         # linearly rescale the array to the range 0 to 1
25 |         normalized_arr = (arr - min_val) / (max_val - min_val)
26 | 
27 |         return normalized_arr
28 | 
29 | 
30 |     def calculate_tlcc(time_series1, time_series2, max_delay):
31 |         """
32 |         Compute the TLCC (time-lagged cross-correlation) and return the corresponding time delays and TLCC values.
33 | 
34 |         Parameters:
35 |         - time_series1: the first time series
36 |         - time_series2: the second time series
37 |         - max_delay: the maximum lag to evaluate
38 | 
39 |         Returns:
40 |         - delay: array of time delays
41 |         - tlcc_values: array of the corresponding TLCC (Pearson correlation) values
42 |         """
43 |         delay = np.arange(-max_delay, max_delay + 1)
44 |         tlcc_values = []
45 |         for i, d in enumerate(delay):
46 |             if d < 0:
47 |                 x1_lagged = time_series1[: len(time_series1) + d]
48 |                 x2_lagged = time_series2[-d:]
49 |             else:
50 |                 x1_lagged = time_series1[d:]
51 |                 x2_lagged = time_series2[: len(time_series2) - d]
52 |             # if d % 5 == 0:
53 |             #     fig,ax=plt.subplots()
54 |             #     ax.plot(x1_lagged,c="k")
55 |             #     ax.plot(x2_lagged,c="r")
56 |             #     ax.set_title(f"delay:{d}")
57 |             #     plt.grid(True)
58 | 
59 |             # compute the Pearson correlation
60 |             pearson_corr, _ = pearsonr(x1_lagged, x2_lagged)
61 |             tlcc_values.append(pearson_corr)
62 | 
63 |         return delay, tlcc_values
64 | 
65 | class Plotter:
66 | 
67 |     def plot_waveform(waveform, eq_id, input_station, index, output_path=None):
68 |         fig, ax = plt.subplots(3, 1, figsize=(14, 7))
69 |         for j in range(len(ax)):
70 |             ax[j].plot(waveform[:, j])
71 |         ax[0].set_title(f"EQ_ID: {eq_id} input waveform{index+1},{input_station}")
72 |         if output_path:
73 |             fig.savefig(f"{output_path}/3 channel input waveform{index+1}.png", dpi=300)
74 |         return fig, ax
75 | 
76 | 
77 |     def plot_correlation_curve_with_shift_time(
78 |         delay_values, tlcc_values, eq_id, attribute, index, mask_after_sec, output_path=None
79 |     ):
80 |         fig, ax = plt.subplots(figsize=(14, 7))
81 |         ax.plot(delay_values, tlcc_values)
82 |         ax.xaxis.set_tick_params(labelsize=15)
83 |         ax.yaxis.set_tick_params(labelsize=15)
84 |         ax.set_xlabel("Shift Time Sample", fontsize=15)
85 |         ax.set_ylabel("TLCC (Pearson Correlation)
Value", fontsize=15) 86 | ax.set_title(f"EQ_ID: {eq_id} {attribute}{index+1} TLCC Analysis", fontsize=15) 87 | ax.grid(True) 88 | if output_path: 89 | fig.savefig( 90 | f"{output_path}/{mask_after_sec} sec {attribute}{index+1} TLCC Analysis.png", 91 | dpi=300, 92 | ) 93 | return fig, ax 94 | 95 | 96 | def plot_attribute_with_feature_map( 97 | attribute_arr, 98 | resized_feature_map, 99 | key, 100 | attribute, 101 | correlation_starttime, 102 | correlation_endtime, 103 | correlation, 104 | tlcc_values, 105 | input_station, 106 | output_path=None, 107 | ): 108 | x_pos = 0.05 109 | y_pos = 0.6 110 | fig, ax = plt.subplots(3, 1, figsize=(14, 7)) 111 | ax[0].plot(attribute_arr, alpha=0.7) 112 | ax[1].plot(resized_feature_map, c="red") 113 | ax[2].plot( 114 | attribute_arr, 115 | alpha=0.7, 116 | label=f"{attribute}", 117 | ) 118 | ax[2].plot( 119 | resized_feature_map, 120 | c="red", 121 | label="feature map", 122 | ) 123 | for j in range(len(ax)): 124 | ax[j].axvline(x=correlation_starttime, color="grey", linestyle="--") 125 | ax[j].axvline(x=correlation_endtime, color="grey", linestyle="--") 126 | ax[2].text( 127 | x_pos, 128 | y_pos, 129 | f"correlation: {np.round(correlation, 2)}\nTLCC max correlation: {np.round(np.array(tlcc_values).max(),2)}", 130 | transform=ax[j].transAxes, 131 | fontsize=15, 132 | horizontalalignment="left", 133 | verticalalignment="top", 134 | ) 135 | ax[0].set_title( 136 | f"EQ_ID: {key} {attribute}, station_name:{input_station}", 137 | fontsize=15, 138 | ) 139 | ax[1].set_ylabel("normalized acc", fontsize=15) 140 | ax[-1].set_xlabel("time sample", fontsize=15) 141 | ax[-1].xaxis.set_tick_params(labelsize=15) 142 | ax[-1].yaxis.set_tick_params(labelsize=15) 143 | ax[2].legend() 144 | if output_path: 145 | fig.savefig( 146 | f"{output_path}/{attribute}_{input_station} with feature map.png", 147 | dpi=300, 148 | ) 149 | return fig, ax 150 | 151 | 152 | def plot_correlation_hist( 153 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 154 | ): 155 | # hist 156 | fig, ax = plt.subplots() 157 | ax.hist( 158 | np.array(attribute_dict[attribute]["tlcc_max_correlation"]), 159 | bins=15, 160 | edgecolor="k", 161 | ) 162 | ax.set_xlabel("correlation", fontsize=12) 163 | ax.set_ylabel("number of traces", fontsize=12) 164 | ax.set_title( 165 | f"Correlation (TLCC) of \n{mask_after_sec} sec {attribute}", 166 | fontsize=15, 167 | ) 168 | ax.text( 169 | 0.8, 170 | 0.8, 171 | f"mean:{TLCC_mean}\nstd:{TLCC_std}", 172 | transform=ax.transAxes, 173 | fontsize=12, 174 | ) 175 | if output_path: 176 | fig.savefig( 177 | f"{output_path}/correlation (TLCC) with {attribute} histogram.png", 178 | dpi=300, 179 | ) 180 | return fig, ax 181 | 182 | 183 | def plot_time_shifted_with_correlation( 184 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 185 | ): 186 | fig, ax = plt.subplots() 187 | ax.scatter( 188 | attribute_dict[attribute]["max_delay"], 189 | attribute_dict[attribute]["tlcc_max_correlation"], 190 | alpha=0.5, 191 | s=15, 192 | ) 193 | 194 | ax.set_xlabel("shifted time sample") 195 | ax.set_ylabel("max Pearson correlation") 196 | ax.set_title( 197 | f"Correlation (TLCC) with delay time{mask_after_sec} sec \n{attribute}, mean :{TLCC_mean}, std: {TLCC_std}", 198 | fontsize=15, 199 | ) 200 | if output_path: 201 | fig.savefig( 202 | f"{output_path}/{mask_after_sec} sec {attribute} TLCC max correlation delay time.png", 203 | dpi=300, 204 | ) 205 | return fig, ax 206 | 207 | 208 | def plot_time_shifted_with_hist( 209 | 
attribute_dict, attribute, delay_mean, delay_std, mask_after_sec, output_path=None 210 | ): 211 | fig, ax = plt.subplots() 212 | ax.hist(attribute_dict[attribute]["max_delay"], bins=15, edgecolor="k") 213 | ax.text( 214 | 0.75, 215 | 0.8, 216 | f"mean:{delay_mean}\nstd:{delay_std}", 217 | transform=ax.transAxes, 218 | fontsize=12, 219 | ) 220 | ax.set_xlabel("shifted time sample", fontsize=12) 221 | ax.set_ylabel("number of traces", fontsize=12) 222 | ax.set_title( 223 | f"{mask_after_sec} sec {attribute}\ndistribution of time delay with max correlation (TLCC)", 224 | fontsize=15, 225 | ) 226 | if output_path: 227 | fig.savefig( 228 | f"{output_path}/{mask_after_sec} sec {attribute} distribution of time delay with max correlation (TLCC).png", 229 | dpi=300, 230 | ) 231 | return fig, ax 232 | 233 | 234 | def correlation_with_attributes_heat_map(data, attributes=None, output_path=None): 235 | fig, ax = plt.subplots() 236 | sns.heatmap(data, annot=True, cmap="Reds") 237 | 238 | ax.set_xticks([x + 0.5 for x in range(data.shape[1])]) 239 | ax.set_xticklabels(["3", "5", "7", "10"], fontsize=12) 240 | 241 | ax.set_yticks([x + 0.5 for x in range(data.shape[0])]) 242 | plt.yticks(rotation=0) 243 | ax.set_yticklabels(attributes, fontsize=12) 244 | 245 | ax.set_xlabel("second", fontsize=13) 246 | 247 | cbar = ax.collections[0].colorbar 248 | 249 | # 设置颜色条标签的字体大小 250 | cbar.set_label("Correlation", fontsize=12) 251 | plt.tight_layout() 252 | if output_path: 253 | fig.savefig(f"{output_path}/correlation_heat_map.png", dpi=300) 254 | return fig, ax 255 | -------------------------------------------------------------------------------- /data/visualize.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() # without this line will cause kernel crashed: when matplotlib and torch import simultaneously 4 | import cartopy.crs as ccrs 5 | from cartopy.mpl import ticker 6 | import cartopy 7 | import numpy as np 8 | from multiple_sta_dataset import multiple_station_dataset 9 | from torch.utils.data import DataLoader 10 | from tqdm import tqdm 11 | import torch 12 | 13 | 14 | class Plot_Train_Test_Data: 15 | def event_histogram( 16 | train_catalog=None, test_catalog=None, key=None, xlabel=None, title=None 17 | ): 18 | fig, ax = plt.subplots() 19 | ax.hist(train_catalog[f"{key}"], bins=30, ec="black", label="train") 20 | ax.hist(test_catalog[f"{key}"], bins=30, ec="black", label="test", alpha=0.8) 21 | ax.set_yscale("log") 22 | ax.set_xlabel(f"{xlabel}", fontsize=15) 23 | ax.set_ylabel("Number of events", fontsize=15) 24 | ax.legend() 25 | if title: 26 | ax.set_title(f"{title}") 27 | return fig, ax 28 | 29 | def event_map(train_catalog=None, test_catalog=None, title=None): 30 | src_crs = ccrs.PlateCarree() 31 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 32 | ax_map.coastlines("10m") 33 | ax_map.scatter( 34 | train_catalog["lon"] + train_catalog["lon_minute"] / 60, 35 | train_catalog["lat"] + train_catalog["lat_minute"] / 60, 36 | edgecolors="k", 37 | linewidth=1, 38 | marker="o", 39 | c="grey", 40 | s=2 ** train_catalog["magnitude"], 41 | zorder=3, 42 | alpha=0.5, 43 | label="train", 44 | ) 45 | ax_map.scatter( 46 | test_catalog["lon"] + test_catalog["lon_minute"] / 60, 47 | test_catalog["lat"] + test_catalog["lat_minute"] / 60, 48 | edgecolors="k", 49 | linewidth=1, 50 | marker="o", 51 | c="orange", 52 | s=2 ** test_catalog["magnitude"], 53 | zorder=3, 54 | alpha=0.5, 55 | label="test", 56 | 
) 57 | ax_map.add_feature(cartopy.feature.OCEAN, edgecolor="k") 58 | 59 | xmin, xmax = ax_map.get_xlim() 60 | ymin, ymax = ax_map.get_ylim() 61 | xticks = ticker.LongitudeLocator(nbins=5)._raw_ticks(xmin, xmax) 62 | yticks = ticker.LatitudeLocator(nbins=5)._raw_ticks(ymin, ymax) 63 | 64 | ax_map.set_xticks(xticks, crs=ccrs.PlateCarree()) 65 | ax_map.set_yticks(yticks, crs=ccrs.PlateCarree()) 66 | 67 | ax_map.xaxis.set_major_formatter( 68 | ticker.LongitudeFormatter(zero_direction_label=True) 69 | ) 70 | ax_map.yaxis.set_major_formatter(ticker.LatitudeFormatter()) 71 | 72 | ax_map.xaxis.set_ticks_position("both") 73 | ax_map.yaxis.set_ticks_position("both") 74 | if title: 75 | ax_map.set_title(f"{title}") 76 | ax_map.legend() 77 | return fig, ax_map 78 | 79 | def pga_histogram(traces_catalog=None, test_year=None, title=None): 80 | fig, ax = plt.subplots(figsize=(8, 6)) 81 | ax.hist( 82 | traces_catalog.query(f"year!={test_year}")["pga"], 83 | bins=30, 84 | ec="black", 85 | label="train", 86 | ) 87 | ax.hist( 88 | traces_catalog.query(f"year=={test_year}")["pga"], 89 | bins=30, 90 | alpha=0.8, 91 | ec="black", 92 | label="test", 93 | ) 94 | pga_threshold = np.log10( 95 | [1e-5, 0.008, 0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10] 96 | ) 97 | label = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 98 | ax.vlines(pga_threshold[1:-1], 0, 17700, linestyles="dotted", color="k") 99 | for i in range(len(label)): 100 | if label[i] == "0": 101 | continue 102 | ax.text( 103 | ((pga_threshold[i] + pga_threshold[i + 1]) / 2) - 0.05, 15000, label[i] 104 | ) 105 | ax.set_yscale("log") 106 | ax.set_xlabel(r"PGA log(${m/s^2}$)", fontsize=15) 107 | ax.set_ylabel("Number of traces", fontsize=15) 108 | fig.legend(fontsize=13) 109 | if title: 110 | ax.set_title(f"{title}") 111 | return fig, ax 112 | 113 | 114 | class Increase_High_Data_Test: 115 | def load_dataset_into_list( 116 | data_path, oversample_rate=1, bias_to_close_station=False 117 | ): 118 | dataset = multiple_station_dataset( 119 | data_path, 120 | mode="train", 121 | mask_waveform_sec=3, 122 | weight_label=False, 123 | oversample=oversample_rate, 124 | oversample_mag=4, 125 | test_year=2016, 126 | mask_waveform_random=True, 127 | mag_threshold=0, 128 | label_key="pga", 129 | input_type="acc", 130 | data_length_sec=15, 131 | station_blind=True, 132 | bias_to_closer_station=bias_to_close_station, 133 | ) 134 | origin_loader = DataLoader(dataset, batch_size=16) 135 | origin_PGA = [] 136 | for sample in tqdm(origin_loader): 137 | tmp_pga = torch.index_select( 138 | sample["label"].flatten(), 139 | 0, 140 | sample["label"].flatten().nonzero().flatten(), 141 | ).tolist() 142 | origin_PGA.extend(tmp_pga) 143 | return origin_PGA 144 | 145 | def plot_pga_histogram( 146 | bias_closed_sta_PGA=None, 147 | oversampled_PGA=None, 148 | origin_PGA=None, 149 | origin_high_intensity_rate=None, 150 | oversampled_high_intensity_rate=None, 151 | bias_closed_sta_high_intensity_rate=None, 152 | ): 153 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 154 | pga_threshold = np.log10([0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]) 155 | 156 | fig, ax = plt.subplots(figsize=(7, 7)) 157 | ax.hist(bias_closed_sta_PGA, bins=32, edgecolor="k", label="bias_closed_sta") 158 | ax.hist(oversampled_PGA, bins=32, edgecolor="k", label="oversampled", alpha=0.6) 159 | ax.hist(origin_PGA, bins=32, edgecolor="k", label="origin", alpha=0.6) 160 | ax.vlines(pga_threshold[1:-1], 0, 40000, linestyles="dotted", color="k") 161 | for i in range(len(pga_threshold) - 1): 162 | 
ax.text((pga_threshold[i] + pga_threshold[i + 1]) / 2, 50000, label[i]) 163 | ax.text( 164 | 0.01, 165 | 0.8, 166 | f"high intensity rate:\norigin: {np.round(origin_high_intensity_rate,2)}\noversampled: {np.round(oversampled_high_intensity_rate,2)}\nbias to station: {np.round(bias_closed_sta_high_intensity_rate,2)}", 167 | transform=ax.transAxes, 168 | fontsize=12, 169 | ) 170 | ax.set_xlim(-2.75, 1.25) 171 | ax.set_ylabel("Number of traces", size=14) 172 | ax.set_xlabel(r"log(PGA (${m/s^2}$))", size=14) 173 | ax.set_title("TSMIP PGA distribution in training", size=14) 174 | ax.set_yscale("log") 175 | fig.legend(loc="upper right") 176 | return fig, ax 177 | 178 | 179 | def plot_station_distribution(stations=None, title=None): 180 | src_crs = ccrs.PlateCarree() 181 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 182 | 183 | ax_map.coastlines("10m") 184 | 185 | ax_map.add_feature( 186 | cartopy.feature.OCEAN, zorder=2, edgecolor="k" 187 | ) # zorder越大的圖層 越上面 188 | 189 | sta = ax_map.scatter( 190 | stations["longitude"], 191 | stations["latitude"], 192 | edgecolors="gray", 193 | color="red", 194 | linewidth=0.5, 195 | marker="^", 196 | s=20, 197 | zorder=3, 198 | label="Station", 199 | ) 200 | xmin = stations["longitude"].min() - 0.1 201 | xmax = stations["longitude"].max() + 0.1 202 | ymin = stations["latitude"].min() - 0.1 203 | ymax = stations["latitude"].max() + 0.1 204 | xticks = ticker.LongitudeLocator(nbins=5)._raw_ticks(xmin, xmax) 205 | yticks = ticker.LatitudeLocator(nbins=5)._raw_ticks(ymin, ymax) 206 | ax_map.set_xticks(xticks, crs=ccrs.PlateCarree()) 207 | ax_map.set_yticks(yticks, crs=ccrs.PlateCarree()) 208 | 209 | ax_map.xaxis.set_major_formatter( 210 | ticker.LongitudeFormatter(zero_direction_label=True) 211 | ) 212 | ax_map.yaxis.set_major_formatter(ticker.LatitudeFormatter()) 213 | 214 | ax_map.xaxis.set_ticks_position("both") 215 | ax_map.yaxis.set_ticks_position("both") 216 | ax_map.legend() 217 | if title: 218 | ax_map.set_title(title) 219 | return fig, ax_map 220 | 221 | 222 | def plot_received_traces_station_map( 223 | total_station_value_counts, title="Received traces map", output_path=None 224 | ): 225 | src_crs = ccrs.PlateCarree() 226 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 227 | ax_map.coastlines("10m") 228 | ax_map.scatter( 229 | total_station_value_counts["longitude"], 230 | total_station_value_counts["latitude"], 231 | edgecolors="k", 232 | linewidth=1, 233 | marker="o", 234 | s=total_station_value_counts["counts"] * 1.5, 235 | zorder=3, 236 | alpha=0.5, 237 | ) 238 | ax_map.set_title(f"{title}") 239 | if output_path: 240 | fig.savefig(f"{output_path}/{title}.png", dpi=300) 241 | return fig, ax_map 242 | -------------------------------------------------------------------------------- /feature_map_correlation/feature_map_correlation.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from tqdm import tqdm 8 | import os 9 | from scipy.ndimage import zoom 10 | import sys 11 | sys.path.append("..") 12 | from data.multiple_sta_dataset import multiple_station_dataset 13 | from model.CNN_Transformer_Mixtureoutput_TEAM import CNN_feature_map 14 | import os 15 | from scipy.signal import hilbert 16 | from tlcc_analysis import Plotter,Calculator 17 | 18 | 19 | mask_after_sec = 10 20 | sample_rate = 200 21 | label = "pga" 22 | data = 
multiple_station_dataset( 23 | "../data/TSMIP_1999_2019_Vs30.hdf5", 24 | mode="test", 25 | mask_waveform_sec=mask_after_sec, 26 | test_year=2016, 27 | label_key=label, 28 | mag_threshold=0, 29 | input_type="acc", 30 | data_length_sec=15, 31 | ) 32 | # need station name 33 | output_path = "../predict/station_blind_Vs30_bias2closed_station_2016" 34 | predict = pd.read_csv(f"{output_path}/{mask_after_sec} sec model11 with all info.csv") 35 | 36 | # ===========prepare model============== 37 | device = torch.device("cuda") 38 | num = 11 39 | path = f"../model/model{num}.pt" 40 | emb_dim = 150 41 | mlp_dims = (150, 100, 50, 30, 10) 42 | CNN_model = CNN_feature_map(mlp_input=5665).cuda() 43 | 44 | full_model_parameter = torch.load(path) 45 | # ===========load CNN parameter============== 46 | CNN_parameter = {} 47 | for name, param in full_model_parameter.items(): 48 | if ( 49 | "model_CNN" in name 50 | ): # model_CNN.conv2d1.0.weight : conv2d1.0.weight didn't match 51 | name = name.replace("model_CNN.", "") 52 | CNN_parameter[name] = param 53 | CNN_model.load_state_dict(CNN_parameter) 54 | 55 | event_index_list = [] 56 | for eq_id in data.events_index: 57 | event_index_list.append(eq_id[0][0, 0]) 58 | 59 | eq_first_index = Calculator.first_occurrences_indices(event_index_list) 60 | 61 | # plot feature map and calculate correlation 62 | attribute_dict = { 63 | "euclidean_norm": {"correlation": [], "tlcc_max_correlation": [], "max_delay": []}, 64 | "vertical_envelope": { 65 | "correlation": [], 66 | "tlcc_max_correlation": [], 67 | "max_delay": [], 68 | }, 69 | "NS_envelope": {"correlation": [], "tlcc_max_correlation": [], "max_delay": []}, 70 | "EW_envelope": {"correlation": [], "tlcc_max_correlation": [], "max_delay": []}, 71 | "vertical_instantaneous_phase": { 72 | "correlation": [], 73 | "tlcc_max_correlation": [], 74 | "max_delay": [], 75 | }, 76 | "NS_instantaneous_phase": { 77 | "correlation": [], 78 | "tlcc_max_correlation": [], 79 | "max_delay": [], 80 | }, 81 | "EW_instantaneous_phase": { 82 | "correlation": [], 83 | "tlcc_max_correlation": [], 84 | "max_delay": [], 85 | }, 86 | "vertical_instantaneous_freq": { 87 | "correlation": [], 88 | "tlcc_max_correlation": [], 89 | "max_delay": [], 90 | }, 91 | "NS_instantaneous_freq": { 92 | "correlation": [], 93 | "tlcc_max_correlation": [], 94 | "max_delay": [], 95 | }, 96 | "EW_instantaneous_freq": { 97 | "correlation": [], 98 | "tlcc_max_correlation": [], 99 | "max_delay": [], 100 | }, 101 | } 102 | print(len(eq_first_index.keys())) 103 | for key, index in tqdm(zip(eq_first_index.keys(), eq_first_index.values())): 104 | event_output_path = ( 105 | f"{output_path}/{mask_after_sec} sec cnn feature map/each event/{str(key)}" 106 | ) 107 | if not os.path.isdir(f"{event_output_path}"): 108 | os.makedirs(f"{event_output_path}") 109 | sample = data[index] 110 | waveform = sample["waveform"] 111 | 112 | not_padding_station_number = ( 113 | (torch.from_numpy(sample["sta"]) != 0).all(dim=1).sum().item() 114 | ) 115 | single_event_prediction = predict.query(f"EQ_ID=={key}") 116 | input_station_list = single_event_prediction["station_name"][ 117 | :not_padding_station_number 118 | ].tolist() 119 | if len(input_station_list) < 25: 120 | input_station_list += [np.nan] * (25 - len(input_station_list)) 121 | 122 | p_picks = sample["p_picks"].flatten().tolist() 123 | # plot 24784 input waveform 124 | if key == 24784: 125 | for i in range(not_padding_station_number): 126 | single_waveform=waveform[i] 127 | input_station=input_station_list[i] 128 | fig, ax = 
Plotter.plot_waveform(single_waveform, key, input_station,index=i) 129 | 130 | cnn_input = torch.DoubleTensor(waveform).float().cuda() 131 | cnn_output, layer_output = CNN_model(cnn_input) 132 | numeric_array = np.array(layer_output[-1].detach().cpu(), dtype=np.float32) 133 | feature_map = np.mean(numeric_array, axis=1) 134 | scale_factor_h = waveform.shape[0] / feature_map.shape[0] 135 | scale_factor_w = waveform.shape[1] / feature_map.shape[1] 136 | 137 | # zoom out feature map 138 | resized_feature_map = zoom(feature_map, (scale_factor_h, scale_factor_w), order=3) 139 | component_dict = {} 140 | euclidean_waveform = np.linalg.norm(waveform, axis=2) / np.sqrt(3) 141 | component_dict[f"euclidean_norm"] = euclidean_waveform 142 | for com, component in enumerate(["vertical", "NS", "EW"]): 143 | analytic_signal = hilbert(waveform[:, :, com]) 144 | envelope = np.abs(analytic_signal) 145 | instantaneous_phase = np.unwrap(np.angle(analytic_signal)) 146 | instantaneous_frequency = np.abs( 147 | (np.diff(instantaneous_phase) / (2.0 * np.pi) * sample_rate) 148 | ) 149 | component_dict[f"{component}_envelope"] = envelope 150 | component_dict[f"{component}_instantaneous_phase"] = instantaneous_phase 151 | component_dict[f"{component}_instantaneous_freq"] = instantaneous_frequency 152 | 153 | for attribute in component_dict: #calculate correlation to different attribute 154 | for i in range(not_padding_station_number): 155 | correlation_starttime = p_picks[i] - sample_rate 156 | correlation_endtime = p_picks[0] + (mask_after_sec + 1) * sample_rate 157 | if mask_after_sec == 10: 158 | correlation_endtime = p_picks[0] + (mask_after_sec) * sample_rate 159 | try: 160 | correlation = np.corrcoef( 161 | component_dict[attribute][ 162 | i, correlation_starttime:correlation_endtime 163 | ], 164 | resized_feature_map[i, correlation_starttime:correlation_endtime], 165 | )[0, 1] 166 | delay_values, tlcc_values = Calculator.calculate_tlcc( 167 | component_dict[attribute][ 168 | i, correlation_starttime:correlation_endtime 169 | ], 170 | resized_feature_map[i, correlation_starttime:correlation_endtime], 171 | max_delay=100, 172 | ) 173 | except: # second=10 case 174 | correlation = np.corrcoef( 175 | component_dict[attribute][ 176 | i, correlation_starttime:correlation_endtime 177 | ], 178 | resized_feature_map[ 179 | i, correlation_starttime + 1 : correlation_endtime 180 | ], 181 | )[0, 1] 182 | delay_values, tlcc_values = Calculator.calculate_tlcc( 183 | component_dict[attribute][ 184 | i, correlation_starttime:correlation_endtime 185 | ], 186 | resized_feature_map[ 187 | i, correlation_starttime + 1 : correlation_endtime 188 | ], 189 | max_delay=100, 190 | ) 191 | attribute_dict[attribute]["correlation"].append(correlation) 192 | max_index = np.argmax(tlcc_values) 193 | max_correlation = tlcc_values[max_index] 194 | max_delay = delay_values[max_index] 195 | attribute_dict[attribute]["tlcc_max_correlation"].append(max_correlation) 196 | attribute_dict[attribute]["max_delay"].append(max_delay) 197 | 198 | if key == 24784: # plot 199 | fig, ax = Plotter.plot_correlation_curve_with_shift_time( 200 | delay_values, tlcc_values, key, attribute,index=i,mask_after_sec=mask_after_sec, output_path=None 201 | ) 202 | attribute_arr=Calculator.normalize_to_zero_one(component_dict[attribute][i]) 203 | resized_feature_map=Calculator.normalize_to_zero_one(resized_feature_map[i]) 204 | fig, ax = Plotter.plot_attribute_with_feature_map( 205 | attribute_arr, 206 | resized_feature_map, 207 | key, 208 | attribute, 209 | 
correlation_starttime, 210 | correlation_endtime, 211 | correlation, 212 | tlcc_values, 213 | input_station_list[i], 214 | ) 215 | 216 | output_path = f"{output_path}/{mask_after_sec} sec cnn feature map" 217 | 218 | for attribute in attribute_dict: #statistical analysis 219 | TLCC_mean = np.round( 220 | np.array(attribute_dict[attribute]["tlcc_max_correlation"]).mean(), 2 221 | ) 222 | TLCC_std = np.round( 223 | np.array(attribute_dict[attribute]["tlcc_max_correlation"]).std(), 2 224 | ) 225 | fig, ax = Plotter.plot_correlation_hist( 226 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 227 | ) 228 | # x: time sample lag, y: max correlation (TLCC) 229 | fig, ax = Plotter.plot_time_shifted_with_correlation( 230 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 231 | ) 232 | # max correlation time delay hist 233 | delay_mean = np.round(np.array(attribute_dict[attribute]["max_delay"]).mean(), 2) 234 | delay_std = np.round(np.array(attribute_dict[attribute]["max_delay"]).std(), 2) 235 | fig, ax = Plotter.plot_time_shifted_with_hist( 236 | attribute_dict, 237 | attribute, 238 | delay_mean, 239 | delay_std, 240 | mask_after_sec, 241 | output_path=None, 242 | ) 243 | 244 | # belowed data is correlation with attributes in different seconds 245 | data = np.array( 246 | [ 247 | [0.61, 0.53, 0.49, 0.46], 248 | [0.68, 0.58, 0.52, 0.5], 249 | [0.59, 0.51, 0.47, 0.46], 250 | [0.58, 0.5, 0.47, 0.45], 251 | [0.29, 0.23, 0.18, 0.12], 252 | [0.29, 0.23, 0.18, 0.12], 253 | [0.29, 0.23, 0.18, 0.12], 254 | [0.3, 0.22, 0.16, 0.11], 255 | [0.29, 0.21, 0.15, 0.1], 256 | [0.3, 0.21, 0.15, 0.1], 257 | ] 258 | ) 259 | attributes = [ 260 | "Euclidean norm", 261 | "Vertical envelope", 262 | "NS envelope", 263 | "EW envelope", 264 | "Vertical phase", 265 | "NS phase", 266 | "EW phase", 267 | "Vertical frequency", 268 | "NS frequency", 269 | "EW frequency", 270 | ] 271 | output_path = "./predict/station_blind_Vs30_bias2closed_station_2016" 272 | fig, ax = Plotter.correlation_with_attributes_heat_map(data, attributes, output_path=None) 273 | --------------------------------------------------------------------------------
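
Editor's note: the snippet below is an illustrative sketch added for this document, not part of the repository. It shows one way to sanity-check Calculator.calculate_tlcc from feature_map_correlation/tlcc_analysis.py on two synthetic series with a known lag; the array length, the lag of 30 samples, and running from the feature_map_correlation/ directory (so tlcc_analysis is importable) are assumptions made only for this example.

# Sanity check for Calculator.calculate_tlcc (illustrative only).
import numpy as np

from tlcc_analysis import Calculator  # assumes cwd is feature_map_correlation/

rng = np.random.default_rng(0)
series1 = rng.standard_normal(1000)
lag = 30                          # artificial, known shift in samples
series2 = np.roll(series1, lag)   # series2 lags series1 by 30 samples (circular shift)

delay, tlcc_values = Calculator.calculate_tlcc(series1, series2, max_delay=100)
best_delay = delay[int(np.argmax(tlcc_values))]

# Given how the function slices the two series, the peak is expected at delay = -30,
# where the overlapping segments are identical and the Pearson correlation is 1.
print(best_delay, np.max(tlcc_values))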