├── data_preprocess ├── tracer_demo │ ├── chichi.evt │ ├── input │ ├── ._src │ ├── ._input │ ├── tracer │ ├── ._README │ ├── ._tracer │ ├── ._tt.table │ ├── src │ │ ├── tracer │ │ ├── ._tracer │ │ ├── ._setup.inc │ │ ├── ._subr.f90 │ │ ├── ._tracer.f90 │ │ ├── setup.inc │ │ └── tracer.f90 │ ├── ._P_path.txt │ ├── ._S_path.txt │ ├── ._chichi.evt │ ├── ._chichi.sta │ ├── ._vel3d.mod │ ├── tracer.sh │ ├── create_input_file │ │ └── input_file.py │ ├── chichi.sta │ ├── read_velocity_model.py │ ├── README │ └── tt.table ├── images │ └── workflow.png ├── 11_concat_final_table.py ├── cut_hdf5_sample.py ├── process_time_shift.py ├── README.md ├── 6_checked_traces_events.py ├── plot_event_trace_distribution.py ├── 4_data_selection.py ├── 1_afile_to_catalog.py ├── 10_check_station_overlap.py ├── 7_traces_picking.py ├── 8_shift_picking_by_velocity_model.py ├── plot_cut_traces.py ├── 9_label.py ├── 0918_M6.8_1319_1330 │ ├── 1_merge_sta_info.py │ ├── 3_label.py │ ├── 4_cut_waveform.py │ └── 2_picking.py ├── plot_picking_waveform.py ├── 2_catalog_records_cleaning.py ├── plot_data_distribution.py ├── 3_station_location_dataset.py ├── analyze_resample.py ├── plot_double_event.py ├── Vs30_preprocess.py ├── 13_cut_waveform_to_hdf5.py ├── 5_check_waveform.py └── 12_TSMIP_vs30.py ├── images ├── Meinong_event.gif ├── Taitung_offshore_event.gif └── TEAM-Taiwan_model_architecture.png ├── prediction_images_in_readme └── ref.png ├── docker ├── requirements.txt ├── Dockerfile └── README.md ├── requirements.txt ├── data ├── plot_station_distribution.py ├── intensity_sort_from_station.py ├── plot_number_of_traces_station_map.py ├── data_visualize.py └── visualize.py ├── model_train_predict ├── plot_loss_curve.py └── predict_ensemble_merge_info.py ├── model_performance_analysis ├── plot_event_input_stations.py ├── plot_0918_M6.8_event_intensity_map.py ├── 0403_Hualien_Earthquake │ ├── plot_input_station.py │ ├── 3_waveform_after_preprocess.py │ ├── 4_plot_intensity_map.py │ ├── 2_TTSAM_0403.py │ ├── 5_plot_confusion_matrix.py │ ├── residual_map.py │ └── 1_find_trigger_station.py ├── analyze_prediction_in_magnitude.py ├── plot_CWA_TTSAM_intensity_comparision.py ├── plot_residual.py ├── plot_event_input_waveforms.py ├── warning_time_maximize.py ├── plot_intensity_map_and_warning_time.py ├── confusion_matrix_multi_station.py └── plot_CWA_EEW_intensity.py ├── .gitignore ├── README.md └── feature_map_correlation ├── plot_single_event_feature_map.py ├── tlcc_analysis.py └── feature_map_correlation.py /data_preprocess/tracer_demo/chichi.evt: -------------------------------------------------------------------------------- 1 | 120.816 23.853 7.5 2 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/input: -------------------------------------------------------------------------------- 1 | 2 2 | chichi.evt 3 | chichi.sta 4 | 1 5 | 1 6 | -------------------------------------------------------------------------------- /images/Meinong_event.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/images/Meinong_event.gif -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._src: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._src 
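The `data_preprocess/tracer_demo/input` file shown above is the five-line driver that the Fortran `tracer` program reads on standard input; `tracer.sh` further below documents what each line means (mode option, source `.evt` file, receiver `.sta` file, ray-path output flag, ASCII/binary output flag). A minimal sketch of generating such a driver file from Python is shown here; the helper name `write_tracer_input` is illustrative and does not exist in the repository:

```python
# Hypothetical helper: writes the same 5-line driver file that `tracer`
# reads on stdin (line meanings follow the comments in tracer.sh).
def write_tracer_input(path, evt_file, sta_file, output_raypath=True, ascii_output=True):
    lines = [
        "2",                             # mode option: 1 = two points, 2 = two files
        evt_file,                        # source file (lon lat depth)
        sta_file,                        # receiver file (lon lat elev name)
        "1" if output_raypath else "0",  # output ray path: 1 = yes, 0 = no
        "1" if ascii_output else "2",    # output type: 1 = ascii, 2 = binary
    ]
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")


# Reproduces the demo driver shown above:
# write_tracer_input("input", "chichi.evt", "chichi.sta")
```

Note that `tracer.sh` wraps the file paths in double quotes when it builds `input.txt`, while the demo `input` file uses unquoted names; both are accepted by the demo setup.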
-------------------------------------------------------------------------------- /images/Taitung_offshore_event.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/images/Taitung_offshore_event.gif -------------------------------------------------------------------------------- /data_preprocess/images/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/images/workflow.png -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._input: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._input -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/tracer -------------------------------------------------------------------------------- /prediction_images_in_readme/ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/prediction_images_in_readme/ref.png -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._README -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._tracer -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._tt.table: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._tt.table -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/tracer -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._P_path.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._P_path.txt -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._S_path.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._S_path.txt -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._chichi.evt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._chichi.evt -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._chichi.sta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._chichi.sta -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/._vel3d.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/._vel3d.mod -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._tracer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._tracer -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._setup.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._setup.inc -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._subr.f90: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._subr.f90 -------------------------------------------------------------------------------- /images/TEAM-Taiwan_model_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/images/TEAM-Taiwan_model_architecture.png -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/._tracer.f90: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonChang0320/TT-SAM/HEAD/data_preprocess/tracer_demo/src/._tracer.f90 -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.10.2 2 | numpy==1.22.2 3 | pandas==1.3.5 4 | h5py==2.10.0 5 | mlflow==1.30.0 6 | scipy==1.8.0 7 | matplotlib==3.5.1 8 | tables==3.6.1 9 | scikit-learn==1.1.0 10 | black==23.1.0 11 | Cartopy==0.21.1 12 | obspy==1.3.0 13 | seaborn==0.11.2 14 | tqdm==4.63.0 15 | ViTables==3.0.2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | black==23.1.0 2 | Cartopy==0.21.1 3 | h5py==2.10.0 4 | matplotlib==3.5.1 5 | mlflow==1.30.0 6 | numpy==1.22.2 7 | obspy==1.3.0 8 | pandas==1.3.5 9 | scikit-learn==1.1.0 10 | scipy==1.8.0 11 | seaborn==0.11.2 12 | tables==3.6.1 13 | torch==1.10.2+cu113 14 | torchaudio==0.10.2+cu113 15 | torchsampler==0.1.2 16 | torchsummary==1.5.1 17 | torchvision==0.11.3+cu113 18 | tqdm==4.63.0 19 | ViTables==3.0.2 20 | -------------------------------------------------------------------------------- /data/plot_station_distribution.py: -------------------------------------------------------------------------------- 1 | import pandas 
as pd 2 | from visualize import plot_station_distribution 3 | 4 | data = pd.read_csv("../data_preprocess/events_traces_catalog/1999_2019_final_traces_Vs30.csv") 5 | 6 | unique_station = data.drop_duplicates(subset="station_name") 7 | 8 | fig,ax=plot_station_distribution(stations=unique_station,title="TSMIP station distribution") 9 | 10 | # fig.savefig(f"paper image/TSMIP_station_distribution.png",dpi=300) 11 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.12 2 | 3 | # Ignore all interactive dialog during apt-get update 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | # Install linux package 7 | RUN apt-get update && apt-get upgrade -y; \ 8 | apt-get install -y \ 9 | curl git htop sudo vim \ 10 | python3-dev python3-pip libgeos-dev 11 | 12 | # Python package install 13 | COPY requirements.txt /tmp/ 14 | RUN python3 -m pip install --upgrade pip; 15 | RUN python3 -m pip install shapely --no-binary shapely; 16 | RUN python3 -m pip --no-cache-dir install --requirement /tmp/requirements.txt; 17 | 18 | # Define working directory 19 | CMD ["bash"] -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/tracer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for folder in inputs/*/; 4 | do 5 | eq_id_path="${folder%/}" 6 | echo $eq_id_path 7 | echo "2" > input.txt #mode option: 1-two points, 2-two files 8 | echo "\"$eq_id_path/event_input.evt\"" >> input.txt #source file 9 | echo "\"$eq_id_path/station_input.sta\"" >> input.txt #receiver file 10 | echo "1" >> input.txt #output raypath-1, otherwise 0 11 | echo "1" >> input.txt #output type 1-ascii, 2-binary 12 | #Fortran tracer 13 | ./tracer < input.txt 14 | 15 | output_file="$eq_id_path/output.table" 16 | cp "tt.table" $output_file 17 | echo "==========================" 18 | done 19 | 20 | 21 | -------------------------------------------------------------------------------- /model_train_predict/plot_loss_curve.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | data = pd.read_csv("../model/model11_loss.csv") 5 | 6 | train_loss = data.query("key=='train_loss'") 7 | validation_loss = data.query("key=='val_loss'") 8 | 9 | fig, ax = plt.subplots() 10 | ax.plot(train_loss["step"], train_loss["value"], label="train") 11 | ax.plot(validation_loss["step"], validation_loss["value"], label="validation") 12 | ax.scatter( 13 | validation_loss["step"][validation_loss["value"].idxmin()], 14 | validation_loss["value"].min(), 15 | c="red", 16 | s=30, 17 | ) 18 | ax.legend() 19 | ax.set_ylabel("loss") 20 | ax.set_xlabel("epoch") 21 | # fig.savefig(f"model/model11_loss_curve.png",dpi=300) 22 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_event_input_stations.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from analysis import Triggered_Map 3 | 4 | # plot input station map 5 | mask_after_sec = 10 6 | eq_id = 25900 7 | prediction_with_info = pd.read_csv( 8 | f"../predict/station_blind_noVs30_bias2closed_station_2016/{mask_after_sec} sec ensemble 510 with all info.csv" 9 | ) 10 | record_prediction = prediction_with_info.query(f"EQ_ID=={eq_id}") 11 | first_trigger_time = 
min(record_prediction["p_picks"]) 12 | input_station = record_prediction[ 13 | record_prediction["p_picks"] < first_trigger_time + (mask_after_sec * 200) 14 | ] 15 | 16 | 17 | if len(input_station) >= 25: 18 | input_station = input_station[:25] 19 | 20 | fig, ax = Triggered_Map.plot_station_map( 21 | trace_info=input_station, 22 | sec=mask_after_sec, 23 | EQ_ID=eq_id, 24 | pad=100, 25 | ) 26 | 27 | # fig.savefig( 28 | # f"../paper image/eqid{eq_id}_{mask_after_sec}_sec_station_input.png",dpi=300 29 | # ) 30 | -------------------------------------------------------------------------------- /data_preprocess/11_concat_final_table.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | start_year1=1999 4 | end_year1=2008 5 | traces1 = pd.read_csv( 6 | f"./events_traces_catalog/{start_year1}_{end_year1}_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 7 | ) 8 | catalog1 = pd.read_csv( 9 | f"./events_traces_catalog/{start_year1}_{end_year1}_ok_events_p_arrival_abstime.csv" 10 | ) 11 | 12 | start_year2=2009 13 | end_year2=2019 14 | traces2 = pd.read_csv( 15 | f"./events_traces_catalog/{start_year2}_{end_year2}_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 16 | ) 17 | catalog2 = pd.read_csv( 18 | f"./events_traces_catalog/{start_year2}_{end_year2}_ok_events_p_arrival_abstime.csv" 19 | ) 20 | 21 | final_trace=pd.concat([traces1,traces2]) 22 | 23 | final_catalog=pd.concat([catalog1,catalog2]) 24 | 25 | # final_trace.to_csv(f"./events_traces_catalog/{start_year1}_{end_year2}_final_traces.csv",index=False) 26 | # final_catalog.to_csv(f"./events_traces_catalog/{start_year1}_{end_year2}_final_catalog.csv",index=False) 27 | -------------------------------------------------------------------------------- /data_preprocess/cut_hdf5_sample.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import pandas as pd 3 | 4 | 5 | data_path="../data/TSMIP_1999_2019_Vs30.hdf5" 6 | init_event_metadata = pd.read_hdf(data_path, "metadata/event_metadata") 7 | trace_metadata = pd.read_hdf(data_path, "metadata/traces_metadata") 8 | 9 | sample_eqid=init_event_metadata.query("year==2016")["EQ_ID"] 10 | 11 | 12 | with h5py.File(data_path, "r") as origin, h5py.File("../data/2016_sample.hdf5", 'w') as sample: 13 | sample.create_group("data") 14 | sample.create_group("metadata") 15 | 16 | for eqid in sample_eqid.values: 17 | print(eqid) 18 | data = origin["data"][str(eqid)] 19 | sample_group=sample["data"].create_group(f"{eqid}") 20 | 21 | for col in data: 22 | attr=data[f"{col}"] 23 | 24 | sample_group.copy(attr,col) 25 | 26 | init_event_metadata.to_hdf('2016_sample.hdf5', key="metadata/event_metadata", mode="a", format="table") 27 | trace_metadata.to_hdf('2016_sample.hdf5', key="metadata/traces_metadata", mode="a", format="table") 28 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_0918_M6.8_event_intensity_map.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from analysis import Intensity_Plotter 3 | 4 | predict_path = "../predict/station_blind_Vs30_bias2closed_station_2016/0918_M6.8_1319_1330" 5 | catalog_path="../data_preprocess/0918_M6.8_1319_1330" 6 | mask_sec = 10 7 | catalog = pd.read_csv(f"{catalog_path}/event_catalog.csv") 8 | prediction = pd.read_csv( 9 | f"{predict_path}/{mask_sec} sec model11 eqid_30792 prediction with all info.csv" 10 | ) 11 | 
catalog["longitude"]=catalog["lon"]+catalog["lon_minute"]/60 12 | catalog["latitude"]=catalog["lat"]+catalog["lat_minute"]/60 13 | fig, ax = Intensity_Plotter.plot_intensity_map( 14 | trace_info=prediction, 15 | eventmeta=catalog, 16 | label_type="pga", 17 | true_label=prediction["answer"], 18 | pred_label=prediction["predict"], 19 | sec=mask_sec, 20 | EQ_ID=None, 21 | grid_method="linear", 22 | pad=100, 23 | title=f"{mask_sec} sec intensity Map", 24 | ) 25 | # fig.savefig(f"{predict_path}/{mask_sec} sec intensity map.png",dpi=300) 26 | -------------------------------------------------------------------------------- /data_preprocess/process_time_shift.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | """ 4 | The script calculates the time between first triggered station got waveform and earthquake occurred. 5 | """ 6 | 7 | input_path = "./events_traces_catalog" 8 | catalog = pd.read_csv(f"{input_path}/1999_2019_final_catalog.csv") 9 | traces = pd.read_csv(f"{input_path}/1999_2019_final_traces_Vs30.csv") 10 | 11 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 12 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 13 | ) 14 | catalog["event_time"] = pd.to_datetime( 15 | catalog[["year", "month", "day", "hour", "minute", "second"]] 16 | ) 17 | 18 | 19 | eq_id_list = [24757, 24784, 25112, 25193, 25225, 25396, 25401, 25561, 25900] 20 | 21 | for eq_id in eq_id_list: 22 | event = catalog.query(f"EQ_ID=={eq_id}") 23 | triggered_trace = traces.query(f"EQ_ID=={eq_id}") 24 | 25 | first_triggered_trace = triggered_trace.loc[ 26 | triggered_trace["p_arrival_abs_time"].idxmin() 27 | ] 28 | p_wave_propogated_time = ( 29 | first_triggered_trace["p_arrival_abs_time"] - event["event_time"] 30 | ) 31 | print(eq_id, p_wave_propogated_time) 32 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/plot_input_station.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | import sys 4 | sys.path.append("..") 5 | from analysis import Triggered_Map 6 | 7 | i = 1 8 | # for mask_after_sec in range(1,11): 9 | mask_after_sec = 10 10 | with open(f"model_input/{mask_after_sec}_sec/{i}.json", "r") as json_file: 11 | data = json.load(json_file) 12 | 13 | station = data["sta"] 14 | station_info = pd.DataFrame( 15 | station, columns=["latitude", "longitude", "elevation", "Vs30"] 16 | ) 17 | condition = ( 18 | (station_info["latitude"] == 0) 19 | & (station_info["longitude"] == 0) 20 | & (station_info["elevation"] == 0) 21 | & (station_info["Vs30"] == 0) 22 | ) 23 | station_info = station_info.drop(station_info[condition].index) 24 | 25 | station_info["event_lon"]=121.67 26 | station_info["event_lat"]=23.77 27 | station_info['magnitude']=7.2 28 | 29 | fig,ax_map=Triggered_Map.plot_station_map(trace_info=station_info,min_epdis=10.87177078,sec=mask_after_sec) 30 | 31 | ax_map.set_title(f"After {mask_after_sec} seconds") 32 | 33 | # fig.savefig(f"triggered_station/{mask_after_sec}_sec_triggered_station.png", dpi=300) 34 | -------------------------------------------------------------------------------- /data_preprocess/README.md: -------------------------------------------------------------------------------- 1 | # Data Preprocess 2 | There are four components you need to prepare first 3 | 1. **Event catalog** 4 | 2. **Traces catalog** 5 | 3. **Seismic waveform** 6 | 4. 
**Vs30 table for TSMIP station** 7 | 8 | Please follow the steps in order: 9 | 10 | `1_afile_to_catalog.py` 11 | 12 | `2_catalog_records_cleaning.py` 13 | 14 | ... 15 | 16 | `13_cut_waveform_to_hdf5.py` 17 | 18 | Each step produces a `.csv` output, which is the input for the next script. 19 | 20 | After finishing these steps, you will get an `.hdf5` file that includes all of the information prepared above. 21 | 22 | ## Preprocess Workflow 23 | ![image](images/workflow.png) 24 | 25 | ### Others 26 | 27 | We used a 3D velocity model to shift the P-wave arrival time, following Huang et al., 2014 28 | 29 | Paper link: 30 | 31 | https://www.sciencedirect.com/science/article/pii/S0012821X14000995 32 | 33 | All of this work is in `tracer_demo`. 34 | 35 | The Vs30 dataset was contributed by Kuo et al., 2012 and Lee et al., 2008 36 | 37 | Paper link: 38 | 39 | https://www.sciencedirect.com/science/article/pii/S0013795212000397 40 | 41 | http://tao.cgu.org.tw/index.php/articles/archive/geophysics/item/799-2008196671pt 42 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Dockerfile for TT-SAM environment (CPU version) 2 | 3 | This Dockerfile is based on the `python:3.8.12` image, and installs several Linux packages and Python packages to create a Python environment for running TT-SAM. 4 | 5 | ## Usage 6 | To build the Docker image, run: 7 | ``` 8 | docker build -t <image_name> . 9 | ``` 10 | where `<image_name>` is the desired name for the Docker image. 11 | 12 | To run a container based on this image, use: 13 | ``` 14 | docker run -it <image_name> bash 15 | ``` 16 | 17 | This will launch an interactive shell in the container, with access to the installed packages and Python environment. 18 | 19 | ## Packages 20 | The Dockerfile installs the following Linux packages: 21 | 22 | - `curl` 23 | - `git` 24 | - `htop` 25 | - `sudo` 26 | - `vim` 27 | - `python3-dev` 28 | - `python3-pip` 29 | - `libgeos-dev` 30 | 31 | And the following Python packages, installed via pip: 32 | 33 | - `shapely` (built from source) 34 | - any packages listed in `requirements.txt` 35 | 36 | ## Notes 37 | - The `ENV DEBIAN_FRONTEND noninteractive` line is included to prevent any interactive prompts during the package installation process. 38 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/setup.inc: -------------------------------------------------------------------------------- 1 | !------ DIMENSION SETTING ------ 2 | ! Data 3 | integer maxnsta 4 | parameter(maxnsta=1500) 5 | ! Model 6 | integer maxnlat,maxnlon,maxndep,ilatdeg,ilondeg,idepkm 7 | parameter(maxnlat=100) 8 | parameter(maxnlon=100) 9 | parameter(maxndep=100) 10 | !-ilatdeg, ilondeg are size of map in latitude, longitude 11 | parameter(ilatdeg=100000) 12 | parameter(ilondeg=100000) 13 | parameter(idepkm=100000) 14 | 15 | !------ PARAMETER SETTING ------ 16 | ! getdata 17 | real*8 sta_loc(maxnsta,3),ray_tt(maxnsta,2),ray_wei(maxnsta,2) 18 | real*8 olon,olat,odep,omag 19 | integer date,time,sta_idx(maxnsta),npair 20 | character*6 sta_nm(maxnsta) 21 | character*12 pfile 22 | common/data/sta_loc,ray_tt,ray_wei,date,time,olon,olat,odep,omag,sta_idx,npair,sta_nm,pfile 23 | ! 
input_vel 24 | real*8 bld1,bld2 25 | real*8 lat_a(maxnlat),lon_a(maxnlon),dep_a(maxndep) 26 | real*8 vp_a(maxnlon,maxnlat,maxndep),vs_a(maxnlon,maxnlat,maxndep) 27 | integer nlat_a,nlon_a,ndep_a,ips 28 | common/vmodel_a/vp_a,vs_a,lat_a,lon_a,dep_a,bld1,bld2,nlat_a,nlon_a,ndep_a,ips 29 | ! bldmap 30 | real*8 lat1_a,lon1_a,dep1_a 31 | integer ilonloc_a(ilondeg),ilatloc_a(ilatdeg),ideploc_a(idepkm) 32 | common/loc_a/ lat1_a,lon1_a,dep1_a,ilonloc_a,ilatloc_a,ideploc_a 33 | -------------------------------------------------------------------------------- /data/intensity_sort_from_station.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | path = "../data_preprocess" 6 | data = pd.read_csv(f"{path}/events_traces_catalog/1999_2019_final_traces_Vs30.csv") 7 | 8 | stations = data["station_name"].unique() 9 | 10 | for station in stations: 11 | print(station) 12 | tmp_data = data.query(f"station_name=='{station}'") 13 | fig, ax = plt.subplots() 14 | ax.hist( 15 | tmp_data["pga"], 16 | bins=30, 17 | ec="black", 18 | ) 19 | hist, bins = np.histogram(tmp_data["pga"], bins=30) 20 | pga_threshold = np.log10( 21 | [1e-5, 0.008, 0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10] 22 | ) 23 | label = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 24 | ax.vlines(pga_threshold[1:-1], 0, hist.max()+5, linestyles="dotted", color="k") 25 | for i in range(len(label)): 26 | if label[i] == "0": 27 | continue 28 | ax.text( 29 | ((pga_threshold[i] + pga_threshold[i + 1]) / 2) - 0.05, hist.max()+5, label[i] 30 | ) 31 | ax.set_xlabel(r"PGA log(${m/s^2}$)", fontsize=12) 32 | ax.set_ylabel("Number of traces", fontsize=12) 33 | ax.set_title(f"station name: {station}", fontsize=15) 34 | # fig.savefig(f"{path}/each_station_distribution/{station}.png", dpi=300) 35 | plt.close() 36 | -------------------------------------------------------------------------------- /model_performance_analysis/analyze_prediction_in_magnitude.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | 4 | from analysis import Intensity_Plotter 5 | 6 | path="../predict/station_blind_Vs30_bias2closed_station_2016" 7 | mask_after_sec=7 8 | prediction_with_info=pd.read_csv(f"{path}/{mask_after_sec} sec model11 with all info.csv") 9 | # ===========plot mag>=5.5=========== 10 | mag5_5_prediction = prediction_with_info.query("magnitude>=5.5") 11 | label_type = "pga" 12 | fig, ax = Intensity_Plotter.plot_true_predicted( 13 | y_true=mag5_5_prediction["answer"], 14 | y_pred=mag5_5_prediction["predict"], 15 | quantile=False, 16 | agg="point", 17 | point_size=70, 18 | target=label_type, 19 | title=f"Magnitude>=5.5 event {mask_after_sec} sec", 20 | ) 21 | 22 | # ===========check prediction in magnitude=========== 23 | 24 | label = "pga" 25 | fig, ax = Intensity_Plotter.plot_true_predicted( 26 | y_true=prediction_with_info["answer"][prediction_with_info["magnitude"] >= 5], 27 | y_pred=prediction_with_info["predict"][prediction_with_info["magnitude"] >= 5], 28 | quantile=False, 29 | agg="point", 30 | point_size=20, 31 | target=label, 32 | ) 33 | 34 | ax.scatter( 35 | prediction_with_info["answer"][prediction_with_info["magnitude"] < 5], 36 | prediction_with_info["predict"][prediction_with_info["magnitude"] < 5], 37 | c="r", 38 | label="magnitude < 5", 39 | ) -------------------------------------------------------------------------------- 
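The PGA bin edges used in `intensity_sort_from_station.py` above (log10 of values in m/s²) delimit the intensity classes drawn on the histograms. Purely as an illustration of that mapping (this helper is not part of the repository), a log10(PGA) value can be converted to its intensity label with the same edges:

```python
import numpy as np

# Same bin edges (m/s^2, before log10) and labels as in intensity_sort_from_station.py.
PGA_THRESHOLD = np.log10(
    [1e-5, 0.008, 0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]
)
LABELS = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"]


def pga_to_intensity(log_pga: float) -> str:
    """Map a log10(PGA) value to its intensity label (illustrative helper)."""
    # np.digitize returns the index of the bin that the value falls into.
    idx = int(np.digitize(log_pga, PGA_THRESHOLD)) - 1
    return LABELS[min(max(idx, 0), len(LABELS) - 1)]


# Example: pga_to_intensity(np.log10(0.3)) -> "4"  (0.25 <= 0.3 m/s^2 < 0.8)
```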
/data/plot_number_of_traces_station_map.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from visualize import plot_received_traces_station_map 3 | 4 | sta_path = "data/station_information" 5 | input_path = "predict/station_blind_Vs30_bias2closed_station_2016" 6 | output_path = "./data preprocess/events_traces_catalog" 7 | prediction = pd.read_csv(f"{input_path}/model 11 5 sec prediction.csv") 8 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 9 | merge_traces = pd.merge( 10 | prediction, 11 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 12 | how="left", 13 | left_on=["latitude", "longitude", "elevation"], 14 | right_on=["latitude", "longitude", "elevation (m)"], 15 | ) 16 | total_station_value_counts = ( 17 | merge_traces["location_code"] 18 | .value_counts() 19 | .rename_axis("location_code") 20 | .reset_index(name="counts") 21 | ) 22 | total_station_value_counts = pd.merge( 23 | total_station_value_counts, 24 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 25 | how="left", 26 | left_on="location_code", 27 | right_on="location_code", 28 | ) 29 | title = "Number of records received by stations in test data" 30 | fig, ax = plot_received_traces_station_map(total_station_value_counts, title=title) 31 | 32 | # total_station_value_counts.to_csv( 33 | # "predict/station_blind_Vs30_bias2closed_station_2016/Number of records received by stations in train data.csv", 34 | # index=False, 35 | # ) 36 | -------------------------------------------------------------------------------- /data_preprocess/6_checked_traces_events.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | start_year=1999 5 | end_year=2008 6 | traces=pd.read_csv(f"events_traces_catalog/{start_year}_{end_year}_target_traces.csv") 7 | events=pd.read_csv(f"events_traces_catalog/{start_year}_{end_year}_target_catalog.csv") 8 | 9 | traces.quality_control.value_counts().plot(kind='pie', autopct='%.1f%%') 10 | labels = traces.quality_control.unique() 11 | plt.legend(labels=labels) 12 | 13 | # keep traces whose quality_control is "y" 14 | y_filter=(traces["quality_control"]=="y") 15 | 16 | ok_traces=traces[y_filter] 17 | 18 | 19 | 20 | # some events had only one intensity>=4 trace and it was broken; remove the remaining traces of those events 21 | intensity_filter=(ok_traces["intensity"]>=4) 22 | include_intensity_4=ok_traces[intensity_filter]["EQ_ID"].unique().tolist() 23 | ok_traces_filter=(ok_traces["EQ_ID"].isin(include_intensity_4)) 24 | ok_traces=ok_traces[ok_traces_filter] 25 | 26 | # ok_traces.to_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_traces.csv",index=False) 27 | 28 | # plot intensity distribution after removing traces 29 | fig,ax=plt.subplots() 30 | ax.hist(traces["intensity"],bins=16,edgecolor="gray") 31 | ax.hist(ok_traces["intensity"],bins=16,edgecolor="gray") 32 | plt.yscale("log") 33 | 34 | # remove the same events from the event catalog as well 35 | ok_event_filter=(events["EQ_ID"].isin(include_intensity_4)) 36 | ok_events=events[ok_event_filter] 37 | # ok_events.to_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_events.csv",index=False) 38 | 39 | # plot magnitude distribution after removing events 40 | fig,ax=plt.subplots() 41 | ax.hist(events["magnitude"],bins=28,edgecolor="gray") 42 | ax.hist(ok_events["magnitude"],bins=28,edgecolor="gray") 43 | plt.yscale("log") -------------------------------------------------------------------------------- /data_preprocess/plot_event_trace_distribution.py: 
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | trace = pd.read_csv("./events_traces_catalog/1999_2019_final_traces_Vs30.csv") 6 | catalog = pd.read_csv("./events_traces_catalog/1999_2019_final_catalog.csv") 7 | 8 | fig, ax = plt.subplots(figsize=(7, 7)) 9 | ax.hist( 10 | [trace.query("year>=2009")["pga"],trace.query("year<2009")["pga"]], 11 | bins=25, 12 | edgecolor="black", 13 | stacked=True, 14 | label=["origin","increased"], 15 | ) 16 | ax.legend(loc='best') 17 | ax.set_yscale("log") 18 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 19 | pga_threshold = np.log10( 20 | [0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0,10]) 21 | ax.vlines(pga_threshold[1:-1], 0, 35000, linestyles="dotted", color="k") 22 | for i in range(len(pga_threshold) - 1): 23 | ax.text((pga_threshold[i] + pga_threshold[i + 1]) / 2, 15000, label[i]) 24 | ax.set_ylabel("number of trace") 25 | ax.set_xlabel("log(PGA (m/s2))") 26 | ax.set_title("TSMIP data PGA distribution") 27 | # fig.savefig("./events_traces_catalog/pga distribution.png",dpi=300) 28 | 29 | fig, ax = plt.subplots(figsize=(7, 7)) 30 | ax.hist( 31 | [catalog.query("year>=2009")["magnitude"],catalog.query("year<2009")["magnitude"]], 32 | bins=25, 33 | edgecolor="black", 34 | stacked=True, 35 | label=["origin","increased"], 36 | ) 37 | ax.legend(loc='best') 38 | ax.set_yscale("log") 39 | ax.set_ylabel("number of event") 40 | ax.set_xlabel("magnitude") 41 | ax.set_title("TSMIP data magnitude distribution") 42 | # fig.savefig("./events_traces_catalog/magnitude distribution.png",dpi=300) -------------------------------------------------------------------------------- /data_preprocess/4_data_selection.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | start_year = 1999 4 | end_year = 2008 5 | intensity_threshold = 4 6 | magnitude_thrshold = 5.5 7 | 8 | 9 | Afile_path = "../data/Afile" 10 | sta_path = "../data/station_information" 11 | traces = pd.read_csv( 12 | f"{Afile_path}/1991-2020 traces (no broken data, double event).csv" 13 | ) 14 | catalog = pd.read_csv(f"{Afile_path}/1991-2020 catalog.csv") 15 | 16 | # traces station location doesn't exist 17 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 18 | sta_filter = traces["station_name"].isin(station_info["location_code"]) 19 | traces_exist_sta = traces[sta_filter] 20 | 21 | # find Earthquake that at least 1 trace intensity > 4 & magnitude >=3.5 22 | target_traces = traces_exist_sta.query(f"year>={start_year} & year<={end_year}") 23 | EQ_ID = ( 24 | target_traces.query(f"intensity >= {intensity_threshold}")["EQ_ID"] 25 | .unique() 26 | .tolist() 27 | ) 28 | output_catalog = catalog.query(f"EQ_ID in {EQ_ID} & magnitude >= {magnitude_thrshold}") 29 | output_traces = target_traces.copy() 30 | EQ_ID = output_catalog["EQ_ID"].tolist() 31 | output_traces = output_traces.query(f"EQ_ID in {EQ_ID}") 32 | 33 | # check nan 34 | output_traces.isnull().sum(axis=0) 35 | output_catalog.isnull().sum(axis=0) 36 | # plot magnitude hist & check intensity 37 | output_catalog["magnitude"].hist(bins=16) 38 | output_traces["intensity"].hist(bins=20) 39 | output_traces["intensity"].value_counts() 40 | 41 | # output_catalog.to_csv(f"events_traces_catalog/{start_year}_{end_year}_target_catalog.csv", index=False) 42 | # output_traces.to_csv(f"events_traces_catalog/{start_year}_{end_year}_target_traces.csv", index=False) 43 | 
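The selection in `4_data_selection.py` above boils down to two set operations after the station-existence and year filtering: keep events with at least one trace reaching the intensity threshold, apply the magnitude cut, then keep only traces belonging to the surviving events. A compact, hypothetical refactoring of that logic (the function name and signature are not part of the repository) could look like this:

```python
import pandas as pd


def select_events(traces: pd.DataFrame, catalog: pd.DataFrame,
                  intensity_threshold: float = 4, magnitude_threshold: float = 5.5):
    """Return (catalog, traces) restricted to events that have at least one
    trace with intensity >= intensity_threshold and magnitude >= magnitude_threshold."""
    # Events with at least one strong-motion trace.
    strong_eq_ids = traces.loc[
        traces["intensity"] >= intensity_threshold, "EQ_ID"
    ].unique()
    # Apply the magnitude cut on those events.
    selected_catalog = catalog[
        catalog["EQ_ID"].isin(strong_eq_ids)
        & (catalog["magnitude"] >= magnitude_threshold)
    ]
    # Keep only traces that belong to the selected events.
    selected_traces = traces[traces["EQ_ID"].isin(selected_catalog["EQ_ID"])]
    return selected_catalog, selected_traces
```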
-------------------------------------------------------------------------------- /data_preprocess/1_afile_to_catalog.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | 5 | from read_tsmip import * 6 | 7 | Afile_path = "../data/Afile" 8 | waveform_path = "../data/waveform" 9 | Events = [] 10 | Traces = [] 11 | for year in os.listdir(f"{waveform_path}"): 12 | for month in [ 13 | "01", 14 | "02", 15 | "03", 16 | "04", 17 | "05", 18 | "06", 19 | "07", 20 | "08", 21 | "09", 22 | "10", 23 | "11", 24 | "12", 25 | ]: 26 | trace_path = f"{waveform_path}/{year}/{month}" 27 | trace_folder = os.listdir(trace_path) 28 | 29 | afile_name = f"{year}{month}A.DAT" 30 | afile_path = f"{Afile_path}/{afile_name}" 31 | events, traces = classify_event_trace(afile_path, afile_name, trace_folder) 32 | Events.extend(events) 33 | Traces.extend(traces) 34 | 35 | # Events 36 | event_dict_inlist = [] 37 | for eq_id, event in enumerate(Events): 38 | header_info = read_header(event, EQ_ID=str(eq_id + 1)) 39 | event_dict_inlist.append(header_info) 40 | 41 | event_df = pd.DataFrame.from_dict(event_dict_inlist) 42 | # event_df.to_csv(f"{Afile_path}/1991-2020 catalog.csv", index=False) 43 | 44 | # Traces 45 | for i in range(len(Traces)): 46 | if i == 0: 47 | trace_info = read_lines(Traces[i], EQ_ID=str(i + 1)) 48 | else: 49 | tmp_trace_info = read_lines(Traces[i], EQ_ID=str(i + 1)) 50 | trace_info.extend(tmp_trace_info) 51 | 52 | trace_df = pd.DataFrame.from_dict(trace_info) 53 | # keep traces that belong to TSMIP: 54 | trace_df = trace_df[trace_df["station_name"].str.len() == 6] 55 | 56 | # trace_df.to_csv(f"{Afile_path}/1991-2020 traces.csv", index=False) 57 | -------------------------------------------------------------------------------- /data_preprocess/10_check_station_overlap.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | start_year=1999 4 | end_year=2008 5 | traces = pd.read_csv( 6 | f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime_labeled.csv" 7 | ) 8 | catalog = pd.read_csv( 9 | f"./events_traces_catalog/{start_year}_{end_year}_ok_events_p_arrival_abstime.csv" 10 | ) 11 | 12 | traces["instrument_priority"] = traces["instrument_code"].map( 13 | {" SMTA": 1, " CVA ": 2, " NANO": 3, " A900": 4, " ETNA": 5, " K2 ": 6, " REFT": 7} 14 | ) 15 | # find traces that share the same EQ_ID & station_name 16 | overlap_trace = pd.DataFrame() 17 | for eq_id in catalog["EQ_ID"]: 18 | tmp_traces = traces.query(f"EQ_ID == {eq_id}") 19 | counts = tmp_traces["station_name"].value_counts() 20 | 21 | target_station = counts[counts > 1].index.tolist() 22 | 23 | mask = tmp_traces["station_name"].isin(target_station) 24 | 25 | tmp_overlap_trace = tmp_traces[mask] 26 | 27 | overlap_trace = pd.concat([overlap_trace, tmp_overlap_trace]) 28 | 29 | # rank instrument codes to set a priority order 30 | instrument_priority = traces["instrument_code"].value_counts().index.tolist() 31 | 32 | overlap_trace_sorted = overlap_trace.sort_values("instrument_priority") 33 | chosen_trace = overlap_trace_sorted.drop_duplicates( 34 | ["station_name", "EQ_ID"], keep="first" 35 | ) 36 | chosen_trace = chosen_trace.sort_index() 37 | 38 | # take the set difference between the original df and the overlapping df, then add back the overlapping traces that were kept 39 | differ_set = pd.concat([traces, overlap_trace]).drop_duplicates( 40 | ["station_name", "EQ_ID"], keep=False 41 | ) 42 | final_trace = pd.concat([differ_set, chosen_trace]).sort_index() 43 | 44 | 45 | # final_trace.to_csv( 46 | # 
f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv", 47 | # index=False, 48 | # ) 49 | 50 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/3_waveform_after_preprocess.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import obspy 4 | import numpy as np 5 | import sys 6 | import os 7 | import matplotlib.pyplot as plt 8 | 9 | sys.path.append("../..") 10 | from data_preprocess.read_tsmip import get_peak_value 11 | 12 | data_path = "./0403asc_by_Joey" 13 | 14 | files = os.listdir(f"{data_path}") 15 | asc_files = [file for file in files if file.endswith(".asc")] 16 | output_df = {"station_code": [], "PGA": []} 17 | for i in range(len(asc_files)): 18 | data = pd.read_csv( 19 | f"{data_path}/{asc_files[i]}", sep="\s+", skiprows=[0], header=None 20 | ).to_numpy() 21 | 22 | stream = obspy.core.stream.Stream() 23 | channel = ["HLZ", "HLN", "HLE"] 24 | 25 | for j, chan in enumerate(channel): 26 | trace = obspy.core.trace.Trace(data[:, j + 1]) 27 | trace.stats.sampling_rate = 100 28 | # trace.stats.starttime = obspy.UTCDateTime(asc_files[0][:17]) 29 | stream.append(trace) 30 | stream.filter("lowpass", freq=10) 31 | # plot 32 | # fig,ax=plt.subplots(3,1) 33 | # for k in range(3): 34 | # ax[k].plot(stream[k].data) 35 | # ax[0].set_title(asc_files[i][26:30]) 36 | # ax[2].set_xlabel("time sample (100Hz)") 37 | # ax[1].set_ylabel("amplitude (gal)") 38 | # plt.close() 39 | # fig.savefig(f"{data_path}/image/{asc_files[i][26:30]}.png",dpi=300) 40 | 41 | pga, _ = get_peak_value(stream) 42 | output_df["station_code"].append(asc_files[i][26:30]) 43 | output_df["PGA"].append(pga) 44 | 45 | output_df = pd.DataFrame(output_df) 46 | 47 | station_info = pd.read_csv("../../data/station_information/TSMIPstations_new.csv") 48 | 49 | output_df = pd.merge( 50 | output_df, 51 | station_info[["station_code", "location_code"]], 52 | left_on="station_code", 53 | right_on="station_code", 54 | how="left", 55 | ) 56 | 57 | # output_df.to_csv(f"true_answer.csv", index=False) 58 | -------------------------------------------------------------------------------- /data_preprocess/7_traces_picking.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | from read_tsmip import read_tsmip 5 | from obspy.signal.trigger import ar_pick 6 | 7 | start_year=1999 8 | end_year=2008 9 | waveform_path = "../data/waveform" 10 | traces = pd.read_csv(f"./events_traces_catalog/{start_year}_{end_year}_ok_traces.csv") 11 | 12 | traces["p_pick_sec"] = 0 13 | for i in range(len(traces)): 14 | print(f"{i}/{len(traces)}") 15 | EQ_ID = str(traces["EQ_ID"][i]) 16 | year = str(traces["year"][i]) 17 | month = str(traces["month"][i]) 18 | day = str(traces["day"][i]) 19 | hour = str(traces["hour"][i]) 20 | minute = str(traces["minute"][i]) 21 | second = str(traces["second"][i]) 22 | intensity = str(traces["intensity"][i]) 23 | station_name = traces["station_name"][i] 24 | epdis = str(traces["epdis (km)"][i]) 25 | file_name = traces["file_name"][i].strip() 26 | if len(month) < 2: 27 | month = "0" + month 28 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 29 | # picking 30 | p_pick, _ = ar_pick( 31 | waveform[0], 32 | waveform[1], 33 | waveform[2], 34 | samp_rate=waveform[0].stats.sampling_rate, 35 | f1=1, # Frequency of the lower bandpass window 36 
| f2=20, # Frequency of the upper bandpass window 37 | lta_p=1, # Length of LTA for the P arrival in seconds 38 | sta_p=0.1, # Length of STA for the P arrival in seconds 39 | lta_s=4.0, # Length of LTA for the S arrival in seconds 40 | sta_s=1.0, # Length of STA for the P arrival in seconds 41 | m_p=2, # Number of AR coefficients for the P arrival 42 | m_s=8, # Number of AR coefficients for the S arrival 43 | l_p=0.1, 44 | l_s=0.2, 45 | s_pick=False, 46 | ) 47 | traces.loc[i, "p_pick_sec"] = p_pick 48 | 49 | # traces.to_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_picked_traces.csv",index=False) 50 | 51 | 52 | -------------------------------------------------------------------------------- /data_preprocess/8_shift_picking_by_velocity_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | #before run this script, you need to go to "tracer_demo/": 5 | #use "input_file.py" to create input data 6 | # calculate p wave arrival by velocity model(Huang et al., 2014) 7 | #paper link: https://www.sciencedirect.com/science/article/pii/S0012821X14000995 8 | 9 | start_year=1999 10 | end_year=2008 11 | traces = pd.read_csv(f"events_traces_catalog/{start_year}_{end_year}_ok_picked_traces.csv") 12 | 13 | EQ_ID = os.listdir(f"./tracer_demo/{start_year}_{end_year}_output") 14 | 15 | traces["p_arrival_abs_time"] = pd.to_datetime( 16 | traces[["year", "month", "day", "hour", "minute", "second"]] 17 | ) 18 | 19 | colnames = [ 20 | "evt_lon", 21 | "evt_lat", 22 | "evt_depth", 23 | "sta_lon", 24 | "sta_lat", 25 | "sta_elev", 26 | "p_arrival", 27 | "s_arrival", 28 | ] 29 | for eq in EQ_ID: 30 | event_file_path = f"./tracer_demo/{start_year}_{end_year}_output/{eq}/output.table" 31 | tracer_output = pd.read_csv( 32 | event_file_path, sep=r"\s+", names=colnames, header=None 33 | ) 34 | trace_index = traces[traces["EQ_ID"] == int(eq)].index 35 | p_arrival = pd.to_timedelta(tracer_output["p_arrival"], unit="s") 36 | p_arrival.index = trace_index 37 | traces.loc[trace_index, "p_arrival_abs_time"] = ( 38 | traces.loc[trace_index, "p_arrival_abs_time"] + p_arrival 39 | ) 40 | # traces 和 event 須將 eq_id: 29363 剔除 (velocity model calculate out of range) 41 | final_traces = traces[traces["EQ_ID"] != 29363] 42 | event = pd.read_csv(f"./events_traces_catalog/{start_year}_{end_year}_ok_events.csv") 43 | final_event = event[event["EQ_ID"] != 29363] 44 | # save catalog 45 | # final_traces.to_csv( 46 | # f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime.csv", index=False 47 | # ) 48 | # final_event.to_csv( 49 | # f"./events_traces_catalog/{start_year}_{end_year}_ok_events_p_arrival_abstime.csv", index=False 50 | # ) -------------------------------------------------------------------------------- /data_preprocess/plot_cut_traces.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from tqdm import tqdm 3 | 4 | import obspy 5 | import matplotlib.pyplot as plt 6 | from read_tsmip import cut_traces 7 | 8 | start_year = 1999 9 | end_year = 2019 10 | Afile_path = "data/Afile" 11 | sta_path = "../data/station_information" 12 | waveform_path = "../data/waveform" 13 | catalog = pd.read_csv( 14 | f"./events_traces_catalog/{start_year}_{end_year}_final_catalog.csv" 15 | ) 16 | traces = pd.read_csv( 17 | f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv" 18 | ) 19 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 20 | 
traces.loc[traces.index, "p_pick_sec"] = pd.to_timedelta( 21 | traces["p_pick_sec"], unit="sec" 22 | ) 23 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 24 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 25 | ) 26 | 27 | for eq_id in tqdm(catalog["EQ_ID"]): 28 | tmp_traces, traces_info = cut_traces(traces, eq_id, waveform_path, waveform_type="acc") 29 | for i,chan in enumerate(["HLZ","HLN","HLE"]): 30 | stream = obspy.core.stream.Stream() 31 | for j in range(len(traces_info["traces"])): 32 | trace = obspy.core.trace.Trace(data=traces_info["traces"][j][:, i]) 33 | trace.stats.id = eq_id 34 | trace.stats.station = tmp_traces["station_name"][j] 35 | trace.stats.channel = chan 36 | trace.stats.distance = tmp_traces["epdis (km)"][j] * 1000 37 | trace.stats.starttime = traces_info["start_time"][j] 38 | trace.stats.sampling_rate = 200 39 | 40 | stream.append(trace) 41 | fig, ax = plt.subplots() 42 | stream.plot(type="section",fig=fig) 43 | 44 | magnitude = catalog[catalog["EQ_ID"] == eq_id]["magnitude"].values[0] 45 | 46 | ax.set_title( 47 | f"EQ ID:{eq_id}, Magnitude: {magnitude}, start time: {traces_info['start_time'][j]}" 48 | ) 49 | # fig.savefig(f"cut event figure/{eq_id}_{trace.stats.channel}.png",dpi=300) 50 | plt.close() -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/4_plot_intensity_map.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | 4 | sys.path.append("..") 5 | from analysis import Intensity_Plotter 6 | 7 | mask_sec = 3 8 | event_lon = 121.66 9 | event_lat = 23.77 10 | magnitude = 7.2 11 | answer = pd.read_csv(f"true_answer.csv") 12 | 13 | # merge 3 5 7 10 sec to find maximum predicted pga 14 | prediction_3 = pd.read_csv(f"no_include_broken_data_prediction/3_sec_prediction.csv") 15 | prediction_5 = pd.read_csv(f"no_include_broken_data_prediction/5_sec_prediction.csv") 16 | prediction_7 = pd.read_csv(f"no_include_broken_data_prediction/7_sec_prediction.csv") 17 | prediction_10 = pd.read_csv(f"no_include_broken_data_prediction/10_sec_prediction.csv") 18 | 19 | max_prediction = pd.concat( 20 | [ 21 | prediction_3, 22 | prediction_5["predict"], 23 | prediction_7["predict"], 24 | prediction_10["predict"], 25 | ], 26 | axis=1, 27 | ) 28 | 29 | max_prediction.columns = [ 30 | "3_predict", 31 | "station_name", 32 | "latitude", 33 | "longitude", 34 | "elevation", 35 | "5_predict", 36 | "7_predict", 37 | "10_predict", 38 | ] 39 | max_prediction["max_predict"] = max_prediction.apply( 40 | lambda row: max( 41 | row["3_predict"], row["5_predict"], row["7_predict"], row["10_predict"] 42 | ), 43 | axis=1, 44 | ) 45 | 46 | max_prediction = pd.merge( 47 | answer, max_prediction, how="left", left_on="location_code", right_on="station_name" 48 | ) 49 | max_prediction.dropna(inplace=True) 50 | 51 | eventmeta = pd.DataFrame( 52 | {"longitude": [event_lon], "latitude": [event_lat], "magnitude": [magnitude]} 53 | ) 54 | 55 | Intensity_Plotter.plot_intensity_map( 56 | trace_info=max_prediction, 57 | eventmeta=eventmeta, 58 | label_type="pga", 59 | true_label=max_prediction["PGA"], 60 | pred_label=max_prediction[f"{mask_sec}_predict"], 61 | sec=mask_sec, 62 | min_epdis=10.87177078, # 0.1087度轉成km 63 | EQ_ID=None, 64 | grid_method="linear", 65 | pad=100, 66 | title=f"{mask_sec} sec intensity Map", 67 | ) 68 | # fig.savefig(f"true_intensity_map_without_broken_data/{mask_sec}_sec.png",dpi=300) 69 | 
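In `4_plot_intensity_map.py` above, the maximum predicted PGA across the 3/5/7/10-second windows is computed with `apply` and a lambda. A vectorized equivalent (shown only as an alternative sketch, not a change to the script) is:

```python
# Row-wise maximum over the four prediction columns, without a Python-level loop.
predict_columns = ["3_predict", "5_predict", "7_predict", "10_predict"]
max_prediction["max_predict"] = max_prediction[predict_columns].max(axis=1)
```

Both forms produce the same `max_predict` column.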
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /__pycache__ 3 | .vscode 4 | 5 | #waveform and catalog 6 | data/* 7 | !data/*.py 8 | /paper image 9 | *.zip 10 | 11 | #mlflow 12 | /mlruns 13 | 14 | #training 15 | model/* 16 | !model/*.py 17 | /predict 18 | 19 | /multi_input_predict_pga_and_pgv/__pycache__ 20 | /multi_input_predict_pga_and_pgv/mlruns 21 | /multi_input_predict_pga_and_pgv/model 22 | 23 | 24 | #data preprocess 25 | data_preprocess/* 26 | !data_preprocess/images 27 | !data_preprocess/*.py 28 | !data_preprocess/README.md 29 | !data_preprocess/tracer_demo 30 | data_preprocess/tracer_demo/1999_2008_output 31 | data_preprocess/tracer_demo/2009_2019_output 32 | data_preprocess/tracer_demo/2023_output 33 | data_preprocess/tracer_demo/model_image 34 | data_preprocess/tracer_demo/create_input_file/* 35 | !data_preprocess/tracer_demo/create_input_file/*.py 36 | !data_preprocess/0918_M6.8_1319_1330/ 37 | data_preprocess/0918_M6.8_1319_1330/cut trace 38 | data_preprocess/0918_M6.8_1319_1330/*.csv 39 | 40 | #feature map correlation 41 | feature_map_correlation/__pycache__ 42 | 43 | model_performance_analysis/__pycache__ 44 | 45 | #0403 Hualien_earthquake 46 | /model_performance_analysis/0403_Hualien_Earthquake/0403waveform_image 47 | /model_performance_analysis/0403_Hualien_Earthquake/113019_TSMIP_SAC 48 | /model_performance_analysis/0403_Hualien_Earthquake/model_input 49 | /model_performance_analysis/0403_Hualien_Earthquake/true_intensity_map_with_broken_data 50 | /model_performance_analysis/0403_Hualien_Earthquake/true_intensity_map_without_broken_data 51 | /model_performance_analysis/0403_Hualien_Earthquake/include_broken_data_prediction 52 | /model_performance_analysis/0403_Hualien_Earthquake/no_include_broken_data_prediction 53 | /model_performance_analysis/0403_Hualien_Earthquake/underestimation_problem 54 | /model_performance_analysis/0403_Hualien_Earthquake/model_input_waveform_image 55 | /model_performance_analysis/0403_Hualien_Earthquake/0403asc_by_Joey 56 | /model_performance_analysis/0403_Hualien_Earthquake/triggered_station 57 | /model_performance_analysis/0403_Hualien_Earthquake/*.zip 58 | /model_performance_analysis/0403_Hualien_Earthquake/*.csv 59 | /model_performance_analysis/0403_Hualien_Earthquake/*.7z 60 | 61 | #CWA eew 62 | /CWA_EEW_report -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/create_input_file/input_file.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | start_year=1999 5 | end_year=2008 6 | event = pd.read_csv(f"../../events_traces_catalog/{start_year}_{end_year}_ok_events.csv") 7 | sta_path = "D:/TEAM_TSMIP/data/station_information" 8 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 9 | traces = pd.read_csv(f"../../events_traces_catalog/{start_year}_{end_year}_ok_picked_traces.csv") 10 | 11 | event["longitude"] = event["lon"] + (event["lon_minute"] / 60) 12 | event["latitude"] = event["lat"] + (event["lat_minute"] / 60) 13 | 14 | for eq_id in event["EQ_ID"]: 15 | # 16 | if eq_id==29363: #event location out of velocity model range 17 | continue 18 | tmp_event = event[event["EQ_ID"] == eq_id].reset_index() 19 | tmp_trace = traces[traces["EQ_ID"] == eq_id] 20 | tmp_trace_sta_lon_lat = pd.merge( 21 | tmp_trace, 22 | station_info[["location_code", "latitude", "longitude", 
"elevation (m)"]], 23 | left_on="station_name", 24 | right_on="location_code", 25 | how="left", 26 | ) 27 | # 28 | drop_station_name = "KNM003" #station_location velocity model range 29 | tmp_trace_sta_lon_lat = tmp_trace_sta_lon_lat[(tmp_trace_sta_lon_lat["station_name"] != drop_station_name)].reset_index() 30 | tmp_trace_sta_lon_lat.rename(columns={'elevation (m)': 'elevation_m'}, inplace=True) 31 | folder_path = f"{start_year}_{end_year}_input/{eq_id}/" 32 | 33 | if not os.path.exists(folder_path): 34 | os.mkdir(folder_path) 35 | 36 | event_file_path = folder_path + "event_input.evt" 37 | with open(event_file_path, "w") as file: 38 | file.write( 39 | f"{round(tmp_event.longitude[0],3)} {round(tmp_event.latitude[0],3)} {tmp_event.depth[0]}\n" 40 | ) 41 | 42 | station_file_path = folder_path + "station_input.sta" 43 | with open(station_file_path, "w") as file: 44 | for i in range(len(tmp_trace_sta_lon_lat)): 45 | file.write( 46 | f"{round(tmp_trace_sta_lon_lat.longitude[i],3)} {round(tmp_trace_sta_lon_lat.latitude[i],4)} {round(tmp_trace_sta_lon_lat.elevation_m[i],3)} {tmp_trace_sta_lon_lat.station_name[i]}\n" 47 | ) 48 | 49 | -------------------------------------------------------------------------------- /data_preprocess/9_label.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from read_tsmip import read_tsmip, get_peak_value, get_integrated_stream 5 | 6 | # read traces catalog 7 | start_year=1999 8 | end_year=2008 9 | waveform_path = "../data/waveform" 10 | traces = pd.read_csv( 11 | f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime.csv" 12 | ) 13 | 14 | sampling_rate = 200 15 | for i in range(len(traces)): 16 | print(f"{i}/{len(traces)}") 17 | EQ_ID = str(traces["EQ_ID"][i]) 18 | year = str(traces["year"][i]) 19 | month = str(traces["month"][i]) 20 | day = str(traces["day"][i]) 21 | hour = str(traces["hour"][i]) 22 | minute = str(traces["minute"][i]) 23 | second = str(traces["second"][i]) 24 | intensity = str(traces["intensity"][i]) 25 | station_name = traces["station_name"][i] 26 | epdis = str(traces["epdis (km)"][i]) 27 | file_name = traces["file_name"][i].strip() 28 | if len(month) < 2: 29 | month = "0" + month 30 | # read waveform 31 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 32 | # resample to 200Hz 33 | if waveform[0].stats.sampling_rate != sampling_rate: 34 | waveform.resample(sampling_rate, window="hann") 35 | 36 | # detrend 37 | waveform.detrend(type="demean") 38 | # lowpass filter 39 | waveform.filter("lowpass", freq=10) # filter 40 | # get pga 41 | pick_point = int(np.round(traces["p_pick_sec"][i] * sampling_rate, 0)) 42 | pga, pga_time = get_peak_value(waveform, pick_point=pick_point) 43 | # waveform taper 44 | waveform.taper(max_percentage=0.05, type="cosine") 45 | # integrate 46 | vel_waveform = get_integrated_stream(waveform) 47 | # bandpass filter 48 | vel_waveform.filter("bandpass", freqmin=0.075, freqmax=10) 49 | # get pgv 50 | pgv, pgv_time = get_peak_value(vel_waveform, pick_point=pick_point) 51 | # input to df 52 | traces.loc[i, "pga"] = pga 53 | traces.loc[i, "pga_time"] = pga_time 54 | traces.loc[i, "pgv"] = pgv 55 | traces.loc[i, "pgv_time"] = pgv_time 56 | 57 | # traces.to_csv( 58 | # f"./events_traces_catalog/{start_year}_{end_year}_picked_traces_p_arrival_abstime_labeled.csv", 59 | # index=False, 60 | # ) 61 | -------------------------------------------------------------------------------- 
/data_preprocess/tracer_demo/chichi.sta: -------------------------------------------------------------------------------- 1 | 120.805 23.5103 0 ALS 2 | 121.365 23.0992 0 CHK 3 | 120.412 23.7192 0 CHY002 4 | 120.172 23.6013 0 CHY004 5 | 120.552 23.5815 0 CHY006 6 | 120.269 23.4853 0 CHY008 7 | 120.544 23.4653 0 CHY010 8 | 120.152 23.3328 0 CHY012 9 | 120.583 23.2963 0 CHY014 10 | 120.405 23.355 0 CHY015 11 | 120.153 23.2212 0 CHY016 12 | 120.268 23.2147 0 CHY017 13 | 120.478 23.1795 0 CHY019 14 | 120.462 23.0457 0 CHY022 15 | 120.28 22.9655 0 CHY023 16 | 120.606 23.757 0 CHY024 17 | 120.514 23.7795 0 CHY025 18 | 120.411 23.7987 0 CHY026 19 | 120.247 23.752 0 CHY027 20 | 120.605 23.632 0 CHY028 21 | 120.528 23.6135 0 CHY029 22 | 120.294 23.5799 0 CHY032 23 | 120.215 23.5407 0 CHY033 24 | 120.544 23.5212 0 CHY034 25 | 120.584 23.52 0 CHY035 26 | 120.479 23.6073 0 CHY036 27 | 120.344 23.5207 0 CHY039 28 | 120.596 23.4388 0 CHY041 29 | 120.583 23.3583 0 CHY042 30 | 120.163 23.3832 0 CHY044 31 | 120.463 23.4765 0 CHY046 32 | 120.447 23.4938 0 CHY047 33 | 120.408 23.2803 0 CHY050 34 | 120.501 23.2878 0 CHY052 35 | 120.31 23.3077 0 CHY054 36 | 120.271 23.2698 0 CHY055 37 | 120.41 23.1495 0 CHY057 38 | 120.319 23.1725 0 CHY058 39 | 120.103 23.184 0 CHY059 40 | 120.239 23.1243 0 CHY060 41 | 120.511 23.0768 0 CHY061 42 | 120.45 23.1213 0 CHY062 43 | 120.34 23.027 0 CHY063 44 | 120.345 22.906 0 CHY065 45 | 120.208 22.9205 0 CHY066 46 | 120.184 22.999 0 CHY067 47 | 120.182 22.9737 0 CHY069 48 | 120.229 22.9651 0 CHY070 49 | 120.164 23.0648 0 CHY071 50 | 120.805 23.5103 0 CHY074 51 | 119.555 23.5672 0 CHY075 52 | 120.222 23.638 0 CHY076 53 | 120.228 23.0402 0 CHY078 54 | 120.528 23.1848 0 CHY079 55 | 120.678 23.5972 0 CHY080 56 | 120.496 23.2703 0 CHY081 57 | 120.298 23.7237 0 CHY082 58 | 120.593 23.351 0 CHY086 59 | 120.519 23.3845 0 CHY087 60 | 120.429 23.3462 0 CHY088 61 | 120.216 23.2673 0 CHY090 62 | 120.478 23.7913 0 CHY092 63 | 120.147 23.6538 0 CHY093 64 | 120.321 23.7935 0 CHY094 65 | 120.233 22.983 0 CHY096 66 | 120.28 23.1373 0 CHY099 67 | 120.342 23.2272 0 CHY100 68 | 120.562 23.6862 0 CHY101 69 | 120.614 23.2455 0 CHY102 70 | 120.465 23.6695 0 CHY104 71 | 120.29 23.2988 0 CHY107 72 | 120.53 23.2517 0 CHY109 73 | 120.53 23.2517 0 CHY110 74 | 120.227 23.7912 0 CHY111 75 | 120.183 23.7035 0 CHY112 76 | 120.119 23.0372 0 CHY114 77 | 120.097 23.1543 0 CHY115 78 | 120.108 23.0775 0 CHY116 79 | 120.424 23.4977 0 CHY 80 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_CWA_TTSAM_intensity_comparision.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from analysis import Intensity_Plotter 3 | 4 | eqid = 24784 5 | data_folder = "../data/station_information" 6 | multi_station = pd.read_csv(f"{data_folder}/multi-station.txt", sep=" ") 7 | station_dataset = pd.read_csv(f"{data_folder}/TSMIPstations_new.csv") 8 | cwa_event = pd.read_csv(f"{data_folder}/cwa_test_eew_events.csv") 9 | cwa_traces = pd.read_csv(f"{data_folder}/cwa_test_eew_traces.csv") 10 | process_time = int(cwa_event.query(f"eqid=={eqid}")["eew_time"].values[0]) 11 | event_lat = cwa_event.query(f"eqid=={eqid}")["catalog_lat"].values[0] 12 | event_lon = cwa_event.query(f"eqid=={eqid}")["catalog_lon"].values[0] 13 | mag = cwa_event.query(f"eqid=={eqid}")["catalog_mag"].values[0] 14 | merge_data = pd.merge( 15 | multi_station, 16 | station_dataset[["station_code", "location_code"]], 17 | left_on="TSMIP", 18 | 
right_on="station_code", 19 | how="left", 20 | ) 21 | 22 | 23 | cwa_merge_data = pd.merge( 24 | cwa_traces[ 25 | ["eqid", "predict_pga", "station_code", "sta_lat_pre", "sta_lon_pre", "PGA"] 26 | ], 27 | merge_data[["CWASN", "location_code"]], 28 | left_on="station_code", 29 | right_on="CWASN", 30 | how="inner", 31 | ) 32 | 33 | tt_sam = pd.read_csv( 34 | "../predict/station_blind_Vs30_bias2closed_station_2016/7 sec model11 with all info.csv" 35 | ) 36 | 37 | ttsam_merge_data = pd.merge( 38 | tt_sam[["EQ_ID", "predict", "answer", "latitude", "longitude", "station_name"]], 39 | merge_data[["CWASN", "location_code"]], 40 | left_on="station_name", 41 | right_on="location_code", 42 | how="inner", 43 | ) 44 | ttsam_merge_data["sta_lon_pre"] = ttsam_merge_data["longitude"] 45 | ttsam_merge_data["sta_lat_pre"] = ttsam_merge_data["latitude"] 46 | ttsam_merge_data["predict_pga"] = (10 ** ttsam_merge_data["predict"]) * 100 47 | ttsam_merge_data["observed_pga"] = (10 ** ttsam_merge_data["answer"]) * 100 48 | ttsam_merge_data["eqid"] = ttsam_merge_data["EQ_ID"] 49 | # ============================== 50 | 51 | # change "ttsam_merge_data" or "cwa_merge_data" to plot each system intensity map 52 | event = ttsam_merge_data.query(f"eqid=={eqid}") 53 | 54 | fig, ax = Intensity_Plotter.plot_intensity_scatter_map( 55 | event, 56 | event_lon, 57 | event_lat, 58 | mag, 59 | pga_column="observed_pga", 60 | title="Observed intensity", 61 | ) 62 | # fig.savefig(f"../CWA_EEW_report/eqid_{eqid}_intensity.png", dpi=300) -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/1_merge_sta_info.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | station_info = pd.read_csv("../../data/station_information/TSMIPstations_new.csv") 4 | traces_info_with_vs30 = pd.read_csv( 5 | "../events_traces_catalog/1999_2019_final_traces_Vs30.csv" 6 | ) 7 | 8 | pick_result = pd.read_csv("../../data/0918_M6.8_1319_1330/result.csv", header=None) 9 | pick_result.drop(0, axis=1, inplace=True) 10 | pick_result.columns = ["file_name", "pick_result"] 11 | 12 | ok_traces = pick_result.query("pick_result=='y'") 13 | 14 | ok_traces["station_code"] = ok_traces["file_name"].str[3:7] 15 | 16 | ok_traces = pd.merge( 17 | ok_traces, 18 | station_info[["station_code", "location_code"]], 19 | on="station_code", 20 | how="left", 21 | ) 22 | 23 | 24 | for i in ok_traces.index: 25 | if pd.isna(ok_traces["location_code"][i]): 26 | if ok_traces["station_code"][i][0] == "A": 27 | ok_traces["location_code"][i] = "TAP" + ok_traces["station_code"][i][1:] 28 | if ok_traces["station_code"][i][0] == "B": 29 | ok_traces["location_code"][i] = "TCU" + ok_traces["station_code"][i][1:] 30 | if ok_traces["station_code"][i][0] == "C": 31 | ok_traces["location_code"][i] = "CHY" + ok_traces["station_code"][i][1:] 32 | if ok_traces["station_code"][i][0] == "D": 33 | ok_traces["location_code"][i] = "KAU" + ok_traces["station_code"][i][1:] 34 | if ok_traces["station_code"][i][0] == "E": 35 | ok_traces["location_code"][i] = "ILA" + ok_traces["station_code"][i][1:] 36 | if ok_traces["station_code"][i][0] == "F": 37 | ok_traces["location_code"][i] = "HWA" + ok_traces["station_code"][i][1:] 38 | if ok_traces["station_code"][i][0] == "G": 39 | ok_traces["location_code"][i] = "TTN" + ok_traces["station_code"][i][1:] 40 | 41 | ok_traces = pd.merge( 42 | ok_traces, 43 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 44 | 
on="location_code", 45 | how="left", 46 | ) 47 | 48 | ok_traces = pd.merge( 49 | ok_traces, 50 | traces_info_with_vs30[["station_name", "Vs30"]].drop_duplicates( 51 | subset="station_name" 52 | ), 53 | left_on="location_code", 54 | right_on="station_name", 55 | how="left", 56 | ) 57 | 58 | ok_traces.dropna(inplace=True) 59 | ok_traces.drop(["station_code","location_code","pick_result"],axis=1,inplace=True) 60 | 61 | ok_traces.to_csv("../0918_M6.8_1319_1330/traces_catalog.csv",index=None) 62 | -------------------------------------------------------------------------------- /data_preprocess/plot_picking_waveform.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from read_tsmip import read_tsmip 3 | import matplotlib.pyplot as plt 4 | from obspy.signal.trigger import ar_pick 5 | 6 | ok_waveform_file = "1999_2019_final_traces_Vs30.csv" 7 | year = 1999 8 | traces = pd.read_csv(f"events_traces_catalog/{ok_waveform_file}") 9 | 10 | waveform_path = "../data/waveform" 11 | for i in traces.index: 12 | print(f"{i}/{len(traces)}") 13 | EQ_ID = str(traces["EQ_ID"][i]) 14 | year = str(traces["year"][i]) 15 | month = str(traces["month"][i]) 16 | day = str(traces["day"][i]) 17 | hour = str(traces["hour"][i]) 18 | minute = str(traces["minute"][i]) 19 | second = str(traces["second"][i]) 20 | intensity = str(traces["intensity"][i]) 21 | station_name = traces["station_name"][i] 22 | epdis = str(traces["epdis (km)"][i]) 23 | file_name = traces["file_name"][i].strip() 24 | if len(month) < 2: 25 | month = "0" + month 26 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 27 | # p_pick=traces["p_pick_sec"][i]*waveform[0].stats.sampling_rate 28 | p_pick, _ = ar_pick( 29 | waveform[0], 30 | waveform[1], 31 | waveform[2], 32 | samp_rate=waveform[0].stats.sampling_rate, 33 | f1=1, # Frequency of the lower bandpass window 34 | f2=20, # Frequency of the upper bandpass window 35 | lta_p=1, # Length of LTA for the P arrival in seconds 36 | sta_p=0.1, # Length of STA for the P arrival in seconds 37 | lta_s=4.0, # Length of LTA for the S arrival in seconds 38 | sta_s=1.0, # Length of STA for the P arrival in seconds 39 | m_p=2, # Number of AR coefficients for the P arrival 40 | m_s=8, # Number of AR coefficients for the S arrival 41 | l_p=0.1, 42 | l_s=0.2, 43 | s_pick=False, 44 | ) 45 | p_pick = p_pick * waveform[0].stats.sampling_rate 46 | fig, ax = plt.subplots(3, 1) 47 | for j in range(len(ax)): 48 | ax[j].plot( 49 | waveform[j].data[int(p_pick - 5 * 200) : int(p_pick + 30 * 200)], "k" 50 | ) 51 | ax[j].axvline(x=5 * 200, color="r", linestyle="-") 52 | ax[0].set_xticks([]) 53 | ax[1].set_xticks([]) 54 | ax[1].set_ylabel(f"Amplitude (gal)") 55 | ax[2].set_xlabel(f"Time Sample (200Hz)") 56 | ax[0].set_title(f"EQ_ID:{EQ_ID},year:{year},month:{month},file_name:{file_name}") 57 | # fig.savefig(f"pick_result/EQ_ID_{EQ_ID}_{year}_{month}_{file_name}.png",dpi=300) 58 | plt.close() 59 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/2_TTSAM_0403.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import torch 4 | import sys 5 | 6 | sys.path.append("../..") 7 | from model.CNN_Transformer_Mixtureoutput_TEAM import ( 8 | CNN, 9 | MDN, 10 | MLP, 11 | PositionEmbedding_Vs30, 12 | TransformerEncoder, 13 | full_model, 14 | ) 15 | 16 | mask_after_sec = 3 17 | num = 11 18 | device = 
torch.device("cuda") 19 | path = f"../../model/model{num}.pt" 20 | emb_dim = 150 21 | mlp_dims = (150, 100, 50, 30, 10) 22 | CNN_model = CNN(mlp_input=5665).cuda() 23 | pos_emb_model = PositionEmbedding_Vs30(emb_dim=emb_dim).cuda() 24 | transformer_model = TransformerEncoder() 25 | mlp_model = MLP(input_shape=(emb_dim,), dims=mlp_dims).cuda() 26 | mdn_model = MDN(input_shape=(mlp_dims[-1],)).cuda() 27 | full_Model = full_model( 28 | CNN_model, 29 | pos_emb_model, 30 | transformer_model, 31 | mlp_model, 32 | mdn_model, 33 | pga_targets=25, 34 | data_length=3000, 35 | ).to(device) 36 | full_Model.load_state_dict(torch.load(path)) 37 | 38 | Lat = [] 39 | Lon = [] 40 | Elev = [] 41 | Mixture_mu = [] 42 | station_name = [] 43 | for i in range(1, 15): 44 | print(i) 45 | with open( 46 | f"model_input/{mask_after_sec}_sec_without_broken_data/{i}.json", "r" 47 | ) as json_file: 48 | data = json.load(json_file) 49 | 50 | waveform = torch.tensor(data["waveform"]).to(torch.double).unsqueeze(0) 51 | 52 | input_station = torch.tensor(data["sta"]).to(torch.double).unsqueeze(0) 53 | 54 | target_station = torch.tensor(data["target"]).to(torch.double).unsqueeze(0) 55 | true_target_num = torch.sum(torch.all(target_station != 0, dim=-1)).item() 56 | sample = {"waveform": waveform, "sta": input_station, "target": target_station} 57 | 58 | lat = sample["target"][:, :, 0].flatten().tolist() 59 | lon = sample["target"][:, :, 1].flatten().tolist() 60 | elev = sample["target"][:, :, 2].flatten().tolist() 61 | Lat.extend(lat) 62 | Lon.extend(lon) 63 | Elev.extend(elev) 64 | weight, sigma, mu = full_Model(sample) 65 | Mixture_mu.append( 66 | torch.sum(weight * mu, dim=2).cpu().detach().numpy().flatten().tolist() 67 | ) 68 | station_name += data["station_name"] 69 | Mixture_mu = [item for sublist in Mixture_mu for item in sublist] 70 | output = { 71 | "predict": Mixture_mu, 72 | "station_name": station_name, 73 | "latitude": Lat, 74 | "longitude": Lon, 75 | "elevation": Elev, 76 | } 77 | 78 | output_df = pd.DataFrame(output) 79 | 80 | # output_df.to_csv( 81 | # f"no_include_broken_data_prediction/{mask_after_sec}_sec_prediction.csv", index=False 82 | # ) 83 | -------------------------------------------------------------------------------- /data_preprocess/2_catalog_records_cleaning.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | Afile_path = "../data/Afile" 5 | ##############clean broken data and small data############## 6 | 7 | traces_catalog = pd.read_csv(f"{Afile_path}/1991-2020 traces.csv") 8 | acc_filter = ( 9 | (traces_catalog["pga_z"] == 0) 10 | | (traces_catalog["pga_ns"] == 0) 11 | | (traces_catalog["pga_ew"] == 0) 12 | | (traces_catalog["pga_z"] < 2.5) 13 | | (traces_catalog["pga_ns"] < 2.5) 14 | | (traces_catalog["pga_ew"] < 2.5) 15 | | (traces_catalog["pga_z"] > 1300) 16 | | (traces_catalog["pga_ns"] > 1300) 17 | | (traces_catalog["pga_ew"] > 1300) 18 | ) 19 | broken_traces = traces_catalog[acc_filter] 20 | # broken_traces.to_csv(f"{Afile_path}/1991-2020 broken traces.csv", index=False) 21 | 22 | traces_catalog.drop(traces_catalog[acc_filter].index, inplace=True) 23 | # traces_catalog.to_csv(f"{Afile_path}/1991-2020 traces no broken data.csv", index=False) 24 | 25 | ##############find double event traces############## 26 | catalog = pd.read_csv(f"{Afile_path}/1991-2020 catalog.csv") 27 | traces_ljoin_catalog = pd.merge( 28 | catalog[["EQ_ID", "year", "month", "day", "hour", "minute", "second"]], 29 | 
traces_catalog, 30 | on="EQ_ID", 31 | ) 32 | 33 | double_traces_catalog = pd.DataFrame() 34 | for year in range(1991, 2021): 35 | for month in range(1, 13): 36 | time_filter = (traces_ljoin_catalog["year"] == year) & ( 37 | traces_ljoin_catalog["month"] == month 38 | ) 39 | tmp_catalog = traces_ljoin_catalog[time_filter] 40 | file_name_num = tmp_catalog["file_name"].value_counts() 41 | double_event = file_name_num[file_name_num > 1] 42 | same_filename_filter = tmp_catalog["file_name"].isin(double_event.index) 43 | double_traces = tmp_catalog[same_filename_filter] 44 | double_traces_catalog = pd.concat([double_traces_catalog, double_traces]) 45 | # double_traces_catalog.to_csv(f"{Afile_path}/1991-2020 double traces.csv", index=False) 46 | 47 | # clean trace double event 48 | traces_catalog = pd.read_csv(f"{Afile_path}/1991-2020 traces no broken data.csv") 49 | catalog = pd.read_csv(f"{Afile_path}/1991-2020 catalog.csv") 50 | traces_catalog_merge = pd.merge( 51 | catalog[["EQ_ID", "year", "month", "day", "hour", "minute", "second"]], 52 | traces_catalog, 53 | on="EQ_ID", 54 | ) 55 | 56 | double_event = pd.read_csv(f"{Afile_path}/1991-2020 double traces.csv") 57 | 58 | final_traces_catalog = pd.concat( 59 | [traces_catalog_merge, double_event, double_event] 60 | ).drop_duplicates(keep=False) 61 | # final_traces_catalog.to_csv( 62 | # f"{Afile_path}/1991-2020 traces (no broken data, double event).csv", index=False 63 | # ) 64 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_residual.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | from analysis import Residual_Plotter 5 | 6 | mask_after_sec = 7 7 | test_year = 2016 8 | path = f"../predict/station_blind_Vs30_bias2closed_station_{test_year}" 9 | output_path = f"{path}/{mask_after_sec} sec residual plots" 10 | prediction_with_info = pd.read_csv( 11 | f"{path}/{mask_after_sec} sec model11 with all info.csv" 12 | ) 13 | 14 | miss_alarm = (prediction_with_info["predict"] < np.log10(0.25)) & ( 15 | prediction_with_info["answer"] >= np.log10(0.25) 16 | ) 17 | false_alarm = (prediction_with_info["predict"] >= np.log10(0.25)) & ( 18 | prediction_with_info["answer"] < np.log10(0.25) 19 | ) 20 | wrong_predict = prediction_with_info[miss_alarm | false_alarm] 21 | 22 | for column in prediction_with_info.columns: 23 | fig, ax = Residual_Plotter.residual_with_attribute( 24 | prediction_with_info=prediction_with_info, 25 | column=column, 26 | single_case_check=24784.0, 27 | wrong_predict=wrong_predict, 28 | test_year=test_year, 29 | ) 30 | if not os.path.isdir(output_path): 31 | os.mkdir(output_path) 32 | # fig.savefig( 33 | # f"{output_path}/{column}.png", 34 | # dpi=300, 35 | # ) 36 | 37 | # plot event residual on map 38 | fig, ax = Residual_Plotter.single_event_residual_map( 39 | prediction_with_info=prediction_with_info, 40 | eq_id=24784.0, 41 | title=f"{mask_after_sec} sec 2016 Meinong earthquake residual in prediction", 42 | ) 43 | # fig.savefig( 44 | # f"{output_path}/{mask_after_sec} sec 2016 Meinong earthquake residual map.png", 45 | # dpi=300, 46 | # ) 47 | 48 | # plot all prediction residual on map 49 | prediction_with_info["predict_residual"] = ( 50 | prediction_with_info["predict"] - prediction_with_info["answer"] 51 | ) 52 | grouby_sta = prediction_with_info.groupby("station_name").agg( 53 | {"longitude": "first", "latitude": "first", "predict_residual": ["mean", "std"]} 54 | ) 55 
| # when a station has only one sample, std is NaN; drop those stations 56 | grouby_sta = grouby_sta[~grouby_sta["predict_residual", f"std"].isna()] 57 | # grouby_sta.to_csv(f"{mask_after_sec}_sec_station_correction.csv") 58 | 59 | max_abs_difference = abs(grouby_sta["predict_residual", "mean"]).max() 60 | negative_max_difference = -max_abs_difference 61 | 62 | fig, ax = Residual_Plotter.events_station_map( 63 | grouby_sta=grouby_sta, 64 | column="mean", 65 | cmap="seismic", 66 | title=f"{mask_after_sec} sec residual mean in 2016 prediction", 67 | ) 68 | fig, ax = Residual_Plotter.events_station_map( 69 | grouby_sta=grouby_sta, 70 | column="std", 71 | cmap="Reds", 72 | title=f"{mask_after_sec} sec residual std in 2016 prediction", 73 | ) 74 | # fig.savefig( 75 | # f"{output_path}/{mask_after_sec} sec residual {column} map.png", 76 | # dpi=300, 77 | # ) 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Taiwan Transformer Shaking Alert Model (TT-SAM) 2 | [![License](https://img.shields.io/badge/License-GPLv3-orange)](https://www.gnu.org/licenses/gpl-3.0.html) 3 | 4 | This study builds on the Transformer Earthquake Alerting Model (TEAM), a deep-learning earthquake early warning (EEW) framework. We optimized the model with seismic data from Taiwan to develop the Taiwan Transformer Shaking Alert Model (TT-SAM), which rapidly estimates seismic intensity to provide longer warning times. 5 | 6 | 7 | ## Data Preprocess 8 | 9 | ![image](data_preprocess/images/workflow.png) 10 | 11 | ## Model architecture 12 | ![image](images/TEAM-Taiwan_model_architecture.png) 13 | 14 | ## Model Performance 15 | 16 | We use 2016 seismic data to evaluate model performance. 17 | 18 | Seismic intensity thresholds follow the Central Weather Administration. 19 | 20 | Background color represents the model-predicted intensity. 21 | 22 | ### 2016 Meinong Earthquake 23 | 24 | ![image](images/Meinong_event.gif) 25 | 26 | ### 2016 Taitung Offshore Earthquake 27 | ![image](images/Taitung_offshore_event.gif) 28 | 29 | ## References 30 | Münchmeyer, J., Bindi, D., Leser, U., & Tilmann, F. (2021). The transformer earthquake 31 | alerting model: A new versatile approach to earthquake early warning. Geophysical Journal 32 | International, 225(1), 646-656. 33 | (https://academic.oup.com/gji/article/225/1/646/6047414) 34 | 35 | Liu, Kun-Sung, Tzay-Chyn Shin, and Yi-Ben Tsai. (1999). A free-field strong motion 36 | network in Taiwan: TSMIP. Terrestrial, Atmospheric and Oceanic Sciences, 10(2), 377-396. 37 | (http://tao.cgu.org.tw/index.php/articles/archive/geophysics/item/308) 38 | 39 | Akazawa, T. (2004, August). A technique for automatic detection of onset time of P- and S-phases 40 | in strong motion records. In Proc. of the 13th world conf. on earthquake engineering 41 | (Vol. 786, p. 786). Vancouver, Canada. 42 | (https://www.iitk.ac.in/nicee/wcee/article/13_786.pdf) 43 | 44 | Kuo, C. H., Wen, K. L., Hsieh, H. H., Lin, C. M., Chang, T. M., & Kuo, K. W. (2012). Site 45 | classification and Vs30 estimation of free-field TSMIP stations using the logging data of 46 | EGDT. Engineering Geology, 129, 68-75. 47 | (https://www.sciencedirect.com/science/article/pii/S0013795212000397) 48 | 49 | Lee, C. T., & Tsai, B. R. (2008). Mapping Vs30 in Taiwan. TAO: Terrestrial, Atmospheric 50 | and Oceanic Sciences, 19(6), 6. 
51 | (https://www.researchgate.net/profile/Chyi-Tyi-Lee-2/publication/250211755_Mapping_Vs30_in_Taiwan/links/557fa82608aeb61eae262086/Mapping-Vs30-in-Taiwan.pdf) 52 | 53 | Huang, H. H., Wu, Y. M., Song, X., Chang, C. H., Lee, S. J., Chang, T. M., & Hsieh, H. H. 54 | (2014). Joint Vp and Vs tomography of Taiwan: Implications for subduction-collision 55 | orogeny. Earth and Planetary Science Letters, 392, 177-191. 56 | (https://www.sciencedirect.com/science/article/pii/S0012821X14000995) 57 | 58 | -------------------------------------------------------------------------------- /data_preprocess/plot_data_distribution.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import numpy as np 5 | 6 | Afile_path = "./events_traces_catalog" 7 | 8 | def plot_event_distribution(catalog, output_path=None): 9 | fig, ax = plt.subplots(figsize=(7, 7)) 10 | sns.histplot(catalog, x="magnitude", hue="from", alpha=1, ax=ax) 11 | ax.set_title("Events Catalog", fontsize=20) 12 | ax.set_yscale("log") 13 | ax.set_xlabel("Magnitude", fontsize=13) 14 | ax.set_ylabel("Number of events", fontsize=13) 15 | if output_path: 16 | fig.savefig(f"{output_path}/event_distribution.png", dpi=300) 17 | return fig, ax 18 | 19 | def plot_trace_distribution(trace, output_path=None): 20 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 21 | pga_threshold = np.log10([0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]) 22 | fig, ax = plt.subplots(figsize=(7, 7)) 23 | sns.histplot(trace, x="pga", hue="from", alpha=1, ax=ax, bins=32) 24 | for i in range(len(pga_threshold) - 1): 25 | ax.text((pga_threshold[i] + pga_threshold[i + 1]) / 2, 10000, label[i]) 26 | ax.vlines(pga_threshold[1:-1], 0, 40000, linestyles="dotted", color="k") 27 | ax.set_title("Traces catalog", fontsize=20) 28 | ax.set_yscale("log") 29 | ax.set_xlabel("PGA log(m/s^2)", fontsize=13) 30 | ax.set_ylabel("number of traces", fontsize=13) 31 | if output_path: 32 | fig.savefig(f"{output_path}/traces_distribution.png", dpi=300) 33 | return fig, ax 34 | 35 | before_catalog = pd.read_csv(f"{Afile_path}/2009_2019_ok_events_p_arrival_abstime.csv") 36 | after_catalog = pd.read_csv(f"{Afile_path}/1999_2019_final_catalog.csv") 37 | 38 | before_catalog["from"] = "2009~2019 M>=3.5" 39 | after_catalog["from"] = "1999~2008 M>=5.5" 40 | 41 | catalog = pd.concat([before_catalog, after_catalog]) 42 | catalog.reset_index(inplace=True, drop=True) 43 | 44 | fig, ax = plot_event_distribution(catalog, output_path=None) 45 | 46 | ###### trace 47 | 48 | before_trace = pd.read_csv( 49 | f"{Afile_path}/2009_2019_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 50 | ) 51 | after_trace = pd.read_csv(f"{Afile_path}/1999_2019_final_traces.csv") 52 | 53 | before_trace["from"] = "2009~2019 M>=3.5" 54 | after_trace["from"] = "1999~2008 M>=5.5" 55 | 56 | trace = pd.concat([before_trace, after_trace]) 57 | trace.reset_index(inplace=True, drop=True) 58 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 59 | pga_threshold = np.log10([0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]) 60 | fig, ax = plot_trace_distribution(trace, output_path=None) 61 | 62 | print("high_intensity_rate") 63 | print( 64 | "2009~2019:", 65 | len(before_trace.query(f"pga >={pga_threshold[2]}")) / len(before_trace), 66 | ) 67 | print( 68 | "1999~2019:", len(after_trace.query(f"pga >={pga_threshold[2]}")) / len(after_trace) 69 | ) 70 | 
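# --- a minimal sketch, not part of the original script ---
# The two print blocks above compute the share of traces at or above
# pga_threshold[2] (intensity 4). A small helper makes the same check
# reusable for any traces table whose "pga" column is log10(m/s^2):
def high_intensity_rate(trace_df, threshold=pga_threshold[2]):
    return len(trace_df.query(f"pga >= {threshold}")) / len(trace_df)
# e.g. high_intensity_rate(before_trace) and high_intensity_rate(after_trace)
# reproduce the two ratios printed above.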
-------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/5_plot_confusion_matrix.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import numpy as np 4 | from sklearn.metrics import confusion_matrix 5 | 6 | 7 | sys.path.append("..") 8 | from analysis import Precision_Recall_Factory 9 | 10 | mask_sec = 3 11 | event_lon = 121.66 12 | event_lat = 23.77 13 | magnitude = 7.2 14 | answer = pd.read_csv(f"true_answer.csv") 15 | 16 | # merge 3 5 7 10 sec to find maximum predicted pga 17 | prediction_3 = pd.read_csv(f"no_include_broken_data_prediction/3_sec_prediction.csv") 18 | prediction_5 = pd.read_csv(f"no_include_broken_data_prediction/5_sec_prediction.csv") 19 | prediction_7 = pd.read_csv(f"no_include_broken_data_prediction/7_sec_prediction.csv") 20 | prediction_10 = pd.read_csv(f"no_include_broken_data_prediction/10_sec_prediction.csv") 21 | 22 | max_prediction = pd.concat( 23 | [ 24 | prediction_3, 25 | prediction_5["predict"], 26 | prediction_7["predict"], 27 | prediction_10["predict"], 28 | ], 29 | axis=1, 30 | ) 31 | 32 | max_prediction.columns = [ 33 | "3_predict", 34 | "station_name", 35 | "latitude", 36 | "longitude", 37 | "elevation", 38 | "5_predict", 39 | "7_predict", 40 | "10_predict", 41 | ] 42 | max_prediction["max_predict"] = max_prediction.apply( 43 | lambda row: max( 44 | row["3_predict"], row["5_predict"], row["7_predict"], row["10_predict"] 45 | ), 46 | axis=1, 47 | ) 48 | 49 | max_prediction = pd.merge( 50 | answer, max_prediction, how="left", left_on="location_code", right_on="station_name" 51 | ) 52 | max_prediction.dropna(inplace=True) 53 | 54 | ################# 55 | label_threshold = np.log10(np.array([0.25])) 56 | predict_label = np.array(max_prediction[f"max_predict"]) 57 | real_label = np.array(max_prediction["PGA"]) 58 | predict_logic = np.where(predict_label > label_threshold, 1, 0) 59 | real_logic = np.where(real_label > label_threshold, 1, 0) 60 | matrix = confusion_matrix(real_logic, predict_logic, labels=[1, 0]) 61 | accuracy = np.sum(np.diag(matrix)) / np.sum(matrix) # (TP+TN)/all 62 | precision = matrix[0][0] / np.sum(matrix, axis=0)[0] # TP/(TP+FP) 63 | recall = matrix[0][0] / np.sum(matrix, axis=1)[0] 64 | 65 | intensity = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 66 | max_prediction["predicted_intensity"] = max_prediction["max_predict"].apply( 67 | Precision_Recall_Factory.pga_to_intensity 68 | ) 69 | max_prediction["answer_intensity"] = max_prediction["PGA"].apply(Precision_Recall_Factory.pga_to_intensity) 70 | 71 | intensity_confusion_matrix = confusion_matrix( 72 | max_prediction["answer_intensity"], 73 | max_prediction["predicted_intensity"], 74 | labels=intensity, 75 | ) 76 | 77 | fig, ax = Precision_Recall_Factory.plot_intensity_confusion_matrix( 78 | intensity_confusion_matrix 79 | ) 80 | # fig.savefig("confusion_matrix.png", dpi=300) 81 | -------------------------------------------------------------------------------- /data/data_visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from visualize import Plot_Train_Test_Data, Increase_High_Data_Test 4 | 5 | 6 | data_path = "../data_preprocess/events_traces_catalog" 7 | origin_catalog = pd.read_csv(f"{data_path}/1999_2019_final_catalog.csv") 8 | traces_catalog = pd.read_csv(f"{data_path}/1999_2019_final_traces_Vs30.csv") 9 | test_year = 2016 10 
| train_catalog = origin_catalog.query(f"year!={test_year}") 11 | test_catalog = origin_catalog.query(f"year=={test_year}") 12 | # events histogram 13 | fig, ax = Plot_Train_Test_Data.event_histogram( 14 | train_catalog, test_catalog, key="magnitude", xlabel="magnitude" 15 | ) 16 | # fig.savefig(f"paper image/event depth distribution.png",dpi=300) 17 | # fig.savefig(f"paper image/event depth distribution.pdf",dpi=300) 18 | 19 | # event distribution in map 20 | fig, ax = Plot_Train_Test_Data.event_map(train_catalog, test_catalog) 21 | # fig.savefig(f"paper image/event distribution map.png",dpi=300) 22 | # fig.savefig(f"paper image/event distribution map.pdf",dpi=300) 23 | 24 | # traces pga histogram 25 | fig, ax = Plot_Train_Test_Data.pga_histogram(traces_catalog, test_year=test_year) 26 | # fig.savefig(f"paper image/trace pga distribution.png",dpi=300) 27 | # fig.savefig(f"paper image/trace pga distribution.pdf",dpi=300) 28 | 29 | 30 | # test oversampling method 31 | data_path = "./TSMIP_1999_2019_Vs30.hdf5" 32 | origin_PGA = Increase_High_Data_Test.load_dataset_into_list( 33 | data_path, oversample_rate=1, bias_to_close_station=False 34 | ) 35 | oversampled_PGA = Increase_High_Data_Test.load_dataset_into_list( 36 | data_path, oversample_rate=1.5, bias_to_close_station=False 37 | ) 38 | 39 | bias_closed_sta_PGA = Increase_High_Data_Test.load_dataset_into_list( 40 | data_path, oversample_rate=1.5, bias_to_close_station=True 41 | ) 42 | 43 | origin_PGA_array = np.array(origin_PGA) 44 | origin_high_intensity_rate = np.sum(origin_PGA_array > np.log10(0.250)) / len( 45 | origin_PGA_array 46 | ) 47 | print(f"origin rate:{origin_high_intensity_rate}") 48 | 49 | oversampled_PGA_array = np.array(oversampled_PGA) 50 | oversampled_high_intensity_rate = np.sum(oversampled_PGA_array > np.log10(0.250)) / len( 51 | oversampled_PGA_array 52 | ) 53 | print(f"oversampled rate:{oversampled_high_intensity_rate}") 54 | 55 | bias_closed_sta_PGA_array = np.array(bias_closed_sta_PGA) 56 | bias_closed_sta_high_intensity_rate = np.sum( 57 | bias_closed_sta_PGA_array > np.log10(0.250) 58 | ) / len(bias_closed_sta_PGA_array) 59 | print(f"bias_closed_sta rate:{bias_closed_sta_high_intensity_rate}") 60 | 61 | fig, ax = Increase_High_Data_Test.plot_pga_histogram( 62 | bias_closed_sta_PGA, 63 | oversampled_PGA, 64 | origin_PGA, 65 | origin_high_intensity_rate, 66 | oversampled_high_intensity_rate, 67 | bias_closed_sta_high_intensity_rate, 68 | ) 69 | # fig.savefig("PGA distribution.png", dpi=300,bbox_inches='tight') 70 | -------------------------------------------------------------------------------- /data_preprocess/3_station_location_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from read_tsmip import * 5 | 6 | sta_path = "../data/station_information" 7 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations.csv") 8 | station_code = station_info["station"].str.extract(r"(.*?)[(]") 9 | location_code = station_info["station"].str.extract(r"[(](.*?)[)]") 10 | station_info.insert(1, "station_code", station_code.values) 11 | station_info.insert(2, "location_code", location_code.values) 12 | station_info.drop(["station"], axis=1, inplace=True) 13 | 14 | # merge data from JC 15 | for sta in ["CHY", "HWA", "ILA", "KAU", "TAP", "TCU", "TTN"]: 16 | tmp_info = pd.read_csv(f"{sta_path}/{sta}.csv", encoding="unicode_escape") 17 | tmp_info.columns = [ 18 | "location_code", 19 | "station_location", 20 | "county", 21 | "district", 22 
| "net", 23 | "longitude", 24 | "latitude", 25 | "elevation", 26 | "stamp code", 27 | "address", 28 | ] 29 | sta_filter = tmp_info["location_code"].isin(station_info["location_code"]) 30 | add_df = tmp_info[~sta_filter][ 31 | ["location_code", "latitude", "longitude", "elevation"] 32 | ] 33 | add_df.rename(columns={"elevation": "elevation (m)"}, inplace=True) 34 | add_df.insert(0, "network", "TSMIP") 35 | add_df.insert(1, "station_code", np.nan) 36 | station_info = pd.concat([station_info, add_df]) 37 | 38 | # merge data fron MH 39 | # data1 40 | station_code1 = pd.read_csv(f"{sta_path}/station_code.csv") 41 | station_code2 = pd.read_csv(f"{sta_path}/tsmip_factor.csv") 42 | merged_station_code = pd.merge( 43 | station_code1, station_code2, left_on="Station_Code", right_on="station_code" 44 | ) 45 | sta_filter = merged_station_code["TSMIP_code"].isin(station_info["location_code"]) 46 | add_df = merged_station_code[~sta_filter][ 47 | ["TSMIP_code", "Ins_longitude", "Ins_latitude", "Ins_elevation", "TSMIP_short_code"] 48 | ] 49 | add_df.columns = [ 50 | "location_code", 51 | "longitude", 52 | "latitude", 53 | "elevation (m)", 54 | "station_code", 55 | ] 56 | 57 | save_index = [] 58 | for sta_code in add_df["location_code"].unique(): 59 | save_index.append( 60 | add_df[add_df["location_code"] == sta_code]["location_code"].index[-1] 61 | ) 62 | uniqued_add_df = add_df.loc[save_index] 63 | uniqued_add_df.insert(0, "network", "TSMIP") 64 | station_info = pd.concat([station_info, uniqued_add_df]) 65 | 66 | # data2 67 | CWBstation = pd.read_csv(f"{sta_path}/CWBstation.log", sep="\s+", header=None) 68 | CWBstation.columns = [ 69 | "location_code", 70 | "longitude", 71 | "latitude", 72 | "elevation (m)", 73 | "starttime", 74 | "endtime", 75 | ] 76 | sta_filter = CWBstation["location_code"].isin(station_info["location_code"]) 77 | add_df = CWBstation[~sta_filter][ 78 | ["location_code", "longitude", "latitude", "elevation (m)"] 79 | ] 80 | add_df.insert(0, "network", "TSMIP") 81 | add_df.insert(1, "station_code", np.nan) 82 | station_info = pd.concat([station_info, add_df]) 83 | station_info.sort_values(by=["location_code"], inplace=True) 84 | # station_info.to_csv(f"{sta_path}/TSMIPstations_new.csv", index=False) 85 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/read_velocity_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from cartopy.mpl import ticker 4 | import cartopy.crs as ccrs 5 | 6 | with open("vel3d.mod","r") as file: 7 | lines = file.readlines() 8 | 9 | lon_coor=[] 10 | lon_coor.append([float(x) for x in lines[1].split()]) 11 | lon_coor=np.array(lon_coor[0]) 12 | 13 | lat_coor=[] 14 | lat_coor.append([float(x) for x in lines[2].split()]) 15 | lat_coor=np.array(lat_coor[0]) 16 | 17 | dep_coor=[] 18 | dep_coor.append([float(x) for x in lines[3].split()]) 19 | dep_coor=np.array(dep_coor[0]) 20 | 21 | array=[] 22 | data=lines[4:] 23 | for i,line in enumerate(data): 24 | # 使用指定分隔符(例如,空格)拆分每一行 25 | elements = line.split() 26 | 27 | # 获取第5列以后的数据 28 | 29 | # 添加到结果列表 30 | array.append([float(x) for x in elements]) 31 | 32 | array=np.array(array) 33 | reshape_array=array.reshape(2,27,61,76)#(model,depth,lat,lon) 34 | 35 | 36 | #plot velocity model 37 | X, Y = np.meshgrid(lon_coor, lat_coor) 38 | for model_index,model_name in enumerate(["Vp model","Vs model"]): 39 | vmax=reshape_array[model_index,:,:].max() 40 | 
vmin=reshape_array[model_index,:,:].min() 41 | for dep_index in range(0,len(dep_coor)): 42 | fig,ax=plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()}) 43 | ax.coastlines() 44 | cp = ax.contourf(X, Y, reshape_array[model_index,dep_index,:,:],transform=ccrs.PlateCarree()) 45 | xticks = ticker.LongitudeLocator(nbins=125-119)._raw_ticks(119, 125) 46 | yticks = ticker.LatitudeLocator(nbins=26-20)._raw_ticks(20, 26) 47 | 48 | ax.set_xticks(xticks, crs=ccrs.PlateCarree()) 49 | ax.set_yticks(yticks, crs=ccrs.PlateCarree()) 50 | cbar = fig.colorbar(cp) 51 | cbar.set_label(f'{model_name[:2]} (km/s)') 52 | ax.set_xlabel("longitude") 53 | ax.set_ylabel("latitude") 54 | ax.set_title(f"Depth: {dep_coor[dep_index]}km") 55 | fig.savefig(f"model_image/{model_name}_depth_{int(dep_coor[dep_index]*1000)}m.png",dpi=300) 56 | 57 | #origin 76 61 27 58 | #after 89 66 27 59 | start=125.08 60 | end=126.5 61 | increased_lon=np.round(np.arange(start, end + 0.08, 0.08),2) 62 | increased_lon_to_str = " ".join("{:.2f}".format(num) for num in increased_lon) 63 | 64 | start=26.18 65 | end=26.8 66 | increased_lat=np.round(np.arange(start, end + 0.08, 0.08),2) 67 | increased_lat_to_str = " ".join("{:.2f}".format(num) for num in increased_lat) 68 | 69 | 70 | with open("vel3d_new.mod","w") as file: 71 | points_info=lines[0] 72 | updated_points_info=points_info.replace("76",f"{76+len(increased_lon)}").replace("61",f"{61+len(increased_lat)}") 73 | file.write(updated_points_info) 74 | updated_lon=lines[1].rstrip("\n")+" "+increased_lon_to_str+"\n" 75 | file.write(updated_lon) 76 | updated_lat=lines[2].rstrip("\n")+" "+increased_lat_to_str+"\n" 77 | file.write(updated_lat) 78 | file.write(lines[3]) 79 | new_lines=[] 80 | for i in range(4,len(lines)): 81 | 82 | formatted_number= "{:.3f}".format(array[i-4][-1]) 83 | result_string=(" "+str(formatted_number))*len(increased_lon) 84 | new_line=lines[i].rstrip("\n")+result_string+"\n" 85 | 86 | if (i-4)%61==60: 87 | for j in range(len(increased_lat)): 88 | file.write(new_line) 89 | file.write(new_line) 90 | 91 | #記得手動再增加一個深度 92 | 93 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_event_input_waveforms.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() 4 | import numpy as np 5 | import pandas as pd 6 | from torch.utils.data import DataLoader 7 | from tqdm import tqdm 8 | import sys 9 | sys.path.append("..") 10 | from data.multiple_sta_dataset import multiple_station_dataset 11 | from analysis import Triggered_Map 12 | 13 | 14 | mask_after_sec = 10 15 | label = "pga" 16 | eq_id = 25900 17 | data = multiple_station_dataset( 18 | "../data/TSMIP_1999_2019_Vs30.hdf5", 19 | mode="test", 20 | mask_waveform_sec=mask_after_sec, 21 | test_year=2016, 22 | label_key=label, 23 | input_type="acc", 24 | data_length_sec=15, 25 | ) 26 | record_prediction = pd.read_csv( 27 | f"../predict/station_blind_noVs30_bias2closed_station_2016/{mask_after_sec} sec ensemble 510 with all info.csv" 28 | ) 29 | record_prediction = record_prediction[record_prediction["EQ_ID"] == eq_id].reset_index( 30 | drop=True 31 | ) 32 | # ========================= 33 | loader = DataLoader(dataset=data, batch_size=1) 34 | 35 | Mixture_mu = [] 36 | Label = [] 37 | P_picks = [] 38 | EQ_ID = [] 39 | Label_time = [] 40 | Sta_name = [] 41 | Lat = [] 42 | Lon = [] 43 | Elev = [] 44 | 45 | for j, sample in tqdm(enumerate(loader)): 46 | picks = 
sample["p_picks"].flatten().numpy().tolist() 47 | label_time = sample[f"{label}_time"].flatten().numpy().tolist() 48 | lat = sample["target"][:, :, 0].flatten().tolist() 49 | lon = sample["target"][:, :, 1].flatten().tolist() 50 | elev = sample["target"][:, :, 2].flatten().tolist() 51 | P_picks.extend(picks) 52 | P_picks.extend([np.nan] * (25 - len(picks))) 53 | Label_time.extend(label_time) 54 | Label_time.extend([np.nan] * (25 - len(label_time))) 55 | Lat.extend(lat) 56 | Lon.extend(lon) 57 | Elev.extend(elev) 58 | 59 | eq_id_list = sample["EQ_ID"][:, :, 0].flatten().numpy().tolist() 60 | EQ_ID.extend(eq_id_list) 61 | EQ_ID.extend([np.nan] * (25 - len(eq_id_list))) 62 | if eq_id_list[0] == eq_id: 63 | waveform = sample["waveform"].numpy().reshape(25, 3000, 3) 64 | waveforms_fig,waveforms_ax=Triggered_Map.plot_model_waveforms_input(waveform,picks,record_prediction,mask_after_sec) 65 | 66 | waveform_num = len( 67 | np.where(np.array(picks) <= picks[0] + (mask_after_sec * 200))[0] 68 | ) 69 | for i in range(waveform_num): 70 | station_name = record_prediction["station_name"][i] 71 | answer = np.round(100 * (10 ** record_prediction["answer"][i]), 2) 72 | waveform_fig, waveform_ax = plt.subplots(3, 1, figsize=(7, 7)) 73 | for j in range(3): 74 | waveform_ax[j].plot(waveform[i, :, j]) 75 | waveform_ax[j].axvline(x=picks[i], c="r") 76 | waveform_ax[0].set_title(f"acc waveform, PGA: {answer} gal", size=20) 77 | # waveform_fig.savefig(f"./predict/acc predict pga 1999_2019/model 2 meinong intensity map/index{i}_{station_name}_acc_input.png") 78 | waveforms_ax[0].set_title( 79 | f"EQID:{eq_id} {mask_after_sec} sec acc records, Z component", size=20 80 | ) 81 | waveforms_ax[-1].set_xlabel( 82 | "Time sample (200Hz)",size=15 83 | ) 84 | # waveforms_fig.savefig(f"paper image/eqid{eq_id}_{mask_after_sec}_sec_Z_acc.png",bbox_inches='tight',dpi=300) 85 | break 86 | -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/3_label.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import sys 4 | import os 5 | import obspy 6 | import re 7 | from obspy.geodetics import gps2dist_azimuth 8 | 9 | sys.path.append("../..") 10 | from read_tsmip import read_tsmip, get_peak_value, get_integrated_stream 11 | 12 | # read traces catalog 13 | waveform_path = "../../data/0918_M6.8_1319_1330/ascii" 14 | traces = pd.read_csv("traces_catalog.csv") 15 | # delete broken waveform 16 | traces = traces.query("quality_control=='y'").reset_index(drop=True) 17 | 18 | sampling_rate = 200 19 | for i in range(len(traces)): 20 | print(f"{i}/{len(traces)}") 21 | file_name = traces["file_name"][i].strip() 22 | # read waveform 23 | data = pd.read_csv( 24 | f"{waveform_path}/{file_name}.asc", sep="\s+", skiprows=1, header=None 25 | ).to_numpy() 26 | 27 | with open(f"{waveform_path}/{file_name}.asc", "r") as f: 28 | picks = f.readlines()[0] 29 | picks = re.findall(r"\d+\.\d+", picks) 30 | picks = [np.round(float(number), 2) for number in picks] 31 | 32 | waveform = obspy.core.stream.Stream() 33 | channel = ["HLZ", "HLN", "HLE"] 34 | for j, chan in enumerate(channel): 35 | start = np.where(data == picks[2])[0][0] 36 | end = np.where(data == picks[3])[0][0] 37 | trace = obspy.core.trace.Trace(data[start:end, j + 1]) 38 | 39 | trace.stats.network = "TW" 40 | # trace.stats.station = header[0][14:20] 41 | trace.stats.channel = chan 42 | 43 | trace.stats.sampling_rate = int(1 / abs(data[0, 0] - data[1, 
0])) 44 | 45 | waveform.append(trace) 46 | # resample to 200Hz 47 | if waveform[0].stats.sampling_rate != sampling_rate: 48 | waveform.resample(sampling_rate, window="hann") 49 | 50 | # detrend 51 | waveform.detrend(type="demean") 52 | # lowpass filter 53 | waveform.filter("lowpass", freq=10) # filter 54 | # get pga 55 | pick_point = int(np.round(traces["p_pick_sec"][i] * sampling_rate, 0)) 56 | pga, pga_time = get_peak_value(waveform, pick_point=pick_point) 57 | # waveform taper 58 | waveform.taper(max_percentage=0.05, type="cosine") 59 | # integrate 60 | vel_waveform = get_integrated_stream(waveform) 61 | # bandpass filter 62 | vel_waveform.filter("bandpass", freqmin=0.075, freqmax=10) 63 | # get pgv 64 | pgv, pgv_time = get_peak_value(vel_waveform, pick_point=pick_point) 65 | # input to df 66 | traces.loc[i, "pga"] = pga 67 | traces.loc[i, "pga_time"] = pga_time 68 | traces.loc[i, "pgv"] = pgv 69 | traces.loc[i, "pgv_time"] = pgv_time 70 | 71 | 72 | #calculate epicentral distance 73 | 74 | catalog = pd.read_csv("event_catalog.csv") 75 | traces["epdis (km)"]=0 76 | 77 | eq_latitude = catalog["lat"][0] + catalog["lat_minute"][0] / 60 78 | eq_longitude = catalog["lon"][0] + catalog["lon_minute"][0] / 60 79 | eq_depth = catalog["depth"][0] 80 | for i in range(len(traces)): 81 | station_latitude = traces["latitude"][i] 82 | station_longitude = traces["longitude"][i] 83 | station_elevation = traces["elevation (m)"][i] / 1000 84 | epi_dis, azimuth, _ = gps2dist_azimuth( 85 | eq_latitude, eq_longitude, station_latitude, station_longitude 86 | ) 87 | epi_dis=((epi_dis/1000)**2 + (eq_depth - station_elevation)**2)**0.5 # gps2dist_azimuth returns meters; convert to km before combining with depth (km) 88 | traces.loc[i,"epdis (km)"]=epi_dis 89 | 90 | traces.to_csv(f"traces_catalog.csv", index=False) -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/README: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------- 2 | This is a script to calculate the P- and S-wave travel time between any two 3 | points in a 3D velocity model. 4 | 5 | For references, please cite the papers below: 6 | 1) For ray-tracing method: 7 | Koketsu and Sekine (1998), Pseudo-bending method for three-dimensional 8 | seismic ray tracing in a spherical earth with discontinuities, Geophys. 9 | J. Int., 132, 339-346. 10 | Huang et al. (2013), First Local Seismic Tomography for Red River Shear 11 | Zone, northern Vietnam: Stepwise inversion employing crustal P and Pn 12 | waves, Tectonophysics, 584, 230-239. 13 | 14 | 2) For velocity model of Taiwan region: 15 | Huang et al. (2014), Joint Vp and Vs tomography of Taiwan: Implications 16 | for subduction-collision orogeny, Earth. Planet. Sci. Lett., 392, 17 | 177-191. 18 | 19 | --------------------------------------------------------------------------- 20 | Last update: JUL 7, 2014 by Hsin-Hua Huang 21 | 22 | SUBDIRECTORIES: 23 | 1) src - source codes 24 | 2) mod - place to put collected velocity models 25 | 26 | 27 | 28 | RUNNING THE SCRIPT, SIMPLY TYPE './tracer'! 29 | To do it correctly, you need a velocity model named 'vel3d.mod' under the main 30 | directory for the script to read. For the detailed format, please refer to the content 31 | of the files. 32 | 33 | 34 | 35 | RAY PATH OUTPUT: 36 | If you select 1 for ray path output, two files named P_path.txt and S_path.txt 37 | will be generated. The ray paths in each file are separated by "X". 
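A minimal Python sketch for splitting P_path.txt or S_path.txt into individual ray paths (an assumption-level illustration: it presumes each path is a block of whitespace-separated coordinate lines and that a line containing only "X" separates the paths; check the exact column layout against the generated files):

def read_ray_paths(filename):
    paths, current = [], []
    with open(filename) as f:
        for line in f:
            if line.strip() == "X":  # "X" marks the end of one ray path
                if current:
                    paths.append(current)
                current = []
            else:
                current.append([float(v) for v in line.split()])
    if current:  # keep the last path if the file does not end with "X"
        paths.append(current)
    return paths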
38 | 39 | 40 | 41 | INPUT DATA FORMAT: 42 | You will see the instructions after typing './tracer': 43 | 1) Two-point mode - All you need is simply to input the coordinates of any 44 | two points. Then it will give you P- and S-wave 45 | calculated travel times on screen. This mode provides 46 | an option to output the traced ray paths for P- and 47 | S-wave in the files "P_path.txt" and "S_path.txt", 48 | respectively. 49 | 50 | 2) Two-file mode - In this mode you need to input two file names for 51 | sources (usually earthquakes) and receivers (usually 52 | stations), respectively. Both of them require the 53 | source and receiver coordinates in three columns 54 | (lon, lat, dep), and can be plural. See chichi.evt 55 | and chichi.sta for examples. An easier way is to 56 | modify the input file and type './tracer < input' 57 | to run. 58 | 59 | OUTPUT FORMAT: 60 | In two-file mode, you can further choose the format 61 | of the output file. 62 | 1) ascii 63 | It will output an ascii file 'tt.table', containing 64 | eight columns. Columns 1,2,3 and columns 4,5,6 are 65 | the source and receiver coordinates, and the last two 66 | columns are the calculated P- and S-wave travel times 67 | , respectively. 68 | 69 | 2) binary 70 | It will output a binary file 'tt.bin', which follows 71 | the ascii format. 72 | 73 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/residual_map.py: -------------------------------------------------------------------------------- 1 | import cartopy.crs as ccrs 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import math 5 | import sys 6 | from obspy.taup.taup_geo import calc_dist 7 | from obspy.geodetics import degrees2kilometers 8 | 9 | sys.path.append("..") 10 | from analysis import Consider_Angle 11 | 12 | 13 | def calculate_angle(x1, y1, x2, y2): 14 | # compute the slope between the two points 15 | delta_x = x2 - x1 16 | delta_y = y2 - y1 17 | slope = delta_y / delta_x 18 | 19 | # use the arctangent to get the angle (in radians) 20 | angle_radians = math.atan(slope) 21 | 22 | # convert radians to degrees 23 | angle_degrees = math.degrees(angle_radians) 24 | 25 | # adjust the angle into the 0-360 degree range 26 | if delta_x < 0: 27 | angle_degrees += 180 28 | elif delta_x >= 0 and delta_y < 0: 29 | angle_degrees += 360 30 | 31 | return angle_degrees % 360 32 | 33 | 34 | answer = pd.read_csv(f"true_answer.csv") 35 | prediction_3 = pd.read_csv(f"no_include_broken_data_prediction/3_sec_prediction.csv") 36 | prediction_5 = pd.read_csv(f"no_include_broken_data_prediction/5_sec_prediction.csv") 37 | prediction_7 = pd.read_csv(f"no_include_broken_data_prediction/7_sec_prediction.csv") 38 | prediction_10 = pd.read_csv(f"no_include_broken_data_prediction/10_sec_prediction.csv") 39 | 40 | max_prediction = pd.concat( 41 | [ 42 | prediction_3, 43 | prediction_5["predict"], 44 | prediction_7["predict"], 45 | prediction_10["predict"], 46 | ], 47 | axis=1, 48 | ) 49 | 50 | max_prediction.columns = [ 51 | "3_predict", 52 | "station_name", 53 | "latitude", 54 | "longitude", 55 | "elevation", 56 | "5_predict", 57 | "7_predict", 58 | "10_predict", 59 | ] 60 | max_prediction["max_predict"] = max_prediction.apply( 61 | lambda row: max( 62 | row["3_predict"], row["5_predict"], row["7_predict"], row["10_predict"] 63 | ), 64 | axis=1, 65 | ) 66 | 67 | max_prediction = pd.merge( 68 | answer, max_prediction, how="left", left_on="location_code", right_on="station_name" 69 | ) 70 | max_prediction.dropna(inplace=True) 71 | 72 | init_latitude = 
max_prediction.query("location_code=='HWA074'")["latitude"].values[0] 73 | init_longitude = max_prediction.query("location_code=='HWA074'")["longitude"].values[0] 74 | event_lat = 23.77 75 | event_lon = 121.67 76 | 77 | max_prediction = max_prediction.reset_index(drop=True) 78 | flattening_of_planet = 1 / 298.257223563 79 | 80 | for i in range(len(max_prediction)): 81 | lat = max_prediction["latitude"][i] 82 | lon = max_prediction["longitude"][i] 83 | angle = calculate_angle(init_longitude, init_latitude, lon, lat) 84 | epi_dist = degrees2kilometers( 85 | calc_dist( 86 | event_lat, 87 | event_lon, 88 | lat, 89 | lon, 90 | radius_of_planet_in_km=6371.0, 91 | flattening_of_planet=flattening_of_planet, 92 | ) 93 | ) 94 | max_prediction.loc[i, "angle"] = angle 95 | max_prediction.loc[i, "dist"] = epi_dist 96 | 97 | fig, ax = Consider_Angle.plot_pga_attenuation(prediction=max_prediction) 98 | # fig.savefig("PGA_attenuation.png",dpi=300) 99 | 100 | fig, ax = Consider_Angle.angle_map( 101 | stations=max_prediction, 102 | init_sta_lat=init_latitude, 103 | init_sta_lon=init_longitude, 104 | event_lat=23.77, 105 | event_lon=121.66, 106 | ) 107 | # fig.savefig("Angle_map.png",dpi=300) -------------------------------------------------------------------------------- /data_preprocess/analyze_resample.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from scipy.interpolate import interpolate,PchipInterpolator 6 | from read_tsmip import read_tsmip, get_peak_value 7 | 8 | """ 9 | In our dataset, we have different sampling rate waveforms, most of the data is 200Hz. 10 | In this script, we checked the residual of PGA after resampling all of waveforms to 200Hz. 
11 | """ 12 | 13 | target_sampling_rate = 200 14 | waveform_path = "../data/waveform" 15 | output_path = "./traces_sampling_rate" 16 | traces = pd.read_csv(f"events_traces_catalog/1999_2019_final_traces_Vs30.csv") 17 | traces["p_arrival_abs_time"] = pd.to_datetime( 18 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 19 | ) 20 | 21 | dict = {"station_name":[],"sta_latitude":[],"sta_longitude":[],"sampling_rate": [], "origin_PGA": [], "resampled_PGA": []} 22 | for i in range(len(traces)): 23 | print(f"{i}/{len(traces)}") 24 | year = str(traces["year"][i]) 25 | month = str(traces["month"][i]) 26 | if len(month) < 2: 27 | month = "0" + month 28 | filename = traces["file_name"][i].strip() 29 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{filename}.txt") 30 | sampling_rate = waveform[0].stats.sampling_rate 31 | if sampling_rate != target_sampling_rate: 32 | dict["station_name"].append(traces["station_name"][i]) 33 | dict["sta_latitude"].append(traces["latitude"][i]) 34 | dict["sta_longitude"].append(traces["longitude"][i]) 35 | dict["sampling_rate"].append(sampling_rate) 36 | 37 | 38 | pick_point = int(np.round(traces["p_pick_sec"][i] * sampling_rate, 0)) 39 | waveform.detrend(type="demean") 40 | waveform.filter("lowpass", freq=10) 41 | origin_pga = 10 ** get_peak_value(waveform, pick_point=pick_point)[0] * 100 42 | dict["origin_PGA"].append(origin_pga) 43 | 44 | 45 | for channel in range(len(waveform)): 46 | print(max(waveform[channel].data)) 47 | duration=len(waveform[channel].data)/sampling_rate 48 | origin_x=np.linspace(0,duration,int(len(waveform[channel].data))) 49 | resample_x=np.linspace(0,duration,int(target_sampling_rate*duration)) 50 | interpolater= PchipInterpolator(origin_x, waveform[channel].data) 51 | resample_waveform = interpolater(resample_x) 52 | 53 | # fig,ax=plt.subplots(2,1) 54 | # ax[0].plot(origin_x, waveform[channel].data) 55 | # ax[0].axvline(traces["p_pick_sec"][i],c="r") 56 | # ax[1].plot(resample_x, resample_waveform) 57 | # ax[1].axvline(traces["p_pick_sec"][i],c="r") 58 | waveform[channel].data=resample_waveform 59 | waveform[channel].stats.sampling_rate=target_sampling_rate 60 | print(max(waveform[channel].data)) 61 | 62 | pick_point = int(np.round(traces["p_pick_sec"][i] * target_sampling_rate, 0)) 63 | resample_pga = 10 ** get_peak_value(waveform, pick_point=pick_point)[0] * 100 64 | dict["resampled_PGA"].append(resample_pga) 65 | break 66 | 67 | 68 | output = pd.DataFrame(dict) 69 | output["residual"] = output["origin_PGA"] - output["resampled_PGA"] 70 | # output.to_csv(f"{output_path}/statistic_sampling_rate_new.csv", index=False) 71 | 72 | 73 | 74 | fig, ax = plt.subplots() 75 | ax.hist(output["residual"], bins=20, edgecolor="gray") 76 | ax.set_yscale("log") 77 | ax.set_xlabel("Residual (pga-resampled pga, unit: gal)") 78 | ax.set_ylabel("Number of traces") 79 | # fig.savefig(f"{output_path}/pga residual after resampling.png",dpi=300) 80 | -------------------------------------------------------------------------------- /model_performance_analysis/warning_time_maximize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from analysis import Rolling_Warning, Warning_Time_Plotter 4 | 5 | eq_id = 24784.0 6 | label_type = "pga" 7 | if label_type == "pga": 8 | label_threshold = np.log10(0.25) 9 | intensity = "IV" 10 | if label_type == "pgv": 11 | label_threshold = np.log10(0.15) 12 | intensity = "V" 13 | 14 | path = 
"../predict/station_blind_Vs30_bias2closed_station_2016" 15 | output_path = f"{path}/mag bigger 5.5 predict" 16 | 17 | prediction3_with_info = pd.read_csv(f"{path}/3 sec model11 with all info.csv") 18 | prediction5_with_info = pd.read_csv(f"{path}/5 sec model11 with all info.csv") 19 | prediction7_with_info = pd.read_csv(f"{path}/7 sec model11 with all info.csv") 20 | prediction10_with_info = pd.read_csv(f"{path}/10 sec model11 with all info.csv") 21 | 22 | rw_instance = Rolling_Warning(label_type="pga") 23 | warning_df_with_station_info = ( 24 | rw_instance.calculate_warning_time_at_different_issue_timing( 25 | prediction_in_different_timing=[ 26 | prediction3_with_info, 27 | prediction5_with_info, 28 | prediction7_with_info, 29 | prediction10_with_info, 30 | ], 31 | time_list=[3, 5, 7, 10], 32 | event_filter="magnitude>=5", 33 | ) 34 | ) 35 | 36 | fig, ax = rw_instance.plot_maximum_warning_time( 37 | warning_df_with_station_info=warning_df_with_station_info, 38 | time_list=["3 second", "5 second", "7 second", "10 second"], 39 | ) 40 | # fig.savefig(f"{path}/update warning_epi_vs_lead_time_mag_bigger_than_5.png",dpi=300) 41 | 42 | event_info = warning_df_with_station_info[ 43 | warning_df_with_station_info["EQ_ID"] == eq_id 44 | ] 45 | fig,ax=rw_instance.plot_event_warning_time_with_distance_range( 46 | event_info=event_info, distance_range=[20, 60], event_loc=[120.543833333333, 22.922] 47 | ) 48 | 49 | maximum_warning_time = warning_df_with_station_info["max_warning_time"] 50 | maximum_warning_time = maximum_warning_time[maximum_warning_time > 0] 51 | describe = maximum_warning_time.describe() 52 | count = int(describe["count"]) 53 | mean = np.round(describe["mean"], 2) 54 | std = np.round(describe["std"], 2) 55 | median = np.round(describe["50%"], 2) 56 | max = np.round(describe["max"], 2) 57 | statistical_dict = rw_instance.calculate_statistical_value(warning_df_with_station_info) 58 | 59 | fig, ax = rw_instance.plot_maximum_warning_time_histogram( 60 | warning_df_with_station_info, 61 | statistical_dict, 62 | title="Warning time in 2016 events magnitude >=5", 63 | ) 64 | # fig.savefig(f"{output_path}/maximum warning time, magnitude bigger than 5.png",dpi=300) 65 | 66 | single_event_statistical_dict = rw_instance.calculate_statistical_value( 67 | warning_df_with_station_info, filter=f"EQ_ID=={eq_id}" 68 | ) 69 | 70 | fig, ax = rw_instance.plot_maximum_warning_time_histogram( 71 | warning_df_with_station_info, 72 | single_event_statistical_dict, 73 | filter=f"EQ_ID=={eq_id}", 74 | title=f"EQ ID: {eq_id}, Maximum warning time", 75 | ) 76 | 77 | # fig.savefig(f"{output_path}/EQ ID{eq_id}, maximum warning time.png", dpi=300) 78 | for sec, events_prediction in zip( 79 | [3, 5, 7, 10], 80 | [ 81 | prediction3_with_info, 82 | prediction5_with_info, 83 | prediction7_with_info, 84 | prediction10_with_info, 85 | ], 86 | ): 87 | single_event_prediction = events_prediction.query(f"EQ_ID=={eq_id}") 88 | fig, ax = Warning_Time_Plotter.p_wave_pga_travel_time( 89 | event_prediction=single_event_prediction, 90 | title=f"EQ ID: {eq_id} {sec} sec prediction with p-wave and pga travel time", 91 | ) 92 | -------------------------------------------------------------------------------- /feature_map_correlation/plot_single_event_feature_map.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import DataLoader 8 | from tqdm import 
tqdm 9 | import os 10 | import sys 11 | 12 | sys.path.append("..") 13 | from model.CNN_Transformer_Mixtureoutput_TEAM import CNN_feature_map 14 | from data.multiple_sta_dataset import multiple_station_dataset 15 | 16 | mask_after_sec = 5 17 | sample_rate = 200 18 | eq_id = 24784 19 | label = "pga" 20 | data = multiple_station_dataset( 21 | "D:/TEAM_TSMIP/data/TSMIP_1999_2019_Vs30.hdf5", 22 | mode="test", 23 | mask_waveform_sec=mask_after_sec, 24 | test_year=2016, 25 | label_key=label, 26 | mag_threshold=0, 27 | input_type="acc", 28 | data_length_sec=15, 29 | ) 30 | # need station name 31 | data_path = "../predict/station_blind_Vs30_bias2closed_station_2016" 32 | predict = pd.read_csv(f"{data_path}/{mask_after_sec} sec model11 with all info.csv") 33 | single_event_prediction = predict.query(f"EQ_ID=={eq_id}") 34 | # ===========prepare model============== 35 | device = torch.device("cuda") 36 | num = 11 37 | model_path = f"../model/model{num}.pt" 38 | CNN_model = CNN_feature_map(mlp_input=5665).cuda() 39 | 40 | # ===========load CNN parameter============== 41 | full_model_parameter = torch.load(model_path) 42 | CNN_parameter = {} 43 | for name, param in full_model_parameter.items(): 44 | if ( 45 | "model_CNN" in name 46 | ): # model_CNN.conv2d1.0.weight : conv2d1.0.weight didn't match 47 | name = name.replace("model_CNN.", "") 48 | CNN_parameter[name] = param 49 | CNN_model.load_state_dict(CNN_parameter) 50 | 51 | # find specific eq_id 52 | loader = DataLoader(dataset=data, batch_size=1) 53 | for j, sample in tqdm(enumerate(loader)): 54 | if sample["EQ_ID"][:, :, 0].flatten().numpy().tolist()[0] == eq_id: 55 | break 56 | 57 | # waveform average 58 | waveform = sample["waveform"].numpy().reshape(25, 3000, 3) 59 | average_waveform = np.mean(waveform, axis=2) 60 | # station_name_list 61 | not_padding_station_number = (sample["sta"].reshape(25, 4) != 0).all(dim=1).sum().item() 62 | input_station_list = single_event_prediction["station_name"][ 63 | :not_padding_station_number 64 | ].tolist() 65 | if len(input_station_list) < 25: 66 | input_station_list += [np.nan] * (25 - len(input_station_list)) 67 | # input trace trigger time 68 | p_picks = sample["p_picks"].flatten().tolist() 69 | 70 | cnn_input = torch.DoubleTensor(sample["waveform"].reshape(-1, 3000, 3)).float().cuda() 71 | cnn_output, layer_output = CNN_model(cnn_input) 72 | 73 | # plot convolution layer feature map (each layer) 74 | for layer_num, tensor in enumerate(layer_output): # convolution layer number 75 | output_path = f"../predict/station_blind_Vs30_bias2closed_station_2016/{mask_after_sec} sec cnn feature map//layer {layer_num+1}" 76 | if not os.path.isdir(output_path): 77 | os.mkdir(output_path) 78 | print("layer_number", layer_num) 79 | numeric_array = np.array(tensor.detach().cpu(), dtype=np.float32) 80 | feature_map = np.mean(numeric_array, axis=1) 81 | if len(feature_map.shape) == 3: 82 | feature_map = np.mean(feature_map, axis=2) 83 | print(feature_map.shape) 84 | fig, ax = plt.subplots(figsize=(10, 10)) 85 | image = ax.imshow(feature_map, cmap="Reds", aspect="auto") 86 | ax.set_yticks(np.arange(0 - 0.5, feature_map.shape[0] + 0.5, 1), minor=True) 87 | ax.grid(axis="y", linestyle="--", c="red", which="minor") 88 | colorbar = plt.colorbar(image, ax=ax) 89 | ax.set_title(f"Conv layer {layer_num+1}") 90 | # fig.savefig( 91 | # f"{output_path}/Conv layer {layer_num+1}, average feature map.png", dpi=300 92 | # ) 93 | -------------------------------------------------------------------------------- 
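plot_single_event_feature_map.py above relies on a dedicated CNN_feature_map variant of the network to expose the per-layer activations it averages and plots. A forward-hook-based alternative can collect the same kind of intermediate outputs from an unmodified model; the sketch below is only an illustration and assumes nothing beyond the model being a torch.nn.Module containing Conv2d layers (it is not the repository's implementation).

import torch

def collect_conv_outputs(model, sample_input):
    # register a hook on every Conv2d layer, run one forward pass, then clean up
    outputs, hooks = [], []
    for module in model.modules():
        if isinstance(module, torch.nn.Conv2d):
            hooks.append(
                module.register_forward_hook(
                    lambda m, inp, out, store=outputs: store.append(out.detach().cpu())
                )
            )
    with torch.no_grad():
        model(sample_input)
    for h in hooks:
        h.remove()  # release the hooks so later forward passes are unaffected
    return outputs

Each collected tensor can then be averaged over the channel axis and drawn with imshow, exactly as the script does with layer_output.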
/model_performance_analysis/plot_intensity_map_and_warning_time.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | from analysis import Intensity_Plotter,Warning_Time_Plotter 5 | 6 | 7 | mask_after_sec = 10 8 | label_type = "pga" 9 | if label_type == "pga": 10 | label_threshold = np.log10(0.25) 11 | intensity = "IV" 12 | if label_type == "pgv": 13 | label_threshold = np.log10(0.15) 14 | intensity = "V" 15 | 16 | path = "../predict/station_blind_Vs30_bias2closed_station_2016" 17 | output_path = f"{path}/mag bigger 5.5 predict" 18 | if not os.path.isdir(output_path): 19 | os.mkdir(output_path) 20 | Afile_path = "../data_preprocess/events_traces_catalog" 21 | 22 | catalog = pd.read_csv(f"{Afile_path}/1999_2019_final_catalog.csv") 23 | traces_info = pd.read_csv( 24 | f"{Afile_path}/2009_2019_picked_traces_p_arrival_abstime_labeled_nostaoverlap.csv" 25 | ) 26 | prediction_with_info = pd.read_csv( 27 | f"{path}/{mask_after_sec} sec model11 with all info.csv" 28 | ) 29 | 30 | # for EQ_ID in catalog.query("year==2016 & magnitude>=5.5")["EQ_ID"]: 31 | for EQ_ID in [24784, 25900]: 32 | event = catalog[catalog["EQ_ID"] == EQ_ID] 33 | event = event.assign( 34 | latitude=event["lat"] + event["lat_minute"] / 60, 35 | longitude=event["lon"] + event["lon_minute"] / 60, 36 | ) 37 | event_prediction = prediction_with_info.query(f"EQ_ID=={EQ_ID}") 38 | 39 | fig, ax = Intensity_Plotter.plot_intensity_map( 40 | trace_info=event_prediction, 41 | eventmeta=event, 42 | label_type=label_type, 43 | true_label=event_prediction["answer"], 44 | pred_label=event_prediction["predict"], 45 | sec=mask_after_sec, 46 | EQ_ID=EQ_ID, 47 | grid_method="linear", 48 | pad=100, 49 | title=f"{mask_after_sec} sec intensity Map", 50 | ) 51 | # fig.savefig( 52 | # f"../paper image/{EQ_ID}_{mask_after_sec}sec PGA intensity Map.png", dpi=600, bbox_inches="tight" 53 | # ) 54 | fig, ax = Intensity_Plotter.plot_true_predicted( 55 | y_true=event_prediction["answer"], 56 | y_pred=event_prediction["predict"], 57 | quantile=False, 58 | agg="point", 59 | point_size=70, 60 | target=label_type, 61 | title=f"EQID: {EQ_ID}, mag: {event['magnitude'].values[0]}, {mask_after_sec} sec true and predict", 62 | ) 63 | # fig.savefig( 64 | # f"{output_path}/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec}sec true predict plot.png", dpi=450, bbox_inches="tight" 65 | # ) 66 | try: 67 | fig, ax = Warning_Time_Plotter.warning_map( 68 | trace_info=event_prediction, 69 | eventmeta=event, 70 | label_type=label_type, 71 | intensity=intensity, 72 | EQ_ID=EQ_ID, 73 | sec=mask_after_sec, 74 | label_threshold=label_threshold, 75 | ) 76 | 77 | # fig.savefig(f"../paper image/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec} sec warning map.png", 78 | # dpi=600) 79 | fig, ax = Warning_Time_Plotter.correct_warning_with_epidist( 80 | event_prediction=event_prediction, 81 | label_threshold=label_threshold, 82 | label_type=label_type, 83 | mask_after_sec=mask_after_sec, 84 | ) 85 | # fig.savefig(f"{output_path}/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec} sec epidist vs time.png", 86 | # dpi=300) 87 | fig, ax = Warning_Time_Plotter.warning_time_hist( 88 | event_prediction, 89 | catalog, 90 | EQ_ID=EQ_ID, 91 | mask_after_sec=mask_after_sec, 92 | warning_mag_threshold=4, 93 | label_threshold=label_threshold, 94 | label_type=label_type, 95 | bins=14, 96 | ) 97 | # fig.savefig( 98 | # 
f"{output_path}/{EQ_ID}_mag_{event['magnitude'].values[0]}_{mask_after_sec} sec warning stations hist.png", 99 | # dpi=300, 100 | # bbox_inches="tight", 101 | # ) 102 | except Exception as e: 103 | print(EQ_ID) 104 | continue 105 | -------------------------------------------------------------------------------- /data_preprocess/plot_double_event.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from obspy.signal.trigger import ar_pick 4 | 5 | from read_tsmip import read_tsmip, trace_pick_plot 6 | 7 | 8 | Afile_path = "../data/Afile" 9 | double_event = pd.read_csv(f"{Afile_path}/1991-2020 double traces.csv") 10 | double_event=double_event.query("year==2018") 11 | counts_file_times=double_event[["file_name","year","month"]].value_counts() 12 | error_file={"year":[],"month":[],"file":[],"eq_num":[],"reason":[]} 13 | for (file_name,year,month),eq_num in zip(counts_file_times.index,counts_file_times): 14 | if len(str(month))<2: 15 | month="0"+str(month) 16 | path=f"../data/waveform/{year}/{month}" 17 | file_name=file_name.strip() 18 | try: 19 | trace=read_tsmip(f"{path}/{file_name}.txt") 20 | print("read_file ok") 21 | 22 | except Exception as reason: 23 | print(file_name,f"year:{year},month:{month}, {reason}") 24 | error_file["year"].append(year) 25 | error_file["month"].append(month) 26 | error_file["file"].append(file_name) 27 | error_file["reason"].append(reason) 28 | error_file["eq_num"].append(eq_num) 29 | continue 30 | sampling_rate=trace[0].stats.sampling_rate 31 | try: 32 | p_pick,s_pick=ar_pick(trace[0],trace[1],trace[2], 33 | samp_rate=sampling_rate, 34 | f1=1, #Frequency of the lower bandpass window 35 | f2=20, #Frequency of the upper bandpass window 36 | lta_p=1, #Length of LTA for the P arrival in seconds 37 | sta_p=0.1, #Length of STA for the P arrival in seconds 38 | lta_s=4.0, #Length of LTA for the S arrival in seconds 39 | sta_s=1.0, #Length of STA for the P arrival in seconds 40 | m_p=2, #Number of AR coefficients for the P arrival 41 | m_s=8, #Number of AR coefficients for the S arrival 42 | l_p=0.1, 43 | l_s=0.2, 44 | s_pick=True) 45 | except Exception as reason: 46 | print(file_name,f"year:{year},month:{month}, {reason}") 47 | error_file["year"].append(year) 48 | error_file["month"].append(month) 49 | error_file["file"].append(file_name) 50 | error_file["reason"].append(reason) 51 | error_file["eq_num"].append(eq_num) 52 | continue 53 | fig,ax=plt.subplots(3,1) 54 | ax[0].set_title(f"station: {trace[0].stats.station}, start time: {trace[0].stats.starttime}") 55 | ax[1].set_title(f"number of events: {eq_num}") 56 | for component in range(len(trace)): 57 | ax[component].plot(trace[component],"k") 58 | ymin,ymax=ax[component].get_ylim() 59 | ax[component].vlines(p_pick*sampling_rate,ymin,ymax,"r",label="P pick") 60 | ax[component].vlines(s_pick*sampling_rate,ymin,ymax,"g",label="S pick") 61 | ax[0].set_xticks([]) 62 | ax[1].set_xticks([]) 63 | ax[1].set_ylabel(f"Amplitude (gal)") 64 | ax[2].set_xlabel(f"Time Sample (200Hz)") 65 | ax[0].legend() 66 | fig.tight_layout() 67 | output_path="../data/double event picking" 68 | fig.savefig(f"{output_path}/{file_name}.png",dpi=300) 69 | plt.close() 70 | 71 | error_file_df=pd.DataFrame(error_file) 72 | # error_file_df.to_csv(f"{Afile_path}/double event error.csv",index=False) 73 | 74 | #pick again error file 75 | error_file_df=pd.read_csv(f"{Afile_path}/double event error_new.csv") 76 | cant_picking_filter=((error_file_df["year"]!=2020) 
& (error_file_df["month"]!="07") & (error_file_df["month"]!="08") & (error_file_df["month"]!="09")) 77 | cant_picking_file=error_file_df[cant_picking_filter].reset_index(drop=True) 78 | 79 | for i in range(len(cant_picking_file)): 80 | year=cant_picking_file["year"][i] 81 | month=cant_picking_file["month"][i] 82 | if len(str(month))<2: 83 | month="0"+str(month) 84 | file_name=cant_picking_file["file"][i] 85 | eq_num=cant_picking_file["eq_num"][i] 86 | 87 | path=f"data/waveform/{year}/{month}" 88 | 89 | trace=read_tsmip(f"{path}/{file_name}.txt") 90 | trace_pick_plot(trace,file_name,eq_num=eq_num,output_path="../data/waveform/double event picking") -------------------------------------------------------------------------------- /data_preprocess/Vs30_preprocess.py: -------------------------------------------------------------------------------- 1 | import pygmt 2 | import numpy as np 3 | from numpy import sin, cos, tan, radians 4 | import math 5 | from scipy.spatial import distance 6 | 7 | def grd_to_xyz(input_grd, output_xyz): 8 | with pygmt.clib.Session() as session: 9 | # 使用pygmt.grd2xyz進行轉換 10 | session.call_module("grd2xyz", f"{input_grd} > {output_xyz}") 11 | 12 | def twd67_to_97(x, y): 13 | """_summary_ 14 | 15 | 16 | Parameters 17 | ---------- 18 | x : float 19 | x in TWD67 system 20 | y : float 21 | x in TWD67 system 22 | 23 | Returns 24 | ------- 25 | x and y in TWD97 system 26 | """ 27 | A = 0.00001549 28 | B = 0.000006521 29 | 30 | x_97 = x + 807.8 + A * x + B * y 31 | y_97 = y - 248.6 + A * y + B * x 32 | return x_97, y_97 33 | 34 | 35 | def twd97_to_lonlat(x=174458.0, y=2525824.0): 36 | """ 37 | Parameters 38 | ---------- 39 | x : float 40 | TWD97 coord system. The default is 174458.0. 41 | y : float 42 | TWD97 coord system. The default is 2525824.0. 
43 | Returns 44 | ------- 45 | list 46 | [longitude, latitude] 47 | """ 48 | 49 | a = 6378137 50 | b = 6356752.314245 51 | long_0 = 121 * math.pi / 180.0 52 | k0 = 0.9999 53 | dx = 250000 54 | dy = 0 55 | 56 | e = math.pow((1 - math.pow(b, 2) / math.pow(a, 2)), 0.5) 57 | 58 | x -= dx 59 | y -= dy 60 | 61 | M = y / k0 62 | 63 | mu = M / ( 64 | a 65 | * (1 - math.pow(e, 2) / 4 - 3 * math.pow(e, 4) / 64 - 5 * math.pow(e, 6) / 256) 66 | ) 67 | e1 = (1.0 - pow((1 - pow(e, 2)), 0.5)) / ( 68 | 1.0 + math.pow((1.0 - math.pow(e, 2)), 0.5) 69 | ) 70 | 71 | j1 = 3 * e1 / 2 - 27 * math.pow(e1, 3) / 32 72 | j2 = 21 * math.pow(e1, 2) / 16 - 55 * math.pow(e1, 4) / 32 73 | j3 = 151 * math.pow(e1, 3) / 96 74 | j4 = 1097 * math.pow(e1, 4) / 512 75 | 76 | fp = ( 77 | mu 78 | + j1 * math.sin(2 * mu) 79 | + j2 * math.sin(4 * mu) 80 | + j3 * math.sin(6 * mu) 81 | + j4 * math.sin(8 * mu) 82 | ) 83 | 84 | e2 = math.pow((e * a / b), 2) 85 | c1 = math.pow(e2 * math.cos(fp), 2) 86 | t1 = math.pow(math.tan(fp), 2) 87 | r1 = ( 88 | a 89 | * (1 - math.pow(e, 2)) 90 | / math.pow((1 - math.pow(e, 2) * math.pow(math.sin(fp), 2)), (3 / 2)) 91 | ) 92 | n1 = a / math.pow((1 - math.pow(e, 2) * math.pow(math.sin(fp), 2)), 0.5) 93 | d = x / (n1 * k0) 94 | 95 | q1 = n1 * math.tan(fp) / r1 96 | q2 = math.pow(d, 2) / 2 97 | q3 = (5 + 3 * t1 + 10 * c1 - 4 * math.pow(c1, 2) - 9 * e2) * math.pow(d, 4) / 24 98 | q4 = ( 99 | ( 100 | 61 101 | + 90 * t1 102 | + 298 * c1 103 | + 45 * math.pow(t1, 2) 104 | - 3 * math.pow(c1, 2) 105 | - 252 * e2 106 | ) 107 | * math.pow(d, 6) 108 | / 720 109 | ) 110 | lat = fp - q1 * (q2 - q3 + q4) 111 | 112 | q5 = d 113 | q6 = (1 + 2 * t1 + c1) * math.pow(d, 3) / 6 114 | q7 = ( 115 | (5 - 2 * c1 + 28 * t1 - 3 * math.pow(c1, 2) + 8 * e2 + 24 * math.pow(t1, 2)) 116 | * math.pow(d, 5) 117 | / 120 118 | ) 119 | lon = long_0 + (q5 - q6 + q7) / math.cos(fp) 120 | 121 | lat = (lat * 180) / math.pi 122 | lon = (lon * 180) / math.pi 123 | return [lon, lat] 124 | 125 | 126 | def find_nearest_point(target_point, points): 127 | """ 128 | 找到點集points中距離目標點target_point最近的點。 129 | 130 | 參數: 131 | target_point: 目標點的坐標,一個包含兩個元素的列表或元組,例如 [x, y]。 132 | points: 點集,一個包含多個點坐標的二維數組,每個點為一個包含兩個元素的列表或元組,例如 [[x1, y1], [x2, y2], ...]。 133 | 134 | 返回值: 135 | nearest_point: 距離目標點最近的點的坐標,一個包含兩個元素的列表,例如 [x_nearest, y_nearest]。 136 | """ 137 | target_point = np.array(target_point) 138 | points = np.array(points) 139 | 140 | # 使用cdist計算距離矩陣 141 | distances = distance.cdist([target_point], points) 142 | 143 | # 找到距離最小的點的索引 144 | nearest_index = np.argmin(distances) 145 | 146 | # 返回距離最小的點的坐標 147 | nearest_point = points[nearest_index] 148 | 149 | return nearest_index, nearest_point 150 | 151 | 152 | def get_unique_with_other_columns(group): 153 | # 獲取唯一值 154 | unique_value = group["station_name"].unique()[0] 155 | # 獲取其他columns的值 156 | other_columns_values = group.drop_duplicates(subset=["station_name"]) 157 | return other_columns_values 158 | -------------------------------------------------------------------------------- /data_preprocess/13_cut_waveform_to_hdf5.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | from read_tsmip import cut_traces 7 | start_year = 1999 8 | end_year = 2019 9 | sta_path = "../data/station_information" 10 | waveform_path = "../data/waveform" 11 | catalog = pd.read_csv( 12 | f"./events_traces_catalog/{start_year}_{end_year}_final_catalog.csv" 13 | ) 14 | traces = pd.read_csv( 15 | 
f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv" 16 | ) 17 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 18 | traces.loc[traces.index, "p_pick_sec"] = pd.to_timedelta( 19 | traces["p_pick_sec"], unit="sec" 20 | ) 21 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 22 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 23 | ) 24 | 25 | # into hdf5 26 | output = f"../data/TSMIP_{start_year}_{end_year}_Vs30.hdf5" 27 | error_event = {"EQ_ID": [], "reason": []} 28 | with h5py.File(output, "w") as file: 29 | data = file.create_group("data") 30 | meta = file.create_group("metadata") 31 | for eq_id in tqdm(catalog["EQ_ID"]): 32 | # for eq_id in [247]: 33 | try: 34 | tmp_traces, traces_info = cut_traces( 35 | traces, eq_id, waveform_path, waveform_type="acc" 36 | ) 37 | _, vel_info = cut_traces(traces, eq_id, waveform_path, waveform_type="vel") 38 | _, dis_info = cut_traces(traces, eq_id, waveform_path, waveform_type="dis") 39 | traces_info["vel"] = vel_info["traces"] 40 | traces_info["dis"] = dis_info["traces"] 41 | # fig=plot_cutting_event(tmp_traces,traces_info) 42 | start_time_str_arr = np.array(traces_info["start_time"], dtype="S30") 43 | station_name_str_arr = np.array(tmp_traces["station_name"], dtype="S30") 44 | tmp_station_info = pd.merge( 45 | tmp_traces[["station_name","Vs30"]], 46 | station_info[ 47 | ["location_code", "latitude", "longitude", "elevation (m)"] 48 | ], 49 | how="left", 50 | left_on="station_name", 51 | right_on="location_code", 52 | ) 53 | location_array = np.array( 54 | tmp_station_info[["latitude", "longitude", "elevation (m)"]] 55 | ) 56 | Vs30_array=np.array(tmp_traces["Vs30"]) 57 | if np.isnan(location_array).any(): 58 | print("The location array contain NaN values") 59 | continue 60 | event = data.create_group(f"{eq_id}") 61 | event.create_dataset( 62 | "acc_traces", data=traces_info["traces"], dtype=np.float64 63 | ) 64 | event.create_dataset( 65 | "vel_traces", data=traces_info["vel"], dtype=np.float64 66 | ) 67 | event.create_dataset( 68 | "dis_traces", data=traces_info["dis"], dtype=np.float64 69 | ) 70 | event.create_dataset("p_picks", data=traces_info["p_picks"], dtype=np.int64) 71 | event.create_dataset("pga", data=traces_info["pga"], dtype=np.float64) 72 | event.create_dataset("pgv", data=traces_info["pgv"], dtype=np.float64) 73 | event.create_dataset( 74 | "start_time", data=start_time_str_arr, maxshape=(None), chunks=True 75 | ) 76 | event.create_dataset( 77 | "pga_time", data=traces_info["pga_time"], dtype=np.int64 78 | ) 79 | event.create_dataset( 80 | "pgv_time", data=traces_info["pgv_time"], dtype=np.int64 81 | ) 82 | event.create_dataset( 83 | "station_name", data=station_name_str_arr, maxshape=(None), chunks=True 84 | ) 85 | event.create_dataset( 86 | "station_location", data=location_array, dtype=np.float64 87 | ) 88 | event.create_dataset( 89 | "Vs30", data=Vs30_array, dtype=np.float64 90 | ) 91 | except Exception as reason: 92 | print(f"EQ_ID:{eq_id}, {reason}") 93 | error_event["EQ_ID"].append(eq_id) 94 | error_event["reason"].append(reason) 95 | continue 96 | # fig.savefig(f"data/cutting waveform image/{eq_id}.png") 97 | error_event_df = pd.DataFrame(error_event) 98 | error_event_df.to_csv( 99 | "./events_traces_catalog/load into hdf5 error event.csv", index=False 100 | ) 101 | 102 | catalog.to_hdf(output, key="metadata/event_metadata", mode="a", format="table") 103 | traces.to_hdf(output, key="metadata/traces_metadata", mode="a", format="table") 104 | 
-------------------------------------------------------------------------------- /data_preprocess/tracer_demo/src/tracer.f90: -------------------------------------------------------------------------------- 1 | ! This is a script to calculate the travel time between two points in 3D velocity model 2 | program tracer 3 | implicit real*8 (a-h,o-z) 4 | include 'setup.inc' 5 | integer msg,imode,ibin,ipath 6 | parameter (msg=16384) 7 | real*8 w(3,msg+1),tp,ts 8 | real*8 evlo,evla,evla2,evdp 9 | real*8 stlo,stla,stla2,stel 10 | character*32 evt_fn,sta_fn,mod_fn 11 | 12 | ! default file names of 3D velocity model and output travel time table 13 | mod_fn='vel3d.mod' 14 | 15 | ! read 3D velocity model 16 | print*,'read default velocity model [vel3d.mod]' 17 | call input_vel(mod_fn) 18 | 19 | 666 print*,'Mode option: (1) input two points' 20 | print*,' (2) input two files' 21 | read(*,*)imode 22 | 23 | ! two-point mode 24 | if (imode.eq.1) then 25 | print*,'Input point 1 (lon, lat, dep):' 26 | read(*,*)evlo,evla,evdp 27 | evla2=geog_to_geoc(evla) 28 | print*,'Input point 2 (lon, lat, dep):' 29 | read(*,*)stlo,stla,stel 30 | stla2=geog_to_geoc(stla) 31 | 32 | print*,'Set 1 to output ray path, otherwise 0:' 33 | read(*,*)ipath 34 | 35 | !-P-wave 36 | ips=1 37 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,tp) 38 | if (ipath.eq.1) then 39 | open(11,file='P_path.txt',status='unknown') 40 | do i=1,np 41 | write(11,'(3f10.3)')w(3,i),w(2,i),w(1,i) 42 | enddo 43 | close(11) 44 | endif 45 | !-S-wave 46 | ips=2 47 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,ts) 48 | if (ipath.eq.1) then 49 | open(11,file='S_path.txt',status='unknown') 50 | do i=1,np 51 | write(11,'(3f10.3)')w(3,i),w(2,i),w(1,i) 52 | enddo 53 | close(11) 54 | endif 55 | print*,'P- and S-wave ray path were outputed!' 56 | print*,'' 57 | 58 | print*,'--------------------------------------------' 59 | print*,'P-wave travel time (sec):',tp 60 | print*,'S-wave travel time (sec):',ts 61 | 62 | 63 | ! two-file mode 64 | elseif (imode.eq.2) then 65 | print*,'Input source file:' 66 | read(*,*)evt_fn 67 | print*,'Input receiver file:' 68 | read(*,*)sta_fn 69 | 70 | print*,'Set 1 to output ray path, otherwise 0:' 71 | read(*,*)ipath 72 | 73 | 777 print*,'Format of output table: 1) ascii, 2) binary' 74 | read(*,*)ibin 75 | 76 | open(1,file=evt_fn,status='old') 77 | open(2,file=sta_fn,status='old') 78 | if (ibin.eq.1) then 79 | open(3,file='tt.table',status='unknown') 80 | elseif (ibin.eq.2) then 81 | open(3,file='tt.bin',status='unknown',form='unformatted') 82 | else 83 | print*,'Option can not be recognized! Please re-try!' 84 | goto 777 85 | endif 86 | ibyte=0 87 | if (ipath.eq.1) then 88 | open(10,file='P_path.txt',status='unknown') 89 | open(11,file='S_path.txt',status='unknown') 90 | endif 91 | do 92 | read(1,*,iostat=ierr1)evlo,evla,evdp 93 | if (ierr1.lt.0) exit 94 | evla2=geog_to_geoc(evla) 95 | do 96 | read(2,*,iostat=ierr2)stlo,stla,stel 97 | if (ierr2.lt.0) exit 98 | stla2=geog_to_geoc(stla) 99 | stel=-stel/1000. 
100 | 101 | !-P-wave 102 | ips=1 103 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,tp) 104 | !-output P-wave path 105 | if (ipath.eq.1) then 106 | do i=1,np 107 | write(10,'(3f10.3)')w(3,i),w(2,i),w(1,i) 108 | enddo 109 | write(10,'(a1)')"X" 110 | endif 111 | !-S-wave 112 | ips=2 113 | call pbr(evla2,evlo,evdp,stla2,stlo,stel,w,np,ts) 114 | !-output S-wave path 115 | if (ipath.eq.1) then 116 | do i=1,np 117 | write(11,'(3f10.3)')w(3,i),w(2,i),w(1,i) 118 | enddo 119 | write(11,'(a1)')"X" 120 | endif 121 | 122 | !-output calculated travel time table 123 | if (ibin.eq.1) write(3,'(2(2f8.3,f7.3,1x),2f10.3)')evlo,evla,evdp,stlo,stla,stel,tp,ts 124 | if (ibin.eq.2) write(3)evlo,evla,evdp,stlo,stla,stel,tp,ts 125 | enddo 126 | rewind(2) 127 | enddo 128 | close(10) 129 | close(11) 130 | print*,'--------------------------------------------' 131 | if (ibin.eq.1) print*,'Ascii table [tt.table] completed!' 132 | if (ibin.eq.2) print*,'Binary table [tt.bin] completed!' 133 | 134 | else 135 | print*,'Option can not be recognized! Please re-try!' 136 | goto 666 137 | endif 138 | close(1) 139 | close(2) 140 | close(3) 141 | end 142 | -------------------------------------------------------------------------------- /model_performance_analysis/confusion_matrix_multi_station.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.metrics import confusion_matrix 5 | import os 6 | from analysis import Precision_Recall_Factory 7 | 8 | path = "../predict/station_blind_Vs30_bias2closed_station_2016" 9 | output_path = f"{path}/model11 confusion matrix" 10 | if not os.path.isdir(output_path): 11 | os.mkdir(output_path) 12 | 13 | label = "pga" 14 | unit = "m/s^2" 15 | 16 | #形成 warning threshold array 其中包含對應的4~5級標準 17 | target_value = np.log10(0.8) 18 | 19 | # 生成一個包含目標值的數組 20 | score_curve_threshold = np.linspace(np.log10(0.025), np.log10(1.4), 100) 21 | 22 | # 檢查最接近的值 23 | closest_value = min(score_curve_threshold, key=lambda x: abs(x - target_value)) 24 | 25 | # 調整num參數以確保包含目標值 26 | if closest_value != target_value: 27 | num_adjusted = 100 + int(np.ceil(abs(target_value - closest_value) / np.diff(score_curve_threshold[:2]))) 28 | score_curve_threshold = np.linspace(np.log10(0.025), np.log10(1.4), num_adjusted) 29 | 30 | 31 | intensity_score_dict = {"second": [], "intensity_score": []} 32 | f1_curve_fig, f1_curve_ax = plt.subplots() 33 | precision_curve_fig, precision_curve_ax = plt.subplots() 34 | recall_curve_fig, recall_curve_ax = plt.subplots() 35 | for mask_after_sec in [3, 5, 7, 10]: 36 | data = pd.read_csv(f"{path}/{mask_after_sec} sec model11 with all info.csv") 37 | 38 | predict_label = data["predict"] 39 | real_label = data["answer"] 40 | # calculate intensity score 41 | intensity = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 42 | data["predicted_intensity"] = predict_label.apply(Precision_Recall_Factory.pga_to_intensity) 43 | data["answer_intensity"] = real_label.apply(Precision_Recall_Factory.pga_to_intensity) 44 | intensity_score = ( 45 | (data["predicted_intensity"] == data["answer_intensity"]).sum() 46 | ) / len(data) 47 | intensity_score_dict["second"].append(mask_after_sec) 48 | intensity_score_dict["intensity_score"].append(intensity_score) 49 | intensity_table = pd.DataFrame(intensity_score_dict) 50 | 51 | # intensity_table.to_csv( 52 | # f"{output_path}/intensity table.csv", 53 | # index=False, 54 | # ) 55 | # plot intensity score confusion matrix 56 | 
intensity_confusion_matrix = confusion_matrix( 57 | data["answer_intensity"], data["predicted_intensity"], labels=intensity 58 | ) 59 | fig,ax=Precision_Recall_Factory.plot_intensity_confusion_matrix(intensity_confusion_matrix,intensity_score,mask_after_sec,output_path=None) 60 | 61 | performance_score = { 62 | f"{label}_threshold ({unit})": [], 63 | "confusion matrix": [], 64 | "accuracy": [], 65 | "precision": [], 66 | "recall": [], 67 | "F1": [], 68 | } 69 | for label_threshold in score_curve_threshold: 70 | predict_logic = np.where(predict_label > label_threshold, 1, 0) 71 | real_logic = np.where(real_label > label_threshold, 1, 0) 72 | matrix = confusion_matrix(real_logic, predict_logic, labels=[1, 0]) 73 | accuracy = np.sum(np.diag(matrix)) / np.sum(matrix) # (TP+TN)/all 74 | precision = matrix[0][0] / np.sum(matrix, axis=0)[0] # TP/(TP+FP) 75 | recall = matrix[0][0] / np.sum(matrix, axis=1)[0] # TP/(TP+FP) 76 | F1_score = 2 / ((1 / precision) + (1 / recall)) 77 | performance_score[f"{label}_threshold ({unit})"].append( 78 | np.round((10**label_threshold), 3) 79 | ) # m/s^2 / 9.8 = g 80 | performance_score["confusion matrix"].append(matrix) 81 | performance_score["accuracy"].append(accuracy) 82 | performance_score["precision"].append(precision) 83 | performance_score["recall"].append(recall) 84 | performance_score["F1"].append(F1_score) 85 | 86 | f1_curve_fig, f1_curve_ax = Precision_Recall_Factory.plot_score_curve( 87 | performance_score, 88 | f1_curve_fig, 89 | f1_curve_ax, 90 | "F1", 91 | score_curve_threshold, 92 | mask_after_sec, 93 | output_path=None, 94 | ) 95 | # precision_curve_ax.set_xlim(0,90) #figure in thesis 96 | precision_curve_fig, precision_curve_ax = Precision_Recall_Factory.plot_score_curve( 97 | performance_score, 98 | precision_curve_fig, 99 | precision_curve_ax, 100 | "precision", 101 | score_curve_threshold, 102 | mask_after_sec, 103 | output_path=None, 104 | ) 105 | # precision_curve_ax.set_xlim(0,90) #figure in thesis 106 | # precision_curve_fig.savefig(f"../paper image/precision_curve.png", dpi=300) 107 | recall_curve_fig, recall_curve_ax = Precision_Recall_Factory.plot_score_curve( 108 | performance_score, 109 | recall_curve_fig, 110 | recall_curve_ax, 111 | "recall", 112 | score_curve_threshold, 113 | mask_after_sec, 114 | output_path=None, 115 | ) 116 | # recall_curve_ax.set_xlim(0,90) #figure in thesis 117 | # recall_curve_fig.savefig(f"../paper image/recall_curve.png", dpi=300) 118 | 119 | predict_table = pd.DataFrame(performance_score) 120 | # predict_table.to_csv( 121 | # f"{output_path}/{mask_after_sec} sec confusion matrix table.csv", 122 | # index=False, 123 | # ) 124 | -------------------------------------------------------------------------------- /data_preprocess/tracer_demo/tt.table: -------------------------------------------------------------------------------- 1 | 120.816 23.853 7.500 120.805 23.510 -0.000 7.775 13.288 2 | 120.816 23.853 7.500 121.365 23.099 -0.000 18.814 33.033 3 | 120.816 23.853 7.500 120.412 23.719 -0.000 9.346 16.527 4 | 120.816 23.853 7.500 120.172 23.601 -0.000 13.254 23.098 5 | 120.816 23.853 7.500 120.552 23.581 -0.000 8.739 15.026 6 | 120.816 23.853 7.500 120.269 23.485 -0.000 13.276 23.241 7 | 120.816 23.853 7.500 120.544 23.465 -0.000 10.449 17.891 8 | 120.816 23.853 7.500 120.152 23.333 -0.000 16.196 28.654 9 | 120.816 23.853 7.500 120.583 23.296 -0.000 12.711 22.111 10 | 120.816 23.853 7.500 120.405 23.355 -0.000 13.499 23.428 11 | 120.816 23.853 7.500 120.153 23.221 -0.000 17.698 31.583 12 | 120.816 23.853 
7.500 120.268 23.215 -0.000 16.828 29.655 13 | 120.816 23.853 7.500 120.478 23.180 -0.000 15.269 26.681 14 | 120.816 23.853 7.500 120.462 23.046 -0.000 17.891 31.529 15 | 120.816 23.853 7.500 120.280 22.965 -0.000 20.569 36.566 16 | 120.816 23.853 7.500 120.606 23.757 -0.000 5.982 10.513 17 | 120.816 23.853 7.500 120.514 23.779 -0.000 7.312 12.998 18 | 120.816 23.853 7.500 120.411 23.799 -0.000 8.928 15.790 19 | 120.816 23.853 7.500 120.247 23.752 -0.000 11.424 20.035 20 | 120.816 23.853 7.500 120.605 23.632 -0.000 7.320 12.607 21 | 120.816 23.853 7.500 120.528 23.613 -0.000 8.671 15.115 22 | 120.816 23.853 7.500 120.294 23.580 -0.000 11.958 20.900 23 | 120.816 23.853 7.500 120.215 23.541 -0.000 13.308 23.309 24 | 120.816 23.853 7.500 120.544 23.521 -0.000 9.665 16.530 25 | 120.816 23.853 7.500 120.584 23.520 -0.000 9.173 15.720 26 | 120.816 23.853 7.500 120.479 23.607 -0.000 9.393 16.690 27 | 120.816 23.853 7.500 120.344 23.521 -0.000 11.961 20.928 28 | 120.816 23.853 7.500 120.596 23.439 -0.000 10.387 17.869 29 | 120.816 23.853 7.500 120.583 23.358 -0.000 11.839 20.570 30 | 120.816 23.853 7.500 120.163 23.383 -0.000 15.530 27.379 31 | 120.816 23.853 7.500 120.463 23.477 -0.000 11.378 19.692 32 | 120.816 23.853 7.500 120.447 23.494 -0.000 11.369 19.981 33 | 120.816 23.853 7.500 120.408 23.280 -0.000 14.536 25.278 34 | 120.816 23.853 7.500 120.501 23.288 -0.000 13.458 23.378 35 | 120.816 23.853 7.500 120.310 23.308 -0.000 15.091 26.621 36 | 120.816 23.853 7.500 120.271 23.270 -0.000 16.031 28.307 37 | 120.816 23.853 7.500 120.410 23.149 -0.000 16.447 28.993 38 | 120.816 23.853 7.500 120.319 23.172 -0.000 16.931 29.752 39 | 120.816 23.853 7.500 120.103 23.184 -0.000 18.527 32.838 40 | 120.816 23.853 7.500 120.239 23.124 -0.000 18.363 32.707 41 | 120.816 23.853 7.500 120.511 23.077 -0.000 16.838 29.451 42 | 120.816 23.853 7.500 120.450 23.121 -0.000 16.569 29.137 43 | 120.816 23.853 7.500 120.340 23.027 -0.000 19.316 34.432 44 | 120.816 23.853 7.500 120.345 22.906 -0.000 21.063 37.970 45 | 120.816 23.853 7.500 120.208 22.921 -0.000 21.445 37.939 46 | 120.816 23.853 7.500 120.184 22.999 -0.000 20.714 37.091 47 | 120.816 23.853 7.500 120.182 22.974 -0.000 20.914 37.582 48 | 120.816 23.853 7.500 120.229 22.965 -0.000 20.913 37.361 49 | 120.816 23.853 7.500 120.164 23.065 -0.000 19.890 35.615 50 | 120.816 23.853 7.500 120.805 23.510 -0.000 7.775 13.288 51 | 120.816 23.853 7.500 119.555 23.567 -0.000 22.200 38.366 52 | 120.816 23.853 7.500 120.222 23.638 -0.000 12.358 21.596 53 | 120.816 23.853 7.500 120.228 23.040 -0.000 19.869 35.383 54 | 120.816 23.853 7.500 120.528 23.185 -0.000 14.736 25.631 55 | 120.816 23.853 7.500 120.678 23.597 -0.000 6.833 11.810 56 | 120.816 23.853 7.500 120.496 23.270 -0.000 13.734 23.881 57 | 120.816 23.853 7.500 120.298 23.724 -0.000 10.964 19.389 58 | 120.816 23.853 7.500 120.593 23.351 -0.000 11.879 20.659 59 | 120.816 23.853 7.500 120.519 23.384 -0.000 11.966 20.596 60 | 120.816 23.853 7.500 120.429 23.346 -0.000 13.418 23.195 61 | 120.816 23.853 7.500 120.216 23.267 -0.000 16.496 29.114 62 | 120.816 23.853 7.500 120.478 23.791 -0.000 7.844 13.928 63 | 120.816 23.853 7.500 120.147 23.654 -0.000 13.239 22.939 64 | 120.816 23.853 7.500 120.321 23.794 -0.000 10.330 18.324 65 | 120.816 23.853 7.500 120.233 22.983 -0.000 20.662 36.910 66 | 120.816 23.853 7.500 120.280 23.137 -0.000 17.877 31.911 67 | 120.816 23.853 7.500 120.342 23.227 -0.000 15.923 27.745 68 | 120.816 23.853 7.500 120.562 23.686 -0.000 7.360 12.923 69 | 120.816 23.853 7.500 120.614 23.245 -0.000 
13.273 22.993 70 | 120.816 23.853 7.500 120.465 23.669 -0.000 8.919 15.996 71 | 120.816 23.853 7.500 120.290 23.299 -0.000 15.436 27.358 72 | 120.816 23.853 7.500 120.530 23.252 -0.000 13.672 23.754 73 | 120.816 23.853 7.500 120.530 23.252 -0.000 13.672 23.754 74 | 120.816 23.853 7.500 120.227 23.791 -0.000 11.575 20.291 75 | 120.816 23.853 7.500 120.183 23.703 -0.000 12.497 21.708 76 | 120.816 23.853 7.500 120.119 23.037 -0.000 20.554 36.901 77 | 120.816 23.853 7.500 120.097 23.154 -0.000 18.946 33.610 78 | 120.816 23.853 7.500 120.108 23.078 -0.000 20.030 35.820 79 | 120.816 23.853 7.500 120.424 23.498 -0.000 11.548 20.462 80 | -------------------------------------------------------------------------------- /data_preprocess/5_check_waveform.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | import os 4 | 5 | sys.path.append("..") 6 | from read_tsmip import read_tsmip 7 | from obspy.signal.trigger import ar_pick 8 | import matplotlib.pyplot as plt 9 | import tkinter as tk 10 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 11 | 12 | start_year=1999 13 | end_year=2008 14 | start_index = 14031 15 | Afile_path = "../data/Afile" 16 | sta_path = "../data/station information" 17 | waveform_path = "../data/waveform" 18 | output_path="events_traces_catalog" 19 | traces_file_name=f"{start_year}_{end_year}_target_traces.csv" 20 | error_file_name=f"{start_year}_{end_year}_error_traces_file.csv" 21 | traces = pd.read_csv(f"{output_path}/{traces_file_name}") 22 | catalog = pd.read_csv(f"{output_path}/{start_year}_{end_year}_target_catalog.csv") 23 | 24 | 25 | def ok_traces(traces=None, index=None): 26 | traces.loc[index, "quality_control"] = "y" 27 | win.destroy() 28 | 29 | 30 | def broken_traces(traces=None, index=None): 31 | traces.loc[index, "quality_control"] = "n" 32 | win.destroy() 33 | def quit(running): 34 | running.set(False) 35 | win.destroy() 36 | 37 | 38 | if "quality_control" not in traces.columns: 39 | traces["quality_control"] = "TBD" 40 | if os.path.isfile(f"{error_file_name}"): 41 | error_file=pd.read_csv(f"{error_file_name}") 42 | else: 43 | error_file = pd.DataFrame({'index':[]}) 44 | error_file.to_csv(f"{error_file_name}", index=False) 45 | for i in range(start_index,len(traces)): 46 | print(f"{i}/{len(traces)}") 47 | try: 48 | EQ_ID = str(traces["EQ_ID"][i]) 49 | year = str(traces["year"][i]) 50 | month = str(traces["month"][i]) 51 | day = str(traces["day"][i]) 52 | hour = str(traces["hour"][i]) 53 | minute = str(traces["minute"][i]) 54 | second = str(traces["second"][i]) 55 | intensity = str(traces["intensity"][i]) 56 | station_name= traces["station_name"][i] 57 | epdis=str(traces["epdis (km)"][i]) 58 | file_name = traces["file_name"][i].strip() 59 | magnitude = catalog.query(f"EQ_ID=={EQ_ID}")["magnitude"].tolist()[0] 60 | if len(month) < 2: 61 | month = "0" + month 62 | waveform = read_tsmip(f"{waveform_path}/{year}/{month}/{file_name}.txt") 63 | # picking 64 | if i==8319: #1999~2008 index 8319 can't pick, kernel crushed 65 | continue 66 | p_pick,_ = ar_pick( 67 | waveform[0], 68 | waveform[1], 69 | waveform[2], 70 | samp_rate=waveform[0].stats.sampling_rate, 71 | f1=1, # Frequency of the lower bandpass window 72 | f2=20, # Frequency of the upper bandpass window 73 | lta_p=1, # Length of LTA for the P arrival in seconds 74 | sta_p=0.1, # Length of STA for the P arrival in seconds 75 | lta_s=4.0, # Length of LTA for the S arrival in seconds 76 | sta_s=1.0, # Length of STA for the P 
arrival in seconds 77 | m_p=2, # Number of AR coefficients for the P arrival 78 | m_s=8, # Number of AR coefficients for the S arrival 79 | l_p=0.1, 80 | l_s=0.2, 81 | s_pick=False, 82 | ) 83 | if (p_pick-3)>0: 84 | start_time=int((p_pick-3)*waveform[0].stats.sampling_rate) 85 | else: 86 | start_time=0 87 | # plot 88 | fig, ax = plt.subplots(3, 1) 89 | fig.subplots_adjust(hspace=0.4) 90 | for j in range(len(ax)): 91 | # start_time=4000 92 | if (p_pick+30)*waveform[0].stats.sampling_rate", lambda event: ok_traces(traces=traces, index=i)) 121 | win.bind("", lambda event: broken_traces(traces=traces, index=i)) 122 | running = tk.BooleanVar(value=True) 123 | win.bind("", lambda event: quit(running) if event.keysym == "Escape" else None) 124 | win.mainloop() 125 | if running.get(): 126 | pass 127 | else: 128 | print(f"stop at index:{i}") 129 | break 130 | except Exception as reason: 131 | print(file_name, f"year:{year},month:{month}, {reason}") 132 | row={"index":i,"year":int(year), "month":month, "file":file_name,"reason":reason} 133 | if i not in error_file["index"].values: 134 | error_file= pd.concat([error_file,pd.DataFrame(row, index=[0])],ignore_index=True) 135 | traces.loc[i, "quality_control"] = "n" 136 | continue 137 | 138 | # traces.to_csv(f"{output_path}/{traces_file_name}", index=False) 139 | # error_file.to_csv(f"{output_path}/{error_file_name}", index=False) 140 | print("data saved") 141 | -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/4_cut_waveform.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | import pandas as pd 5 | from tqdm import tqdm 6 | import sys 7 | import obspy 8 | import matplotlib.pyplot as plt 9 | 10 | sys.path.append("../..") 11 | from read_tsmip import cut_traces 12 | 13 | sta_path = "../../data/station_information" 14 | waveform_path = "../../data/0918_M6.8_1319_1330/ascii" 15 | traces = pd.read_csv("traces_catalog.csv") 16 | catalog = pd.read_csv("event_catalog.csv") 17 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv") 18 | traces.loc[traces.index, "p_pick_sec"] = pd.to_timedelta( 19 | traces["p_pick_sec"], unit="sec" 20 | ) 21 | traces.loc[traces.index, "p_arrival_abs_time"] = pd.to_datetime( 22 | traces["p_arrival_abs_time"], format="%Y-%m-%d %H:%M:%S" 23 | ) 24 | 25 | output = f"../../data/TSMIP_0918_M6.8_1319_1330.hdf5" 26 | error_event = {"EQ_ID": [], "reason": []} 27 | with h5py.File(output, "w") as file: 28 | data = file.create_group("data") 29 | meta = file.create_group("metadata") 30 | for eq_id in tqdm(catalog["EQ_ID"]): 31 | # for eq_id in [247]: 32 | # try: 33 | tmp_traces, traces_info = cut_traces( 34 | traces, eq_id, waveform_path, waveform_type="acc" 35 | ) 36 | _, vel_info = cut_traces(traces, eq_id, waveform_path, waveform_type="vel") 37 | _, dis_info = cut_traces(traces, eq_id, waveform_path, waveform_type="dis") 38 | traces_info["vel"] = vel_info["traces"] 39 | traces_info["dis"] = dis_info["traces"] 40 | # fig=plot_cutting_event(tmp_traces,traces_info) 41 | start_time_str_arr = np.array(traces_info["start_time"], dtype="S30") 42 | station_name_str_arr = np.array(tmp_traces["station_name"], dtype="S30") 43 | tmp_station_info = pd.merge( 44 | tmp_traces[["station_name","Vs30"]], 45 | station_info[ 46 | ["location_code", "latitude", "longitude", "elevation (m)"] 47 | ], 48 | how="left", 49 | left_on="station_name", 50 | right_on="location_code", 51 | ) 52 | 
location_array = np.array( 53 | tmp_station_info[["latitude", "longitude", "elevation (m)"]] 54 | ) 55 | Vs30_array=np.array(tmp_traces["Vs30"]) 56 | if np.isnan(location_array).any(): 57 | print("The location array contain NaN values") 58 | continue 59 | event = data.create_group(f"{eq_id}") 60 | event.create_dataset( 61 | "acc_traces", data=traces_info["traces"], dtype=np.float64 62 | ) 63 | event.create_dataset( 64 | "vel_traces", data=traces_info["vel"], dtype=np.float64 65 | ) 66 | event.create_dataset( 67 | "dis_traces", data=traces_info["dis"], dtype=np.float64 68 | ) 69 | event.create_dataset("p_picks", data=traces_info["p_picks"], dtype=np.int64) 70 | event.create_dataset("pga", data=traces_info["pga"], dtype=np.float64) 71 | event.create_dataset("pgv", data=traces_info["pgv"], dtype=np.float64) 72 | event.create_dataset( 73 | "start_time", data=start_time_str_arr, maxshape=(None), chunks=True 74 | ) 75 | event.create_dataset( 76 | "pga_time", data=traces_info["pga_time"], dtype=np.int64 77 | ) 78 | event.create_dataset( 79 | "pgv_time", data=traces_info["pgv_time"], dtype=np.int64 80 | ) 81 | event.create_dataset( 82 | "station_name", data=station_name_str_arr, maxshape=(None), chunks=True 83 | ) 84 | event.create_dataset( 85 | "station_location", data=location_array, dtype=np.float64 86 | ) 87 | event.create_dataset( 88 | "Vs30", data=Vs30_array, dtype=np.float64 89 | ) 90 | # except Exception as reason: 91 | # print(f"EQ_ID:{eq_id}, {reason}") 92 | # error_event["EQ_ID"].append(eq_id) 93 | # error_event["reason"].append(reason) 94 | # continue 95 | # fig.savefig(f"data/cutting waveform image/{eq_id}.png") 96 | error_event_df = pd.DataFrame(error_event) 97 | error_event_df.to_csv( 98 | "./load into hdf5 error event.csv", index=False 99 | ) 100 | 101 | catalog.to_hdf(output, key="metadata/event_metadata", mode="a", format="table") 102 | traces.to_hdf(output, key="metadata/traces_metadata", mode="a", format="table") 103 | 104 | # plot records section 105 | for eq_id in tqdm(catalog["EQ_ID"]): 106 | tmp_traces, traces_info = cut_traces(traces, eq_id, waveform_path, waveform_type="acc") 107 | for i,chan in enumerate(["HLZ","HLN","HLE"]): 108 | stream = obspy.core.stream.Stream() 109 | for j in range(len(traces_info["traces"])): 110 | trace = obspy.core.trace.Trace(data=traces_info["traces"][j][:, i]) 111 | trace.stats.id = eq_id 112 | trace.stats.station = tmp_traces["station_name"][j] 113 | trace.stats.channel = chan 114 | trace.stats.distance = tmp_traces["epdis (km)"][j] * 1000 115 | trace.stats.starttime = traces_info["start_time"][j] 116 | trace.stats.sampling_rate = 200 117 | 118 | stream.append(trace) 119 | fig, ax = plt.subplots() 120 | stream.plot(type="section",fig=fig) 121 | 122 | magnitude = catalog[catalog["EQ_ID"] == eq_id]["magnitude"].values[0] 123 | 124 | ax.set_title( 125 | f"EQ ID:{eq_id}, Magnitude: {magnitude}, start time: {traces_info['start_time'][j]}" 126 | ) 127 | fig.savefig(f"cut trace/{eq_id}_{trace.stats.channel}.png") 128 | plt.close() 129 | 130 | for i in range(len(traces_info["traces"])): 131 | file_name=tmp_traces["file_name"][i] 132 | station_name=tmp_traces["station_name"][i] 133 | p_pick=traces_info["p_picks"][i] 134 | fig,ax=plt.subplots(3,1,figsize=(14,7)) 135 | for j in range(len(ax)): 136 | ax[j].plot(traces_info["traces"][i][:,j]) 137 | ax[j].axvline(x=p_pick, color="r", linestyle="-") 138 | ax[0].set_title(f"EQ_ID:{eq_id},station_name: {station_name},cut from file_name:{file_name}") 139 | fig.savefig(f"cut 
trace/EQ_ID_{eq_id}_{station_name}.png",dpi=300) 140 | plt.close() -------------------------------------------------------------------------------- /data_preprocess/0918_M6.8_1319_1330/2_picking.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | import os 4 | import numpy as np 5 | 6 | sys.path.append("../..") 7 | from obspy.signal.trigger import ar_pick 8 | import matplotlib.pyplot as plt 9 | import obspy 10 | import tkinter as tk 11 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 12 | import re 13 | 14 | 15 | start_index = 0 16 | 17 | 18 | # =================== 19 | def ok_traces(traces=None, index=None): 20 | traces.loc[index, "quality_control"] = "y" 21 | win.destroy() 22 | 23 | 24 | def broken_traces(traces=None, index=None): 25 | traces.loc[index, "quality_control"] = "n" 26 | win.destroy() 27 | 28 | 29 | def quit(running): 30 | running.set(False) 31 | win.destroy() 32 | 33 | 34 | trace_catalog = pd.read_csv("traces_catalog.csv") 35 | 36 | for k in range(start_index, len(trace_catalog["file_name"])): 37 | file_name = trace_catalog["file_name"][k] 38 | print(f"{k}/{len(trace_catalog)}") 39 | try: 40 | txt = f"../../data/0918_M6.8_1319_1330/ascii/{file_name}.asc" 41 | data = pd.read_csv(txt, sep="\s+", skiprows=1, header=None).to_numpy() 42 | 43 | with open(txt, "r") as f: 44 | picks = f.readlines()[0] 45 | picks = re.findall(r"\d+\.\d+", picks) 46 | picks = [np.round(float(number), 2) for number in picks] 47 | 48 | waveform = obspy.core.stream.Stream() 49 | channel = ["HLZ", "HLN", "HLE"] 50 | for i, chan in enumerate(channel): 51 | start = np.where(data == picks[2])[0][0] 52 | end = np.where(data == picks[3])[0][0] 53 | trace = obspy.core.trace.Trace(data[start:end, i + 1]) 54 | 55 | trace.stats.network = "TW" 56 | # trace.stats.station = header[0][14:20] 57 | trace.stats.channel = chan 58 | 59 | trace.stats.sampling_rate = int(1 / abs(data[0, 0] - data[1, 0])) 60 | 61 | waveform.append(trace) 62 | 63 | p_pick, _ = ar_pick( 64 | waveform[0], 65 | waveform[1], 66 | waveform[2], 67 | samp_rate=waveform[0].stats.sampling_rate, 68 | f1=1, # Frequency of the lower bandpass window 69 | f2=20, # Frequency of the upper bandpass window 70 | lta_p=1, # Length of LTA for the P arrival in seconds 71 | sta_p=0.1, # Length of STA for the P arrival in seconds 72 | lta_s=4.0, # Length of LTA for the S arrival in seconds 73 | sta_s=1.0, # Length of STA for the P arrival in seconds 74 | m_p=2, # Number of AR coefficients for the P arrival 75 | m_s=8, # Number of AR coefficients for the S arrival 76 | l_p=0.1, 77 | l_s=0.2, 78 | s_pick=False, 79 | ) 80 | trace_catalog.loc[k, "p_pick_sec"] = p_pick 81 | if (p_pick - 3) > 0: 82 | start_time = int((p_pick - 3) * waveform[0].stats.sampling_rate) 83 | else: 84 | start_time = 0 85 | # plot 86 | fig, ax = plt.subplots(3, 1) 87 | fig.subplots_adjust(hspace=0.4) 88 | for j in range(len(ax)): 89 | # start_time=4000 90 | if (p_pick + 30) * waveform[0].stats.sampling_rate < len(waveform[0].data): 91 | endtime = int((p_pick + 30) * waveform[0].stats.sampling_rate) 92 | # endtime=4600 93 | ax[j].plot( 94 | waveform[j].times()[start_time:], waveform[j].data[start_time:], "k" 95 | ) 96 | ax[j].axvline(x=p_pick, color="r", linestyle="-") 97 | else: 98 | ax[j].plot( 99 | waveform[j].times()[start_time:], waveform[j].data[start_time:], "k" 100 | ) 101 | ax[j].axvline(x=p_pick, color="r", linestyle="-") 102 | ax[0].set_title(f"{file_name}") 103 | ax[1].set_ylabel("gal") 104 | 
ax[-1].set_xlabel("time (sec)") 105 | plt.close() 106 | 107 | win = tk.Tk() 108 | win.attributes("-topmost", True) 109 | win.after(1, lambda: win.focus_force()) 110 | win.title("check waveform") 111 | win.geometry("700x650+10+10") 112 | win.maxsize(1000, 700) 113 | canvas = FigureCanvasTkAgg(fig, win) 114 | canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1) 115 | 116 | label = tk.Label(win, text="Press ESC to quit") 117 | label.pack() 118 | win.bind("", lambda event: ok_traces(traces=trace_catalog, index=k)) 119 | win.bind("", lambda event: broken_traces(traces=trace_catalog, index=k)) 120 | running = tk.BooleanVar(value=True) 121 | win.bind( 122 | "", lambda event: quit(running) if event.keysym == "Escape" else None 123 | ) 124 | win.mainloop() 125 | if running.get(): 126 | pass 127 | else: 128 | print(f"stop at index:{k}") 129 | break 130 | except Exception as reason: 131 | print(file_name, f"{reason}") 132 | row = {"index": i, "file": file_name, "reason": reason} 133 | if i not in error_file["index"].values: 134 | error_file = pd.concat( 135 | [error_file, pd.DataFrame(row, index=[0])], ignore_index=True 136 | ) 137 | trace_catalog.loc[i, "quality_control"] = "n" 138 | continue 139 | trace_catalog.to_csv(f"traces_catalog.csv", index=False) 140 | 141 | # ========shift p_picking by velocity model to correct absolute time====== 142 | traces = pd.read_csv("traces_catalog.csv") 143 | catalog = pd.read_csv("event_catalog.csv") 144 | 145 | EQ_ID = os.listdir(f"../tracer_demo/2023_output") 146 | 147 | traces.insert(0, "EQ_ID", 30792) 148 | 149 | traces=pd.merge( 150 | catalog[["EQ_ID", "year", "month", "day", "hour", "minute", "second"]], 151 | traces, 152 | how="right", 153 | on="EQ_ID", 154 | ) 155 | traces["p_arrival_abs_time"] = pd.to_datetime( 156 | traces[["year", "month", "day", "hour", "minute", "second"]] 157 | ) 158 | 159 | colnames = [ 160 | "evt_lon", 161 | "evt_lat", 162 | "evt_depth", 163 | "sta_lon", 164 | "sta_lat", 165 | "sta_elev", 166 | "p_arrival", 167 | "s_arrival", 168 | ] 169 | for eq in EQ_ID: 170 | event_file_path = f"../tracer_demo/2023_output/{eq}/output.table" 171 | tracer_output = pd.read_csv( 172 | event_file_path, sep=r"\s+", names=colnames, header=None 173 | ) 174 | trace_index = traces[traces["EQ_ID"] == int(eq)].index 175 | p_arrival = pd.to_timedelta(tracer_output["p_arrival"], unit="s") 176 | p_arrival.index = trace_index 177 | traces.loc[trace_index, "p_arrival_abs_time"] = ( 178 | traces.loc[trace_index, "p_arrival_abs_time"] + p_arrival 179 | ) 180 | traces.to_csv(f"traces_catalog.csv", index=False) -------------------------------------------------------------------------------- /model_train_predict/predict_ensemble_merge_info.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib.pyplot as plt 3 | 4 | plt.subplots() 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | from torch.utils.data import DataLoader 9 | from tqdm import tqdm 10 | import sys 11 | sys.path.append("..") 12 | from model.CNN_Transformer_Mixtureoutput_TEAM import ( 13 | CNN, 14 | MDN, 15 | MLP, 16 | PositionEmbedding_Vs30, 17 | TransformerEncoder, 18 | full_model, 19 | ) 20 | from data.multiple_sta_dataset import multiple_station_dataset 21 | from model_performance_analysis.analysis import Intensity_Plotter 22 | 23 | mask_after_sec = 7 24 | label = "pga" 25 | data = multiple_station_dataset( 26 | "../data/TSMIP_1999_2019_Vs30.hdf5", 27 | mode="test", 28 | mask_waveform_sec=mask_after_sec, 29 
| test_year=2016, 30 | label_key=label, 31 | mag_threshold=0, 32 | input_type="acc", 33 | data_length_sec=15, 34 | ) 35 | # ===========predict============== 36 | device = torch.device("cuda") 37 | for num in [11]: 38 | path = f"../model/model{num}.pt" 39 | emb_dim = 150 40 | mlp_dims = (150, 100, 50, 30, 10) 41 | CNN_model = CNN(mlp_input=5665).cuda() 42 | pos_emb_model = PositionEmbedding_Vs30(emb_dim=emb_dim).cuda() 43 | transformer_model = TransformerEncoder() 44 | mlp_model = MLP(input_shape=(emb_dim,), dims=mlp_dims).cuda() 45 | mdn_model = MDN(input_shape=(mlp_dims[-1],)).cuda() 46 | full_Model = full_model( 47 | CNN_model, 48 | pos_emb_model, 49 | transformer_model, 50 | mlp_model, 51 | mdn_model, 52 | pga_targets=25, 53 | data_length=3000, 54 | ).to(device) 55 | full_Model.load_state_dict(torch.load(path)) 56 | loader = DataLoader(dataset=data, batch_size=1) 57 | 58 | Mixture_mu = [] 59 | Label = [] 60 | P_picks = [] 61 | EQ_ID = [] 62 | Label_time = [] 63 | Sta_name = [] 64 | Lat = [] 65 | Lon = [] 66 | Elev = [] 67 | for j, sample in tqdm(enumerate(loader)): 68 | picks = sample["p_picks"].flatten().numpy().tolist() 69 | label_time = sample[f"{label}_time"].flatten().numpy().tolist() 70 | lat = sample["target"][:, :, 0].flatten().tolist() 71 | lon = sample["target"][:, :, 1].flatten().tolist() 72 | elev = sample["target"][:, :, 2].flatten().tolist() 73 | P_picks.extend(picks) 74 | P_picks.extend([np.nan] * (25 - len(picks))) 75 | Label_time.extend(label_time) 76 | Label_time.extend([np.nan] * (25 - len(label_time))) 77 | Lat.extend(lat) 78 | Lon.extend(lon) 79 | Elev.extend(elev) 80 | 81 | eq_id = sample["EQ_ID"][:, :, 0].flatten().numpy().tolist() 82 | EQ_ID.extend(eq_id) 83 | EQ_ID.extend([np.nan] * (25 - len(eq_id))) 84 | weight, sigma, mu = full_Model(sample) 85 | 86 | weight = weight.cpu() 87 | sigma = sigma.cpu() 88 | mu = mu.cpu() 89 | if j == 0: 90 | Mixture_mu = torch.sum(weight * mu, dim=2).cpu().detach().numpy() 91 | Label = sample["label"].cpu().detach().numpy() 92 | else: 93 | Mixture_mu = np.concatenate( 94 | [Mixture_mu, torch.sum(weight * mu, dim=2).cpu().detach().numpy()], 95 | axis=1, 96 | ) 97 | Label = np.concatenate( 98 | [Label, sample["label"].cpu().detach().numpy()], axis=1 99 | ) 100 | Label = Label.flatten() 101 | Mixture_mu = Mixture_mu.flatten() 102 | 103 | output = { 104 | "EQ_ID": EQ_ID, 105 | "p_picks": P_picks, 106 | f"{label}_time": Label_time, 107 | "predict": Mixture_mu, 108 | "answer": Label, 109 | "latitude": Lat, 110 | "longitude": Lon, 111 | "elevation": Elev, 112 | } 113 | output_df = pd.DataFrame(output) 114 | output_df = output_df[output_df["answer"] != 0] 115 | # output_df.to_csv( 116 | # f"./predict/model {num} {mask_after_sec} sec prediction.csv", index=False 117 | # ) 118 | fig, ax = Intensity_Plotter.true_predicted( 119 | y_true=output_df["answer"], 120 | y_pred=output_df["predict"], 121 | quantile=False, 122 | agg="point", 123 | point_size=12, 124 | target=label, 125 | ) 126 | eq_id = 24784 127 | ax.scatter( 128 | output_df["answer"][output_df["EQ_ID"] == eq_id], 129 | output_df["predict"][output_df["EQ_ID"] == eq_id], 130 | c="r", 131 | ) 132 | magnitude = data.event_metadata[data.event_metadata["EQ_ID"] == eq_id][ 133 | "magnitude" 134 | ].values[0] 135 | ax.set_title( 136 | f"{mask_after_sec}s True Predict Plot, 2016 data", 137 | fontsize=20, 138 | ) 139 | 140 | # fig.savefig(f"../predict/model {num} {mask_after_sec} sec.png") 141 | 142 | # ===========merge info============== 143 | Afile_path = 
"../data_preprocess/events_traces_catalog" 144 | output_path = "../predict/station_blind_Vs30_bias2closed_station_2016" 145 | catalog = pd.read_csv(f"{Afile_path}/1999_2019_final_catalog.csv") 146 | traces_info = pd.read_csv(f"{Afile_path}/1999_2019_final_traces_Vs30.csv") 147 | ensemble_predict = pd.read_csv( 148 | f"{output_path}/model 11 {mask_after_sec} sec prediction.csv" 149 | ) 150 | trace_merge_catalog = pd.merge( 151 | traces_info, 152 | catalog[ 153 | [ 154 | "EQ_ID", 155 | "lat", 156 | "lat_minute", 157 | "lon", 158 | "lon_minute", 159 | "depth", 160 | "magnitude", 161 | "nsta", 162 | "nearest_sta_dist (km)", 163 | ] 164 | ], 165 | on="EQ_ID", 166 | how="left", 167 | ) 168 | trace_merge_catalog["event_lat"] = ( 169 | trace_merge_catalog["lat"] + trace_merge_catalog["lat_minute"] / 60 170 | ) 171 | 172 | trace_merge_catalog["event_lon"] = ( 173 | trace_merge_catalog["lon"] + trace_merge_catalog["lon_minute"] / 60 174 | ) 175 | trace_merge_catalog.drop( 176 | ["lat", "lat_minute", "lon", "lon_minute"], axis=1, inplace=True 177 | ) 178 | trace_merge_catalog.rename(columns={"elevation (m)": "elevation"}, inplace=True) 179 | 180 | 181 | data_path = "D:/TEAM_TSMIP/data/TSMIP_1999_2019.hdf5" 182 | dataset = h5py.File(data_path, "r") 183 | for eq_id in ensemble_predict["EQ_ID"].unique(): 184 | eq_id = int(eq_id) 185 | station_name = dataset["data"][str(eq_id)]["station_name"][:].tolist() 186 | 187 | ensemble_predict.loc[ 188 | ensemble_predict.query(f"EQ_ID=={eq_id}").index, "station_name" 189 | ] = station_name 190 | 191 | ensemble_predict["station_name"] = ensemble_predict["station_name"].str.decode("utf-8") 192 | 193 | 194 | prediction_with_info = pd.merge( 195 | ensemble_predict, 196 | trace_merge_catalog.drop( 197 | [ 198 | "latitude", 199 | "longitude", 200 | "elevation", 201 | ], 202 | axis=1, 203 | ), 204 | on=["EQ_ID", "station_name"], 205 | how="left", 206 | suffixes=["_window", "_file"], 207 | ) 208 | # prediction_with_info.to_csv( 209 | # f"{output_path}/{mask_after_sec} sec model11 with all info.csv", index=False 210 | # ) 211 | -------------------------------------------------------------------------------- /model_performance_analysis/plot_CWA_EEW_intensity.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pandas as pd 3 | import numpy as np 4 | import re 5 | import os 6 | from sklearn.metrics import confusion_matrix 7 | from analysis import Intensity_Plotter 8 | 9 | 10 | def haversine(lat1, lon1, lat2, lon2): 11 | # 將經緯度轉換為弧度 12 | lat1 = math.radians(lat1) 13 | lon1 = math.radians(lon1) 14 | lat2 = math.radians(lat2) 15 | lon2 = math.radians(lon2) 16 | 17 | # Haversine公式 18 | dlon = lon2 - lon1 19 | dlat = lat2 - lat1 20 | a = ( 21 | math.sin(dlat / 2) ** 2 22 | + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2 23 | ) 24 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) 25 | 26 | # 地球半徑(千米) 27 | radius = 6371 28 | 29 | # 計算距離 30 | distance = radius * c 31 | 32 | return distance 33 | 34 | 35 | # EEW calculate intensity 36 | path = "../CWA_EEW_report" 37 | site_info = pd.read_excel(f"{path}/site.xlsx") 38 | catalog = pd.read_excel(f"{path}/EEW2016.xlsx") 39 | catalog.columns = [ 40 | "event_time", 41 | "catalog_lon", 42 | "catalog_lat", 43 | "catalog_mag", 44 | "catalog_dep", 45 | "eew_lon", 46 | "eew_lat", 47 | "eew_mag", 48 | "eew_dep", 49 | "eew_time", 50 | ] 51 | catalog = catalog.query("catalog_mag>=5.5") 52 | catalog["event_time"] = catalog["event_time"].astype(str) 53 | 
catalog.dropna(inplace=True) 54 | catalog.reset_index(drop=True, inplace=True) 55 | predict_dict = { 56 | "event_time": [], 57 | "sta_lat": [], 58 | "sta_lon": [], 59 | "predict_pga": [], 60 | "station_code": [], 61 | "process_time": [], 62 | } 63 | for i in range(len(catalog)): 64 | print(catalog["event_time"][i]) 65 | lat = catalog["eew_lat"][i] 66 | lon = catalog["eew_lon"][i] 67 | dep = catalog["eew_dep"][i] 68 | mag = catalog["eew_mag"][i] 69 | for j in range(len(site_info)): 70 | if dep < 40: 71 | Si = site_info["site_s"][j] 72 | hypo_dist = math.sqrt( 73 | math.pow( 74 | haversine(lat, lon, site_info["lat"][j], site_info["lon"][j]), 2 75 | ) 76 | + math.pow(dep, 2) 77 | ) 78 | pga = ( 79 | 12.44 * math.exp(1.31 * mag) * math.pow(hypo_dist, -1.837) * Si 80 | ) # 2021_0303 from Hsiao 81 | else: 82 | Si = site_info["site_d"][j] 83 | hypo_dist = math.sqrt( 84 | math.pow( 85 | haversine(lat, lon, site_info["lat"][j], site_info["lon"][j]), 2 86 | ) 87 | + math.pow(dep, 2) 88 | ) 89 | pga = ( 90 | 12.44 * math.exp(1.31 * mag) * math.pow(hypo_dist, -1.837) * Si 91 | ) # 2021_0303 from Hsiao 92 | 93 | predict_dict["event_time"].append(catalog["event_time"][i]) 94 | predict_dict["sta_lat"].append(site_info["lat"][j]) 95 | predict_dict["sta_lon"].append(site_info["lon"][j]) 96 | predict_dict["predict_pga"].append(pga) 97 | predict_dict["station_code"].append(site_info["code"][j]) 98 | predict_dict["process_time"].append(catalog["eew_time"][i]) 99 | 100 | predict_df = pd.DataFrame(predict_dict) 101 | predict_df["event_time"] = predict_df["event_time"].astype(str) 102 | # merge ground true pga 103 | 104 | 105 | pattern = r"[=,]" 106 | true_pga_dict = { 107 | "event_time": [], 108 | "station_code": [], 109 | "sta_lon": [], 110 | "sta_lat": [], 111 | "dist": [], 112 | "PGA(V)": [], 113 | "PGA(NS)": [], 114 | "PGA(EW)": [], 115 | } 116 | files = os.listdir(f"{path}/event_true_pga") 117 | for file in files: 118 | with open(f"{path}/event_true_pga/{file}", "r", encoding="iso-8859-1") as event: 119 | start_line = 5 120 | lines = event.readlines() 121 | for i in range(start_line, len(lines)): 122 | line = lines[i] 123 | result = re.split(pattern, line.strip()) 124 | true_pga_dict["event_time"].append(file.replace(".txt", "")) 125 | true_pga_dict["station_code"].append(result[1].replace(" ", "")) 126 | true_pga_dict["sta_lon"].append(float(result[5].replace(" ", ""))) 127 | true_pga_dict["sta_lat"].append(float(result[7].replace(" ", ""))) 128 | true_pga_dict["dist"].append(float(result[9].replace(" ", ""))) 129 | true_pga_dict["PGA(V)"].append(float(result[13].replace(" ", ""))) 130 | true_pga_dict["PGA(NS)"].append(float(result[15].replace(" ", ""))) 131 | true_pga_dict["PGA(EW)"].append(float(result[17].replace(" ", ""))) 132 | 133 | true_pga_df = pd.DataFrame(true_pga_dict) 134 | 135 | final_table = pd.merge( 136 | predict_df, 137 | true_pga_df, 138 | on=["event_time", "station_code"], 139 | how="left", 140 | suffixes=["_pre", "_true"], 141 | ) 142 | final_table.dropna(inplace=True) 143 | time_eqid_dict = { 144 | "eqid": [24757, 24784, 25112, 25193, 25225, 25396, 25401, 25561, 25900], 145 | "event_time": [ 146 | "201601190213026", 147 | "201602051957026", 148 | "201604110545009", 149 | "201604271517014", 150 | "201604271819006", 151 | "201605120317015", 152 | "201605120429055", 153 | "201605310523046", 154 | "201610061552000", 155 | ], 156 | } 157 | time_eqid_df = pd.DataFrame(time_eqid_dict) 158 | final_traces = pd.merge(time_eqid_df, final_table, on="event_time", how="right") 159 | final_catalog = 
pd.merge(time_eqid_df, catalog, on="event_time", how="left")
160 | # final_traces.to_csv("cwa_test_eew_events.csv",index=False)
161 | # final_traces.to_csv("cwa_test_eew_traces.csv",index=False)
162 | # =========calculate residual mean and std (PGA values are in gal; *0.01 converts to m/s^2)
163 | final_traces["PGA"] = np.sqrt(
164 |     final_traces["PGA(V)"] ** 2
165 |     + final_traces["PGA(NS)"] ** 2
166 |     + final_traces["PGA(EW)"] ** 2
167 | )
168 | residual_mean = (
169 |     np.log10(final_traces["predict_pga"] * 0.01) - np.log10(final_traces["PGA"] * 0.01)
170 | ).mean()
171 | residual_std = (
172 |     np.log10(final_traces["predict_pga"] * 0.01) - np.log10(final_traces["PGA"] * 0.01)
173 | ).std()
174 | label_threshold = np.log10(np.array([0.250])) # intensity levels 3, 4, 5
175 | predict_logic = np.where(
176 |     np.log10(final_traces["predict_pga"] * 0.01) > label_threshold[0], 1, 0
177 | )
178 | real_logic = np.where(np.log10(final_traces["PGA"] * 0.01) > label_threshold[0], 1, 0)
179 | 
180 | matrix = confusion_matrix(real_logic, predict_logic, labels=[1, 0])
181 | accuracy = np.sum(np.diag(matrix)) / np.sum(matrix) # (TP+TN)/all
182 | precision = matrix[0][0] / np.sum(matrix, axis=0)[0] # TP/(TP+FP)
183 | recall = matrix[0][0] / np.sum(matrix, axis=1)[0] # TP/(TP+FN)
184 | F1_score = 2 / ((1 / precision) + (1 / recall))
185 | 
186 | fig, ax = Intensity_Plotter.plot_true_predicted(
187 |     y_true=np.log10(final_traces["PGA"] * 0.01),
188 |     y_pred=np.log10(final_traces["predict_pga"] * 0.01),
189 |     quantile=False,
190 |     agg="point",
191 |     point_size=70,
192 |     target="pga",
193 |     title="CWA EEW prediction in 2016 M>5.5 events",
194 | )
195 | # fig.savefig("CWA EEW report/true predict plot.png",dpi=300)
196 | # =========plot intensity map
197 | 
198 | for eqid in final_catalog["eqid"]:
199 |     label_type = "pga"
200 |     fig, ax = Intensity_Plotter.plot_CWA_EEW_intensity_map(
201 |         final_traces, final_catalog, eqid, label_type
202 |     )
203 | 
204 | # fig.savefig(f"paper image/eqid_{eqid}_CWA_eew_report.pdf",dpi=300)
205 | 
-------------------------------------------------------------------------------- /data_preprocess/12_TSMIP_vs30.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from tqdm import tqdm
4 | import os
5 | import cartopy.crs as ccrs
6 | import cartopy
7 | from cartopy.mpl import ticker
8 | import matplotlib.pyplot as plt
9 | from Vs30_preprocess import *
10 | 
11 | sta_path = "../data/station_information"
12 | start_year = 1999
13 | end_year = 2019
14 | trace = pd.read_csv(f"./events_traces_catalog/{start_year}_{end_year}_final_traces.csv")
15 | station_info = pd.read_csv(f"{sta_path}/TSMIPstations_new.csv")
16 | vs30_info = pd.read_csv(f"{sta_path}/egdt_TSMIP_station_vs30.csv")
17 | 
18 | merge_traces = pd.merge(
19 |     trace,
20 |     station_info[["location_code", "latitude", "longitude", "elevation (m)"]],
21 |     how="left",
22 |     left_on="station_name",
23 |     right_on="location_code",
24 | )
25 | 
26 | merge_traces = pd.merge(
27 |     merge_traces,
28 |     vs30_info[["station_code", "Vs30"]],
29 |     how="left",
30 |     left_on="station_name",
31 |     right_on="station_code",
32 | )
33 | 
34 | 
35 | noVs30_station_value_counts = (
36 |     merge_traces[merge_traces["Vs30"].isna()]["station_name"]
37 |     .value_counts()
38 |     .rename_axis("station_name")
39 |     .reset_index(name="counts")
40 | )
41 | noVs30_station_value_counts = pd.merge(
42 |     noVs30_station_value_counts,
43 |     station_info[["location_code", "latitude", "longitude", "elevation (m)"]],
44 |     how="left",
45 |     left_on="station_name",
46 | 
right_on="location_code", 47 | ) 48 | Vs30_station_value_counts = ( 49 | merge_traces[~merge_traces["Vs30"].isna()]["station_name"] 50 | .value_counts() 51 | .rename_axis("station_name") 52 | .reset_index(name="counts") 53 | ) 54 | Vs30_station_value_counts = pd.merge( 55 | Vs30_station_value_counts, 56 | station_info[["location_code", "latitude", "longitude", "elevation (m)"]], 57 | how="left", 58 | left_on="station_name", 59 | right_on="location_code", 60 | ) 61 | 62 | 63 | # plot station map with vs30 or not 64 | src_crs = ccrs.PlateCarree() 65 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 66 | ax_map.coastlines("10m") 67 | ax_map.scatter( 68 | Vs30_station_value_counts["longitude"], 69 | Vs30_station_value_counts["latitude"], 70 | edgecolors="k", 71 | linewidth=1, 72 | marker="o", 73 | s=10, 74 | zorder=3, 75 | label="include Vs30", 76 | alpha=0.5, 77 | ) 78 | ax_map.scatter( 79 | noVs30_station_value_counts["longitude"], 80 | noVs30_station_value_counts["latitude"], 81 | edgecolors="k", 82 | linewidth=1, 83 | marker="o", 84 | s=10, 85 | zorder=3, 86 | label="No Vs30", 87 | ) 88 | ax_map.set_title("Vs30 from egdt") 89 | ax_map.legend() 90 | # fig.savefig("./events_traces_catalog/Vs30 map.png",dpi=300) 91 | 92 | 93 | file_path = "../data/station_information" 94 | # transfer grd file to xyz file (run only one time) 95 | # if __name__ == "__main__": 96 | # os.getcwd() 97 | # input_grd_file = f"{file_path}/Vs30ofTaiwan.grd" # 輸入GRD檔案的路徑 98 | # output_xyz_file = f"{file_path}/Vs30ofTaiwan.xyz" # 輸出XYZ檔案的路徑 99 | 100 | # grd_to_xyz(input_grd_file, output_xyz_file) 101 | xyz_file = f"{file_path}/Vs30ofTaiwan.xyz" 102 | vs30_table = pd.read_csv(xyz_file, sep="\s+", header=None, names=["x", "y", "Vs30"]) 103 | vs30_table.dropna(inplace=True) 104 | vs30_table.reset_index(drop=True, inplace=True) 105 | 106 | # transform coordinate 107 | vs30_table["x_97"], vs30_table["y_97"] = twd67_to_97(vs30_table["x"], vs30_table["y"]) 108 | vs30_table["lon"] = 0 109 | vs30_table["lat"] = 0 110 | for i in tqdm(range(len(vs30_table))): 111 | vs30_table["lon"][i], vs30_table["lat"][i] = twd97_to_lonlat( 112 | vs30_table["x_97"][i], vs30_table["y_97"][i] 113 | ) 114 | # vs30_table.to_csv(f"{file_path}/Vs30ofTaiwan.csv",index=False) 115 | 116 | # vs30 map fill into no vs30 station 117 | vs30_table = pd.read_csv(f"{file_path}/Vs30ofTaiwan.csv") 118 | target_points = noVs30_station_value_counts[["longitude", "latitude"]].values.tolist() 119 | points = vs30_table[["lon", "lat"]].values.tolist() 120 | 121 | referenced_table = { 122 | "index": [], 123 | "Vs30 referenced lon": [], 124 | "Vs30 referenced lat": [], 125 | "Vs30": [], 126 | } 127 | 128 | for target_point in tqdm(target_points): 129 | nearest_index, nearest_point = find_nearest_point(target_point, points) 130 | 131 | referenced_table["index"].append(nearest_index) 132 | referenced_table["Vs30 referenced lon"].append(nearest_point[0]) 133 | referenced_table["Vs30 referenced lat"].append(nearest_point[1]) 134 | referenced_table["Vs30"].append(vs30_table.loc[nearest_index]["Vs30"]) 135 | 136 | for key in referenced_table.keys(): 137 | noVs30_station_value_counts[f"{key}"] = referenced_table[f"{key}"] 138 | 139 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 140 | ax_map.coastlines("10m") 141 | ax_map.scatter( 142 | noVs30_station_value_counts["Vs30 referenced lon"], 143 | noVs30_station_value_counts["Vs30 referenced lat"], 144 | edgecolors="k", 145 | linewidth=1, 146 | marker="o", 147 | s=40, 
148 | label="referenced", 149 | alpha=0.5, 150 | ) 151 | ax_map.scatter( 152 | noVs30_station_value_counts["longitude"], 153 | noVs30_station_value_counts["latitude"], 154 | edgecolors="k", 155 | linewidth=1, 156 | marker="o", 157 | s=10, 158 | label="No Vs30", 159 | ) 160 | ax_map.set_title("Vs30 filled from Lee's map") 161 | ax_map.legend() 162 | # fig.savefig("./events_traces_catalog/Vs30 filled from Lee map.png",dpi=300) 163 | 164 | # fill vs30 into traces table 165 | 166 | for index in merge_traces[merge_traces["Vs30"].isna()].index: 167 | station_name = merge_traces.iloc[index]["station_name"] 168 | vs30 = np.round( 169 | noVs30_station_value_counts.query(f"station_name=='{station_name}'")[ 170 | "Vs30" 171 | ].values[0], 172 | 2, 173 | ) 174 | merge_traces.loc[index, "Vs30"] = vs30 175 | print( 176 | station_name, 177 | noVs30_station_value_counts.query(f"station_name=='{station_name}'")[ 178 | "Vs30" 179 | ].values, 180 | ) 181 | 182 | merge_traces.drop(["location_code", "station_code"], axis=1, inplace=True) 183 | # merge_traces.to_csv( 184 | # f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv", 185 | # index=False, 186 | # ) 187 | 188 | # plot final vs30 value map 189 | trace_with_vs30 = pd.read_csv( 190 | f"./events_traces_catalog/{start_year}_{end_year}_final_traces_Vs30.csv" 191 | ) 192 | vs30_table = trace_with_vs30[["station_name", "longitude", "latitude", "Vs30"]] 193 | vs30_table = ( 194 | vs30_table.groupby("station_name") 195 | .apply(get_unique_with_other_columns) 196 | .reset_index(drop=True) 197 | ) 198 | 199 | src_crs = ccrs.PlateCarree() 200 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 201 | ax_map.coastlines("10m") 202 | map = ax_map.scatter( 203 | vs30_table["longitude"], 204 | vs30_table["latitude"], 205 | linewidth=1, 206 | marker="o", 207 | s=10, 208 | c=vs30_table["Vs30"], 209 | cmap="copper_r", 210 | ) 211 | ax_map.add_feature(cartopy.feature.OCEAN, zorder=2, edgecolor="k") 212 | # ax_map.set_title("Final Vs30 Map") 213 | cbar = plt.colorbar(map, ax=ax_map) 214 | cbar.set_label("Vs30 (m/s)") 215 | xmin, xmax = ax_map.get_xlim() 216 | ymin, ymax = ax_map.get_ylim() 217 | xticks = ticker.LongitudeLocator(nbins=5)._raw_ticks(xmin, xmax) 218 | yticks = ticker.LatitudeLocator(nbins=5)._raw_ticks(ymin, ymax) 219 | 220 | ax_map.set_xticks(xticks, crs=ccrs.PlateCarree()) 221 | ax_map.set_yticks(yticks, crs=ccrs.PlateCarree()) 222 | 223 | ax_map.xaxis.set_major_formatter(ticker.LongitudeFormatter(zero_direction_label=True)) 224 | ax_map.yaxis.set_major_formatter(ticker.LatitudeFormatter()) 225 | 226 | ax_map.xaxis.set_ticks_position("both") 227 | ax_map.yaxis.set_ticks_position("both") 228 | 229 | ax_map.set_xlim(xmin, xmax) 230 | ax_map.set_ylim(ymin, ymax) 231 | # fig.savefig("./events_traces_catalog/Final Vs30 Map.png",dpi=300) 232 | -------------------------------------------------------------------------------- /model_performance_analysis/0403_Hualien_Earthquake/1_find_trigger_station.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import obspy 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from obspy.signal.trigger import ar_pick 7 | import json 8 | 9 | 10 | def dist(event_lat, event_lon, station_lat, station_lon): # unit: degree 11 | dist = ((event_lat - station_lat) ** 2 + (event_lon - station_lon) ** 2) ** (1 / 2) 12 | return dist 13 | 14 | 15 | mask_after_sec = 5 16 | station_info = 
pd.read_csv("../../data/station_information/TSMIPstations_new.csv") 17 | traces_info_with_vs30 = pd.read_csv( 18 | "../../data_preprocess/events_traces_catalog/1999_2019_final_traces_Vs30.csv" 19 | ) 20 | sample_rate = 200 21 | 22 | path = "./113019_TSMIP_SAC" 23 | waveform_files = os.listdir(path) 24 | 25 | stations = [] 26 | for file in waveform_files: 27 | station_name = file[:6] 28 | if station_name not in stations: 29 | stations.append(station_name) 30 | 31 | station_info = station_info[station_info["location_code"].isin(stations)] 32 | station_info = station_info.reset_index(drop=True) 33 | 34 | # event epicenter 35 | event_lat = 23.77 36 | event_lon = 121.67 37 | 38 | dist_dict = {"dist": []} 39 | for i in range(len(station_info)): 40 | station_lat = station_info["latitude"][i] 41 | station_lon = station_info["longitude"][i] 42 | dist_dict["dist"].append(dist(event_lat, event_lon, station_lat, station_lon)) 43 | station_info["dist (degree)"] = dist_dict["dist"] 44 | 45 | station_info["p_picks (sec)"] = 0 46 | check_station = ["HWA026", "HWA067", "HWA025", "TTN032", "ILA050"] 47 | # plot and picking: 48 | for i, station in enumerate(station_info["location_code"]): 49 | trace_z = obspy.read(f"{path}/{station}.Z.SAC") 50 | trace_n = obspy.read(f"{path}/{station}.N.SAC") 51 | trace_e = obspy.read(f"{path}/{station}.E.SAC") 52 | trace_z.resample(sample_rate, window="hann") 53 | trace_n.resample(sample_rate, window="hann") 54 | trace_e.resample(sample_rate, window="hann") 55 | 56 | waveforms = np.array([trace_z[0].data, trace_n[0].data, trace_e[0].data]) 57 | # fig, ax = plt.subplots(3, 1) 58 | # ax[0].plot(waveforms[0]) 59 | # ax[1].plot(waveforms[1]) 60 | # ax[2].plot(waveforms[2]) 61 | # ax[0].set_title( 62 | # f"{station}_{trace_z[0].stats.starttime}-{trace_z[0].stats.endtime}" 63 | # ) 64 | try: 65 | p_pick, _ = ar_pick( 66 | waveforms[0], 67 | waveforms[1], 68 | waveforms[2], 69 | samp_rate=200, 70 | f1=1, # Frequency of the lower bandpass window 71 | f2=20, # Frequency of the upper bandpass window 72 | lta_p=1, # Length of LTA for the P arrival in seconds 73 | sta_p=0.1, # Length of STA for the P arrival in seconds 74 | lta_s=4.0, # Length of LTA for the S arrival in seconds 75 | sta_s=1.0, # Length of STA for the P arrival in seconds 76 | m_p=2, # Number of AR coefficients for the P arrival 77 | m_s=8, # Number of AR coefficients for the S arrival 78 | l_p=0.1, 79 | l_s=0.2, 80 | s_pick=False, 81 | ) 82 | station_info.loc[i, "p_picks (sec)"] = p_pick 83 | # ax[0].axvline(x=p_pick * sample_rate, color="r", linestyle="-") 84 | # ax[1].axvline(x=p_pick * sample_rate, color="r", linestyle="-") 85 | # ax[2].axvline(x=p_pick * sample_rate, color="r", linestyle="-") 86 | except: 87 | station_info.loc[i, "p_picks (sec)"] = p_pick 88 | # fig.savefig(f"0403waveform_image/{station}.png", dpi=300) 89 | plt.close() 90 | 91 | station_info = station_info.sort_values(by="dist (degree)") 92 | station_info = station_info.reset_index(drop=True) 93 | 94 | trigger_station_info = pd.merge( 95 | station_info, 96 | traces_info_with_vs30[["station_name", "Vs30"]].drop_duplicates( 97 | subset="station_name" 98 | ), 99 | left_on="location_code", 100 | right_on="station_name", 101 | how="left", 102 | ) 103 | trigger_station_info = trigger_station_info.dropna( 104 | subset=["latitude", "longitude", "elevation (m)", "Vs30"] 105 | ) 106 | trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="HWA026"] 107 | 
trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="HWA067"] 108 | trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="HWA025"] 109 | trigger_station_info=trigger_station_info[trigger_station_info["station_name"]!="ILA050"] 110 | trigger_station_info = trigger_station_info.reset_index(drop=True) 111 | 112 | P_wave_velocity = 6.5 113 | stream = obspy.core.stream.Stream() 114 | waveforms_window = [] 115 | mask_station_index = [] 116 | target_length = 18000 117 | for i, station in enumerate(trigger_station_info["location_code"][:25]): 118 | trace_z = obspy.read(f"{path}/{station}.Z.SAC") 119 | trace_n = obspy.read(f"{path}/{station}.N.SAC") 120 | trace_e = obspy.read(f"{path}/{station}.E.SAC") 121 | # bad data padding to fit time window 122 | # HWA026 HWA067 HWA025 ILA050 123 | for trace in [trace_z, trace_n, trace_e]: 124 | trace[0].data = trace[0].data/100 #cm/s2 to m/s2 125 | if len(trace[0].data) < target_length: 126 | padding_length = target_length - len(trace[0].data) 127 | padding = np.zeros(padding_length) 128 | trace[0].data = np.concatenate((trace[0].data, padding)) 129 | trace_z.resample(200, window="hann") 130 | trace_n.resample(200, window="hann") 131 | trace_e.resample(200, window="hann") 132 | 133 | waveforms = np.array([trace_z[0].data, trace_n[0].data, trace_e[0].data]) 134 | if station == "HWA074": # first triggered station 135 | p_pick, _ = ar_pick( 136 | waveforms[0], 137 | waveforms[1], 138 | waveforms[2], 139 | samp_rate=200, 140 | f1=1, # Frequency of the lower bandpass window 141 | f2=20, # Frequency of the upper bandpass window 142 | lta_p=1, # Length of LTA for the P arrival in seconds 143 | sta_p=0.1, # Length of STA for the P arrival in seconds 144 | lta_s=4.0, # Length of LTA for the S arrival in seconds 145 | sta_s=1.0, # Length of STA for the P arrival in seconds 146 | m_p=2, # Number of AR coefficients for the P arrival 147 | m_s=8, # Number of AR coefficients for the S arrival 148 | l_p=0.1, 149 | l_s=0.2, 150 | s_pick=False, 151 | ) 152 | start_time = int((p_pick - 5) * sample_rate) 153 | end_time = int((p_pick + 10) * sample_rate) 154 | trace_z[0].data[int((p_pick) * sample_rate) + (mask_after_sec * sample_rate) :] = 0 155 | trace_n[0].data[int((p_pick) * sample_rate) + (mask_after_sec * sample_rate) :] = 0 156 | trace_e[0].data[int((p_pick) * sample_rate) + (mask_after_sec * sample_rate) :] = 0 157 | 158 | if ( 159 | trigger_station_info["dist (degree)"][i] 160 | - trigger_station_info["dist (degree)"][0] 161 | ) * 100 / P_wave_velocity > mask_after_sec: # zero padding non triggered station 162 | mask_station_index.append(i) #for mask non trigger station information 163 | trace_z[0].data[:] = 0 164 | trace_n[0].data[:] = 0 165 | trace_e[0].data[:] = 0 166 | waveforms = np.stack( 167 | ( 168 | trace_z[0].data[start_time:end_time], 169 | trace_n[0].data[start_time:end_time], 170 | trace_e[0].data[start_time:end_time], 171 | ), 172 | axis=1, 173 | ) 174 | waveforms = waveforms.reshape(3000, 3) 175 | waveforms_window.append(waveforms) 176 | 177 | fig, ax = plt.subplots(3, 1) 178 | ax[0].plot(waveforms[:, 0]) 179 | ax[1].plot(waveforms[:, 1]) 180 | ax[2].plot(waveforms[:, 2]) 181 | ax[0].set_title(f"{station}") 182 | # plt.close() 183 | # fig.savefig( 184 | # f"model_input_waveform_image/{mask_after_sec}_sec/{i}_{station}.png", dpi=300 185 | # ) 186 | 187 | waveform = np.stack(waveforms_window, axis=0).tolist() 188 | target_station_info = trigger_station_info.copy() 189 | 190 | #mask non trigger station 
information
191 | for i in mask_station_index:
192 |     trigger_station_info.loc[i, ["latitude", "longitude", "elevation (m)", "Vs30"]] = 0
193 | 
194 | input_station = (
195 |     trigger_station_info[["latitude", "longitude", "elevation (m)", "Vs30"]][:25]
196 |     .to_numpy()
197 |     .tolist()
198 | )
199 | for i in range(1, 16):
200 |     print((i - 1) * 25, i * 25)
201 |     target_station = (
202 |         target_station_info[["latitude", "longitude", "elevation (m)", "Vs30"]][
203 |             (i - 1) * 25 : i * 25
204 |         ]
205 |         .to_numpy()
206 |         .tolist()
207 |     )
208 |     station_name = target_station_info["location_code"][(i - 1) * 25 : i * 25].tolist()
209 |     output = {
210 |         "waveform": waveform,
211 |         "sta": input_station,
212 |         "target": target_station,
213 |         "station_name": station_name,
214 |     }
215 | 
216 |     # with open(f"model_input/{mask_after_sec}_sec_without_broken_data/{i}.json", "w") as json_file:
217 |     #     json.dump(output, json_file)
218 | 
-------------------------------------------------------------------------------- /feature_map_correlation/tlcc_analysis.py: --------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import seaborn as sns
4 | from scipy.stats import pearsonr
5 | 
6 | 
7 | class Calculator:
8 | 
9 |     def first_occurrences_indices(b):
10 |         first_indices = {}  # dict storing the first-occurrence index of each distinct value
11 | 
12 |         for i, item in enumerate(b):
13 |             if item not in first_indices:
14 |                 first_indices[item] = i  # record the index where this value first appears
15 | 
16 |         return first_indices
17 | 
18 | 
19 |     def normalize_to_zero_one(arr):
20 |         # find the minimum and maximum of the array
21 |         min_val = arr.min()
22 |         max_val = arr.max()
23 | 
24 |         # linearly rescale the array to the range 0 to 1
25 |         normalized_arr = (arr - min_val) / (max_val - min_val)
26 | 
27 |         return normalized_arr
28 | 
29 | 
30 |     def calculate_tlcc(time_series1, time_series2, max_delay):
31 |         """
32 |         Compute the TLCC (time-lagged cross-correlation) and return the corresponding time delays and TLCC values.
33 | 
34 |         Parameters:
35 |         - time_series1: the first time series
36 |         - time_series2: the second time series
37 |         - max_delay: the maximum lag to evaluate
38 | 
39 |         Returns:
40 |         - delay: array of time delays
41 |         - tlcc_values: array of the corresponding TLCC (Pearson correlation) values
42 |         """
43 |         delay = np.arange(-max_delay, max_delay + 1)
44 |         tlcc_values = []
45 |         for i, d in enumerate(delay):
46 |             if d < 0:
47 |                 x1_lagged = time_series1[: len(time_series1) + d]
48 |                 x2_lagged = time_series2[-d:]
49 |             else:
50 |                 x1_lagged = time_series1[d:]
51 |                 x2_lagged = time_series2[: len(time_series2) - d]
52 |             # if d % 5 == 0:
53 |             #     fig,ax=plt.subplots()
54 |             #     ax.plot(x1_lagged,c="k")
55 |             #     ax.plot(x2_lagged,c="r")
56 |             #     ax.set_title(f"delay:{d}")
57 |             #     plt.grid(True)
58 | 
59 |             # compute the Pearson correlation
60 |             pearson_corr, _ = pearsonr(x1_lagged, x2_lagged)
61 |             tlcc_values.append(pearson_corr)
62 | 
63 |         return delay, tlcc_values
64 | 
65 | class Plotter:
66 | 
67 |     def plot_waveform(waveform, eq_id, input_station, index, output_path=None):
68 |         fig, ax = plt.subplots(3, 1, figsize=(14, 7))
69 |         for j in range(len(ax)):
70 |             ax[j].plot(waveform[:, j])
71 |         ax[0].set_title(f"EQ_ID: {eq_id} input waveform{index+1},{input_station}")
72 |         if output_path:
73 |             fig.savefig(f"{output_path}/3 channel input waveform{index+1}.png", dpi=300)
74 |         return fig, ax
75 | 
76 | 
77 |     def plot_correlation_curve_with_shift_time(
78 |         delay_values, tlcc_values, eq_id, attribute, index, mask_after_sec, output_path=None
79 |     ):
80 |         fig, ax = plt.subplots(figsize=(14, 7))
81 |         ax.plot(delay_values, tlcc_values)
82 |         ax.xaxis.set_tick_params(labelsize=15)
83 |         ax.yaxis.set_tick_params(labelsize=15)
84 |         ax.set_xlabel("Shift Time Sample", fontsize=15)
85 |         ax.set_ylabel("TLCC (Pearson Correlation)
Value", fontsize=15) 86 | ax.set_title(f"EQ_ID: {eq_id} {attribute}{index+1} TLCC Analysis", fontsize=15) 87 | ax.grid(True) 88 | if output_path: 89 | fig.savefig( 90 | f"{output_path}/{mask_after_sec} sec {attribute}{index+1} TLCC Analysis.png", 91 | dpi=300, 92 | ) 93 | return fig, ax 94 | 95 | 96 | def plot_attribute_with_feature_map( 97 | attribute_arr, 98 | resized_feature_map, 99 | key, 100 | attribute, 101 | correlation_starttime, 102 | correlation_endtime, 103 | correlation, 104 | tlcc_values, 105 | input_station, 106 | output_path=None, 107 | ): 108 | x_pos = 0.05 109 | y_pos = 0.6 110 | fig, ax = plt.subplots(3, 1, figsize=(14, 7)) 111 | ax[0].plot(attribute_arr, alpha=0.7) 112 | ax[1].plot(resized_feature_map, c="red") 113 | ax[2].plot( 114 | attribute_arr, 115 | alpha=0.7, 116 | label=f"{attribute}", 117 | ) 118 | ax[2].plot( 119 | resized_feature_map, 120 | c="red", 121 | label="feature map", 122 | ) 123 | for j in range(len(ax)): 124 | ax[j].axvline(x=correlation_starttime, color="grey", linestyle="--") 125 | ax[j].axvline(x=correlation_endtime, color="grey", linestyle="--") 126 | ax[2].text( 127 | x_pos, 128 | y_pos, 129 | f"correlation: {np.round(correlation, 2)}\nTLCC max correlation: {np.round(np.array(tlcc_values).max(),2)}", 130 | transform=ax[j].transAxes, 131 | fontsize=15, 132 | horizontalalignment="left", 133 | verticalalignment="top", 134 | ) 135 | ax[0].set_title( 136 | f"EQ_ID: {key} {attribute}, station_name:{input_station}", 137 | fontsize=15, 138 | ) 139 | ax[1].set_ylabel("normalized acc", fontsize=15) 140 | ax[-1].set_xlabel("time sample", fontsize=15) 141 | ax[-1].xaxis.set_tick_params(labelsize=15) 142 | ax[-1].yaxis.set_tick_params(labelsize=15) 143 | ax[2].legend() 144 | if output_path: 145 | fig.savefig( 146 | f"{output_path}/{attribute}_{input_station} with feature map.png", 147 | dpi=300, 148 | ) 149 | return fig, ax 150 | 151 | 152 | def plot_correlation_hist( 153 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 154 | ): 155 | # hist 156 | fig, ax = plt.subplots() 157 | ax.hist( 158 | np.array(attribute_dict[attribute]["tlcc_max_correlation"]), 159 | bins=15, 160 | edgecolor="k", 161 | ) 162 | ax.set_xlabel("correlation", fontsize=12) 163 | ax.set_ylabel("number of traces", fontsize=12) 164 | ax.set_title( 165 | f"Correlation (TLCC) of \n{mask_after_sec} sec {attribute}", 166 | fontsize=15, 167 | ) 168 | ax.text( 169 | 0.8, 170 | 0.8, 171 | f"mean:{TLCC_mean}\nstd:{TLCC_std}", 172 | transform=ax.transAxes, 173 | fontsize=12, 174 | ) 175 | if output_path: 176 | fig.savefig( 177 | f"{output_path}/correlation (TLCC) with {attribute} histogram.png", 178 | dpi=300, 179 | ) 180 | return fig, ax 181 | 182 | 183 | def plot_time_shifted_with_correlation( 184 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 185 | ): 186 | fig, ax = plt.subplots() 187 | ax.scatter( 188 | attribute_dict[attribute]["max_delay"], 189 | attribute_dict[attribute]["tlcc_max_correlation"], 190 | alpha=0.5, 191 | s=15, 192 | ) 193 | 194 | ax.set_xlabel("shifted time sample") 195 | ax.set_ylabel("max Pearson correlation") 196 | ax.set_title( 197 | f"Correlation (TLCC) with delay time{mask_after_sec} sec \n{attribute}, mean :{TLCC_mean}, std: {TLCC_std}", 198 | fontsize=15, 199 | ) 200 | if output_path: 201 | fig.savefig( 202 | f"{output_path}/{mask_after_sec} sec {attribute} TLCC max correlation delay time.png", 203 | dpi=300, 204 | ) 205 | return fig, ax 206 | 207 | 208 | def plot_time_shifted_with_hist( 209 | 
attribute_dict, attribute, delay_mean, delay_std, mask_after_sec, output_path=None 210 | ): 211 | fig, ax = plt.subplots() 212 | ax.hist(attribute_dict[attribute]["max_delay"], bins=15, edgecolor="k") 213 | ax.text( 214 | 0.75, 215 | 0.8, 216 | f"mean:{delay_mean}\nstd:{delay_std}", 217 | transform=ax.transAxes, 218 | fontsize=12, 219 | ) 220 | ax.set_xlabel("shifted time sample", fontsize=12) 221 | ax.set_ylabel("number of traces", fontsize=12) 222 | ax.set_title( 223 | f"{mask_after_sec} sec {attribute}\ndistribution of time delay with max correlation (TLCC)", 224 | fontsize=15, 225 | ) 226 | if output_path: 227 | fig.savefig( 228 | f"{output_path}/{mask_after_sec} sec {attribute} distribution of time delay with max correlation (TLCC).png", 229 | dpi=300, 230 | ) 231 | return fig, ax 232 | 233 | 234 | def correlation_with_attributes_heat_map(data, attributes=None, output_path=None): 235 | fig, ax = plt.subplots() 236 | sns.heatmap(data, annot=True, cmap="Reds") 237 | 238 | ax.set_xticks([x + 0.5 for x in range(data.shape[1])]) 239 | ax.set_xticklabels(["3", "5", "7", "10"], fontsize=12) 240 | 241 | ax.set_yticks([x + 0.5 for x in range(data.shape[0])]) 242 | plt.yticks(rotation=0) 243 | ax.set_yticklabels(attributes, fontsize=12) 244 | 245 | ax.set_xlabel("second", fontsize=13) 246 | 247 | cbar = ax.collections[0].colorbar 248 | 249 | # 设置颜色条标签的字体大小 250 | cbar.set_label("Correlation", fontsize=12) 251 | plt.tight_layout() 252 | if output_path: 253 | fig.savefig(f"{output_path}/correlation_heat_map.png", dpi=300) 254 | return fig, ax 255 | -------------------------------------------------------------------------------- /data/visualize.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() # without this line will cause kernel crashed: when matplotlib and torch import simultaneously 4 | import cartopy.crs as ccrs 5 | from cartopy.mpl import ticker 6 | import cartopy 7 | import numpy as np 8 | from multiple_sta_dataset import multiple_station_dataset 9 | from torch.utils.data import DataLoader 10 | from tqdm import tqdm 11 | import torch 12 | 13 | 14 | class Plot_Train_Test_Data: 15 | def event_histogram( 16 | train_catalog=None, test_catalog=None, key=None, xlabel=None, title=None 17 | ): 18 | fig, ax = plt.subplots() 19 | ax.hist(train_catalog[f"{key}"], bins=30, ec="black", label="train") 20 | ax.hist(test_catalog[f"{key}"], bins=30, ec="black", label="test", alpha=0.8) 21 | ax.set_yscale("log") 22 | ax.set_xlabel(f"{xlabel}", fontsize=15) 23 | ax.set_ylabel("Number of events", fontsize=15) 24 | ax.legend() 25 | if title: 26 | ax.set_title(f"{title}") 27 | return fig, ax 28 | 29 | def event_map(train_catalog=None, test_catalog=None, title=None): 30 | src_crs = ccrs.PlateCarree() 31 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 32 | ax_map.coastlines("10m") 33 | ax_map.scatter( 34 | train_catalog["lon"] + train_catalog["lon_minute"] / 60, 35 | train_catalog["lat"] + train_catalog["lat_minute"] / 60, 36 | edgecolors="k", 37 | linewidth=1, 38 | marker="o", 39 | c="grey", 40 | s=2 ** train_catalog["magnitude"], 41 | zorder=3, 42 | alpha=0.5, 43 | label="train", 44 | ) 45 | ax_map.scatter( 46 | test_catalog["lon"] + test_catalog["lon_minute"] / 60, 47 | test_catalog["lat"] + test_catalog["lat_minute"] / 60, 48 | edgecolors="k", 49 | linewidth=1, 50 | marker="o", 51 | c="orange", 52 | s=2 ** test_catalog["magnitude"], 53 | zorder=3, 54 | alpha=0.5, 55 | label="test", 56 | 
) 57 | ax_map.add_feature(cartopy.feature.OCEAN, edgecolor="k") 58 | 59 | xmin, xmax = ax_map.get_xlim() 60 | ymin, ymax = ax_map.get_ylim() 61 | xticks = ticker.LongitudeLocator(nbins=5)._raw_ticks(xmin, xmax) 62 | yticks = ticker.LatitudeLocator(nbins=5)._raw_ticks(ymin, ymax) 63 | 64 | ax_map.set_xticks(xticks, crs=ccrs.PlateCarree()) 65 | ax_map.set_yticks(yticks, crs=ccrs.PlateCarree()) 66 | 67 | ax_map.xaxis.set_major_formatter( 68 | ticker.LongitudeFormatter(zero_direction_label=True) 69 | ) 70 | ax_map.yaxis.set_major_formatter(ticker.LatitudeFormatter()) 71 | 72 | ax_map.xaxis.set_ticks_position("both") 73 | ax_map.yaxis.set_ticks_position("both") 74 | if title: 75 | ax_map.set_title(f"{title}") 76 | ax_map.legend() 77 | return fig, ax_map 78 | 79 | def pga_histogram(traces_catalog=None, test_year=None, title=None): 80 | fig, ax = plt.subplots(figsize=(8, 6)) 81 | ax.hist( 82 | traces_catalog.query(f"year!={test_year}")["pga"], 83 | bins=30, 84 | ec="black", 85 | label="train", 86 | ) 87 | ax.hist( 88 | traces_catalog.query(f"year=={test_year}")["pga"], 89 | bins=30, 90 | alpha=0.8, 91 | ec="black", 92 | label="test", 93 | ) 94 | pga_threshold = np.log10( 95 | [1e-5, 0.008, 0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10] 96 | ) 97 | label = ["0", "1", "2", "3", "4", "5-", "5+", "6-", "6+", "7"] 98 | ax.vlines(pga_threshold[1:-1], 0, 17700, linestyles="dotted", color="k") 99 | for i in range(len(label)): 100 | if label[i] == "0": 101 | continue 102 | ax.text( 103 | ((pga_threshold[i] + pga_threshold[i + 1]) / 2) - 0.05, 15000, label[i] 104 | ) 105 | ax.set_yscale("log") 106 | ax.set_xlabel(r"PGA log(${m/s^2}$)", fontsize=15) 107 | ax.set_ylabel("Number of traces", fontsize=15) 108 | fig.legend(fontsize=13) 109 | if title: 110 | ax.set_title(f"{title}") 111 | return fig, ax 112 | 113 | 114 | class Increase_High_Data_Test: 115 | def load_dataset_into_list( 116 | data_path, oversample_rate=1, bias_to_close_station=False 117 | ): 118 | dataset = multiple_station_dataset( 119 | data_path, 120 | mode="train", 121 | mask_waveform_sec=3, 122 | weight_label=False, 123 | oversample=oversample_rate, 124 | oversample_mag=4, 125 | test_year=2016, 126 | mask_waveform_random=True, 127 | mag_threshold=0, 128 | label_key="pga", 129 | input_type="acc", 130 | data_length_sec=15, 131 | station_blind=True, 132 | bias_to_closer_station=bias_to_close_station, 133 | ) 134 | origin_loader = DataLoader(dataset, batch_size=16) 135 | origin_PGA = [] 136 | for sample in tqdm(origin_loader): 137 | tmp_pga = torch.index_select( 138 | sample["label"].flatten(), 139 | 0, 140 | sample["label"].flatten().nonzero().flatten(), 141 | ).tolist() 142 | origin_PGA.extend(tmp_pga) 143 | return origin_PGA 144 | 145 | def plot_pga_histogram( 146 | bias_closed_sta_PGA=None, 147 | oversampled_PGA=None, 148 | origin_PGA=None, 149 | origin_high_intensity_rate=None, 150 | oversampled_high_intensity_rate=None, 151 | bias_closed_sta_high_intensity_rate=None, 152 | ): 153 | label = ["2", "3", "4", "5-", "5+", "6-", "6+", "7"] 154 | pga_threshold = np.log10([0.025, 0.080, 0.250, 0.80, 1.4, 2.5, 4.4, 8.0, 10]) 155 | 156 | fig, ax = plt.subplots(figsize=(7, 7)) 157 | ax.hist(bias_closed_sta_PGA, bins=32, edgecolor="k", label="bias_closed_sta") 158 | ax.hist(oversampled_PGA, bins=32, edgecolor="k", label="oversampled", alpha=0.6) 159 | ax.hist(origin_PGA, bins=32, edgecolor="k", label="origin", alpha=0.6) 160 | ax.vlines(pga_threshold[1:-1], 0, 40000, linestyles="dotted", color="k") 161 | for i in range(len(pga_threshold) - 1): 162 | 
ax.text((pga_threshold[i] + pga_threshold[i + 1]) / 2, 50000, label[i]) 163 | ax.text( 164 | 0.01, 165 | 0.8, 166 | f"high intensity rate:\norigin: {np.round(origin_high_intensity_rate,2)}\noversampled: {np.round(oversampled_high_intensity_rate,2)}\nbias to station: {np.round(bias_closed_sta_high_intensity_rate,2)}", 167 | transform=ax.transAxes, 168 | fontsize=12, 169 | ) 170 | ax.set_xlim(-2.75, 1.25) 171 | ax.set_ylabel("Number of traces", size=14) 172 | ax.set_xlabel(r"log(PGA (${m/s^2}$))", size=14) 173 | ax.set_title("TSMIP PGA distribution in training", size=14) 174 | ax.set_yscale("log") 175 | fig.legend(loc="upper right") 176 | return fig, ax 177 | 178 | 179 | def plot_station_distribution(stations=None, title=None): 180 | src_crs = ccrs.PlateCarree() 181 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 182 | 183 | ax_map.coastlines("10m") 184 | 185 | ax_map.add_feature( 186 | cartopy.feature.OCEAN, zorder=2, edgecolor="k" 187 | ) # zorder越大的圖層 越上面 188 | 189 | sta = ax_map.scatter( 190 | stations["longitude"], 191 | stations["latitude"], 192 | edgecolors="gray", 193 | color="red", 194 | linewidth=0.5, 195 | marker="^", 196 | s=20, 197 | zorder=3, 198 | label="Station", 199 | ) 200 | xmin = stations["longitude"].min() - 0.1 201 | xmax = stations["longitude"].max() + 0.1 202 | ymin = stations["latitude"].min() - 0.1 203 | ymax = stations["latitude"].max() + 0.1 204 | xticks = ticker.LongitudeLocator(nbins=5)._raw_ticks(xmin, xmax) 205 | yticks = ticker.LatitudeLocator(nbins=5)._raw_ticks(ymin, ymax) 206 | ax_map.set_xticks(xticks, crs=ccrs.PlateCarree()) 207 | ax_map.set_yticks(yticks, crs=ccrs.PlateCarree()) 208 | 209 | ax_map.xaxis.set_major_formatter( 210 | ticker.LongitudeFormatter(zero_direction_label=True) 211 | ) 212 | ax_map.yaxis.set_major_formatter(ticker.LatitudeFormatter()) 213 | 214 | ax_map.xaxis.set_ticks_position("both") 215 | ax_map.yaxis.set_ticks_position("both") 216 | ax_map.legend() 217 | if title: 218 | ax_map.set_title(title) 219 | return fig, ax_map 220 | 221 | 222 | def plot_received_traces_station_map( 223 | total_station_value_counts, title="Received traces map", output_path=None 224 | ): 225 | src_crs = ccrs.PlateCarree() 226 | fig, ax_map = plt.subplots(subplot_kw={"projection": src_crs}, figsize=(7, 7)) 227 | ax_map.coastlines("10m") 228 | ax_map.scatter( 229 | total_station_value_counts["longitude"], 230 | total_station_value_counts["latitude"], 231 | edgecolors="k", 232 | linewidth=1, 233 | marker="o", 234 | s=total_station_value_counts["counts"] * 1.5, 235 | zorder=3, 236 | alpha=0.5, 237 | ) 238 | ax_map.set_title(f"{title}") 239 | if output_path: 240 | fig.savefig(f"{output_path}/{title}.png", dpi=300) 241 | return fig, ax_map 242 | -------------------------------------------------------------------------------- /feature_map_correlation/feature_map_correlation.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | plt.subplots() 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from tqdm import tqdm 8 | import os 9 | from scipy.ndimage import zoom 10 | import sys 11 | sys.path.append("..") 12 | from data.multiple_sta_dataset import multiple_station_dataset 13 | from model.CNN_Transformer_Mixtureoutput_TEAM import CNN_feature_map 14 | import os 15 | from scipy.signal import hilbert 16 | from tlcc_analysis import Plotter,Calculator 17 | 18 | 19 | mask_after_sec = 10 20 | sample_rate = 200 21 | label = "pga" 22 | data = 
multiple_station_dataset( 23 | "../data/TSMIP_1999_2019_Vs30.hdf5", 24 | mode="test", 25 | mask_waveform_sec=mask_after_sec, 26 | test_year=2016, 27 | label_key=label, 28 | mag_threshold=0, 29 | input_type="acc", 30 | data_length_sec=15, 31 | ) 32 | # need station name 33 | output_path = "../predict/station_blind_Vs30_bias2closed_station_2016" 34 | predict = pd.read_csv(f"{output_path}/{mask_after_sec} sec model11 with all info.csv") 35 | 36 | # ===========prepare model============== 37 | device = torch.device("cuda") 38 | num = 11 39 | path = f"../model/model{num}.pt" 40 | emb_dim = 150 41 | mlp_dims = (150, 100, 50, 30, 10) 42 | CNN_model = CNN_feature_map(mlp_input=5665).cuda() 43 | 44 | full_model_parameter = torch.load(path) 45 | # ===========load CNN parameter============== 46 | CNN_parameter = {} 47 | for name, param in full_model_parameter.items(): 48 | if ( 49 | "model_CNN" in name 50 | ): # model_CNN.conv2d1.0.weight : conv2d1.0.weight didn't match 51 | name = name.replace("model_CNN.", "") 52 | CNN_parameter[name] = param 53 | CNN_model.load_state_dict(CNN_parameter) 54 | 55 | event_index_list = [] 56 | for eq_id in data.events_index: 57 | event_index_list.append(eq_id[0][0, 0]) 58 | 59 | eq_first_index = Calculator.first_occurrences_indices(event_index_list) 60 | 61 | # plot feature map and calculate correlation 62 | attribute_dict = { 63 | "euclidean_norm": {"correlation": [], "tlcc_max_correlation": [], "max_delay": []}, 64 | "vertical_envelope": { 65 | "correlation": [], 66 | "tlcc_max_correlation": [], 67 | "max_delay": [], 68 | }, 69 | "NS_envelope": {"correlation": [], "tlcc_max_correlation": [], "max_delay": []}, 70 | "EW_envelope": {"correlation": [], "tlcc_max_correlation": [], "max_delay": []}, 71 | "vertical_instantaneous_phase": { 72 | "correlation": [], 73 | "tlcc_max_correlation": [], 74 | "max_delay": [], 75 | }, 76 | "NS_instantaneous_phase": { 77 | "correlation": [], 78 | "tlcc_max_correlation": [], 79 | "max_delay": [], 80 | }, 81 | "EW_instantaneous_phase": { 82 | "correlation": [], 83 | "tlcc_max_correlation": [], 84 | "max_delay": [], 85 | }, 86 | "vertical_instantaneous_freq": { 87 | "correlation": [], 88 | "tlcc_max_correlation": [], 89 | "max_delay": [], 90 | }, 91 | "NS_instantaneous_freq": { 92 | "correlation": [], 93 | "tlcc_max_correlation": [], 94 | "max_delay": [], 95 | }, 96 | "EW_instantaneous_freq": { 97 | "correlation": [], 98 | "tlcc_max_correlation": [], 99 | "max_delay": [], 100 | }, 101 | } 102 | print(len(eq_first_index.keys())) 103 | for key, index in tqdm(zip(eq_first_index.keys(), eq_first_index.values())): 104 | event_output_path = ( 105 | f"{output_path}/{mask_after_sec} sec cnn feature map/each event/{str(key)}" 106 | ) 107 | if not os.path.isdir(f"{event_output_path}"): 108 | os.makedirs(f"{event_output_path}") 109 | sample = data[index] 110 | waveform = sample["waveform"] 111 | 112 | not_padding_station_number = ( 113 | (torch.from_numpy(sample["sta"]) != 0).all(dim=1).sum().item() 114 | ) 115 | single_event_prediction = predict.query(f"EQ_ID=={key}") 116 | input_station_list = single_event_prediction["station_name"][ 117 | :not_padding_station_number 118 | ].tolist() 119 | if len(input_station_list) < 25: 120 | input_station_list += [np.nan] * (25 - len(input_station_list)) 121 | 122 | p_picks = sample["p_picks"].flatten().tolist() 123 | # plot 24784 input waveform 124 | if key == 24784: 125 | for i in range(not_padding_station_number): 126 | single_waveform=waveform[i] 127 | input_station=input_station_list[i] 128 | fig, ax = 
Plotter.plot_waveform(single_waveform, key, input_station,index=i) 129 | 130 | cnn_input = torch.DoubleTensor(waveform).float().cuda() 131 | cnn_output, layer_output = CNN_model(cnn_input) 132 | numeric_array = np.array(layer_output[-1].detach().cpu(), dtype=np.float32) 133 | feature_map = np.mean(numeric_array, axis=1) 134 | scale_factor_h = waveform.shape[0] / feature_map.shape[0] 135 | scale_factor_w = waveform.shape[1] / feature_map.shape[1] 136 | 137 | # zoom out feature map 138 | resized_feature_map = zoom(feature_map, (scale_factor_h, scale_factor_w), order=3) 139 | component_dict = {} 140 | euclidean_waveform = np.linalg.norm(waveform, axis=2) / np.sqrt(3) 141 | component_dict[f"euclidean_norm"] = euclidean_waveform 142 | for com, component in enumerate(["vertical", "NS", "EW"]): 143 | analytic_signal = hilbert(waveform[:, :, com]) 144 | envelope = np.abs(analytic_signal) 145 | instantaneous_phase = np.unwrap(np.angle(analytic_signal)) 146 | instantaneous_frequency = np.abs( 147 | (np.diff(instantaneous_phase) / (2.0 * np.pi) * sample_rate) 148 | ) 149 | component_dict[f"{component}_envelope"] = envelope 150 | component_dict[f"{component}_instantaneous_phase"] = instantaneous_phase 151 | component_dict[f"{component}_instantaneous_freq"] = instantaneous_frequency 152 | 153 | for attribute in component_dict: #calculate correlation to different attribute 154 | for i in range(not_padding_station_number): 155 | correlation_starttime = p_picks[i] - sample_rate 156 | correlation_endtime = p_picks[0] + (mask_after_sec + 1) * sample_rate 157 | if mask_after_sec == 10: 158 | correlation_endtime = p_picks[0] + (mask_after_sec) * sample_rate 159 | try: 160 | correlation = np.corrcoef( 161 | component_dict[attribute][ 162 | i, correlation_starttime:correlation_endtime 163 | ], 164 | resized_feature_map[i, correlation_starttime:correlation_endtime], 165 | )[0, 1] 166 | delay_values, tlcc_values = Calculator.calculate_tlcc( 167 | component_dict[attribute][ 168 | i, correlation_starttime:correlation_endtime 169 | ], 170 | resized_feature_map[i, correlation_starttime:correlation_endtime], 171 | max_delay=100, 172 | ) 173 | except: # second=10 case 174 | correlation = np.corrcoef( 175 | component_dict[attribute][ 176 | i, correlation_starttime:correlation_endtime 177 | ], 178 | resized_feature_map[ 179 | i, correlation_starttime + 1 : correlation_endtime 180 | ], 181 | )[0, 1] 182 | delay_values, tlcc_values = Calculator.calculate_tlcc( 183 | component_dict[attribute][ 184 | i, correlation_starttime:correlation_endtime 185 | ], 186 | resized_feature_map[ 187 | i, correlation_starttime + 1 : correlation_endtime 188 | ], 189 | max_delay=100, 190 | ) 191 | attribute_dict[attribute]["correlation"].append(correlation) 192 | max_index = np.argmax(tlcc_values) 193 | max_correlation = tlcc_values[max_index] 194 | max_delay = delay_values[max_index] 195 | attribute_dict[attribute]["tlcc_max_correlation"].append(max_correlation) 196 | attribute_dict[attribute]["max_delay"].append(max_delay) 197 | 198 | if key == 24784: # plot 199 | fig, ax = Plotter.plot_correlation_curve_with_shift_time( 200 | delay_values, tlcc_values, key, attribute,index=i,mask_after_sec=mask_after_sec, output_path=None 201 | ) 202 | attribute_arr=Calculator.normalize_to_zero_one(component_dict[attribute][i]) 203 | resized_feature_map=Calculator.normalize_to_zero_one(resized_feature_map[i]) 204 | fig, ax = Plotter.plot_attribute_with_feature_map( 205 | attribute_arr, 206 | resized_feature_map, 207 | key, 208 | attribute, 209 | 
correlation_starttime, 210 | correlation_endtime, 211 | correlation, 212 | tlcc_values, 213 | input_station_list[i], 214 | ) 215 | 216 | output_path = f"{output_path}/{mask_after_sec} sec cnn feature map" 217 | 218 | for attribute in attribute_dict: #statistical analysis 219 | TLCC_mean = np.round( 220 | np.array(attribute_dict[attribute]["tlcc_max_correlation"]).mean(), 2 221 | ) 222 | TLCC_std = np.round( 223 | np.array(attribute_dict[attribute]["tlcc_max_correlation"]).std(), 2 224 | ) 225 | fig, ax = Plotter.plot_correlation_hist( 226 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 227 | ) 228 | # x: time sample lag, y: max correlation (TLCC) 229 | fig, ax = Plotter.plot_time_shifted_with_correlation( 230 | attribute_dict, attribute, TLCC_mean, TLCC_std, mask_after_sec, output_path=None 231 | ) 232 | # max correlation time delay hist 233 | delay_mean = np.round(np.array(attribute_dict[attribute]["max_delay"]).mean(), 2) 234 | delay_std = np.round(np.array(attribute_dict[attribute]["max_delay"]).std(), 2) 235 | fig, ax = Plotter.plot_time_shifted_with_hist( 236 | attribute_dict, 237 | attribute, 238 | delay_mean, 239 | delay_std, 240 | mask_after_sec, 241 | output_path=None, 242 | ) 243 | 244 | # belowed data is correlation with attributes in different seconds 245 | data = np.array( 246 | [ 247 | [0.61, 0.53, 0.49, 0.46], 248 | [0.68, 0.58, 0.52, 0.5], 249 | [0.59, 0.51, 0.47, 0.46], 250 | [0.58, 0.5, 0.47, 0.45], 251 | [0.29, 0.23, 0.18, 0.12], 252 | [0.29, 0.23, 0.18, 0.12], 253 | [0.29, 0.23, 0.18, 0.12], 254 | [0.3, 0.22, 0.16, 0.11], 255 | [0.29, 0.21, 0.15, 0.1], 256 | [0.3, 0.21, 0.15, 0.1], 257 | ] 258 | ) 259 | attributes = [ 260 | "Euclidean norm", 261 | "Vertical envelope", 262 | "NS envelope", 263 | "EW envelope", 264 | "Vertical phase", 265 | "NS phase", 266 | "EW phase", 267 | "Vertical frequency", 268 | "NS frequency", 269 | "EW frequency", 270 | ] 271 | output_path = "./predict/station_blind_Vs30_bias2closed_station_2016" 272 | fig, ax = Plotter.correlation_with_attributes_heat_map(data, attributes, output_path=None) 273 | --------------------------------------------------------------------------------
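
Editor's note: the snippet below is an illustrative sketch added for this document, not part of the repository. It shows one way to sanity-check Calculator.calculate_tlcc from feature_map_correlation/tlcc_analysis.py on two synthetic series with a known lag; the array length, the lag of 30 samples, and running from the feature_map_correlation/ directory (so tlcc_analysis is importable) are assumptions made only for this example.

# Sanity check for Calculator.calculate_tlcc (illustrative only).
import numpy as np

from tlcc_analysis import Calculator  # assumes cwd is feature_map_correlation/

rng = np.random.default_rng(0)
series1 = rng.standard_normal(1000)
lag = 30                          # artificial, known shift in samples
series2 = np.roll(series1, lag)   # series2 lags series1 by 30 samples (circular shift)

delay, tlcc_values = Calculator.calculate_tlcc(series1, series2, max_delay=100)
best_delay = delay[int(np.argmax(tlcc_values))]

# Given how the function slices the two series, the peak is expected at delay = -30,
# where the overlapping segments are identical and the Pearson correlation is 1.
print(best_delay, np.max(tlcc_values))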