├── .devcontainer └── devcontainer.json ├── .github ├── environment.yml └── workflows │ └── docs.yml ├── .gitignore ├── .gitmodules ├── HypoDD ├── .gitignore ├── Dockerfile ├── Makefile ├── convert_stations.py ├── gamma2hypodd.py ├── gamma2hypoinverse.py ├── hyp.command ├── hypoDD.inc ├── hypodd_cc.inp ├── hypodd_ct.inp ├── hypoinverse2hypodd.py ├── ph2dt.inp ├── plot_catalog.ipynb ├── plotly_3d.ipynb ├── run.sh ├── vel_model_P.crh ├── vel_model_S.crh └── visulization.ipynb ├── LICENSE ├── datasets ├── NCEDC │ ├── .gitignore │ ├── convert_hdf5.py │ ├── convert_hdf5_v2.py │ ├── download_catalog.py │ ├── download_fm.py │ ├── download_station.py │ ├── download_waveform.py │ ├── extract_csv.py │ ├── extract_ps.py │ ├── merge_hdf5.py │ └── run.yaml └── SCEDC │ ├── .gitignore │ ├── convert_hdf5.py │ ├── convert_hdf5_v2.py │ ├── download_catalog.py │ ├── download_station.py │ ├── download_waveform.py │ ├── download_waveform_v2.py │ ├── extract_ps.py │ ├── merge_hdf5.py │ ├── run.yaml │ └── split_large_files.py ├── docs ├── .gitignore ├── README.md ├── assets │ ├── inference_pipeline_plotly.png │ ├── logo.jpg │ ├── logo.png │ ├── quakeflow.gif │ └── quakeflow_diagram.png ├── data.md ├── data_format.md ├── deepdenoiser.md ├── earthquake_location.md ├── fastapi.ipynb ├── gamma.md ├── gcp_readme.md ├── k8s_readme.md ├── kubeflow └── phasenet.md ├── environment.yml ├── examples ├── california │ ├── .gitignore │ ├── .skyignore │ ├── args.py │ ├── cut_templates_cc.py │ ├── cut_templates_merge.py │ ├── download_waveform.py │ ├── filter_gamma_ncedc.py │ ├── generate_pairs.py │ ├── load_cloud_data.py │ ├── load_cloud_picks.py │ ├── merge_ncedc.py │ ├── monitor.py │ ├── plot_catalog.py │ ├── plotting.py │ ├── refresh.py │ ├── run_adloc.py │ ├── run_adloc_ct.py │ ├── run_cctorch.py │ ├── run_gamma.py │ ├── run_gamma.yaml │ ├── run_gamma_ncedc.py │ ├── run_growclust_cc.py │ ├── run_growclust_cc.sh │ ├── run_hypodd_cc.py │ ├── run_hypodd_cc.sh │ ├── run_phasenet.py │ ├── run_phasenet.yaml │ ├── run_phasenet_ncedc.py │ ├── run_phasenet_scedc.py │ ├── set_config_ncedc.py │ ├── submit_adloc.py │ ├── submit_cctorch.py │ ├── submit_download.py │ ├── submit_gamma.py │ ├── submit_phasenet.py │ ├── submit_template.py │ └── tests │ │ └── clustering.py ├── forge │ └── load_data.py ├── hawaii │ └── workflow.ipynb ├── japan │ ├── .gitignore │ ├── convert_data_hinet.py │ ├── cut_templates_cc.py │ ├── download_data_hinet.py │ ├── filter_similar_pairs.py │ ├── merge_csv.py │ ├── merge_events.py │ ├── merge_picks.py │ ├── plot_catalog.py │ ├── plotting.py │ ├── run_adloc.py │ ├── run_adloc_cc_bak.py │ ├── run_cctorch.py │ ├── run_gamma.py │ ├── run_growclust_cc.py │ ├── run_growclust_cc.sh │ ├── run_hypodd_cc.py │ ├── run_hypodd_cc.sh │ ├── run_phasenet.py │ ├── run_qtm.py │ └── set_config.py └── seafoam │ └── load_data.py ├── kubeflow ├── .gitignore ├── Dockerfile ├── README.md ├── Stream.ipynb ├── Training.ipynb ├── cloud_dataset.ipynb ├── debug_magnitude.ipynb ├── debug_pvc.yaml ├── env.yml ├── plot_catalog.ipynb ├── prepare_test_data.ipynb ├── rsync.yaml ├── tweepy_test.ipynb ├── waveforms │ ├── Dockerfile │ └── download_waveform.ipynb ├── workflow-api.ipynb ├── workflow-kfp2.ipynb ├── workflow.ipynb └── workflow_debug.ipynb ├── kubernetes ├── deploy_gcp.sh ├── deploy_local.sh ├── metrics-server.yaml ├── quakeflow-autoscaling.yaml ├── quakeflow-gcp.yaml ├── quakeflow-ingress.yaml ├── quakeflow-local.yaml └── replay │ ├── real_data.py │ └── replay_data.py ├── mkdocs.yml ├── mongodb └── test_mongodb.ipynb ├── quakeflow ├── 
demo │ ├── association │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ ├── data │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ ├── hub │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ ├── location │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ └── picking │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt ├── deployment.yaml ├── helm.sh ├── index.html ├── main.py ├── replay_data.py └── service.yaml ├── requirements.txt ├── scripts ├── .gitignore ├── Dockerfile ├── README.md ├── args.py ├── config.json ├── convert_dtcc.py ├── convert_qtm.py ├── convert_velest.py ├── convert_velest_output.py ├── create_filelist.py ├── cut_templates.py ├── cut_templates_cc.py ├── cut_templates_qtm.py ├── cut_templates_v2.py ├── debug_growclust.py ├── download_catalog.py ├── download_event_hinet.py ├── download_station.py ├── download_waveform.py ├── download_waveform_event.py ├── download_waveform_v2.py ├── download_waveform_v3.py ├── generate_pairs.py ├── load_cloud_picks.py ├── load_cloud_templates.py ├── merge_adloc_picks.py ├── merge_csv.py ├── merge_gamma_picks.py ├── merge_phasenet_picks.py ├── merge_phasenet_plus_picks.py ├── plot_catalog.py ├── plot_gamma.py ├── quakeflow.py ├── quakeflow_demo.ipynb ├── quakeflow_job.yaml ├── run_adloc.py ├── run_adloc_cc.py ├── run_adloc_ct.py ├── run_adloc_v2.py ├── run_cctorch.py ├── run_eqnet.py ├── run_event_association.py ├── run_gamma.py ├── run_gamma_v2.py ├── run_growclust_cc.py ├── run_growclust_cc.sh ├── run_growclust_ct.sh ├── run_hypodd_cc.py ├── run_hypodd_cc.sh ├── run_hypodd_ct.py ├── run_hypodd_ct.sh ├── run_phasenet.py ├── run_phasenet_das.yaml ├── run_phasenet_plus.py ├── run_phasenet_v2.py ├── run_qtm.py ├── run_qtm_association.py ├── run_skhash.py ├── run_velest.sh ├── set_config.py ├── station_clustering.ipynb ├── submit_download_waveform.py ├── submit_vertex.py ├── synthetic_test.ipynb ├── tests │ ├── .gitignore │ ├── cut_template_picks.py │ ├── cut_template_picks_dummy.py │ ├── prepare_data.py │ ├── prepare_data_quakeflow.py │ ├── run_hypodd.py │ └── run_hypodd_cc.sh └── utils │ ├── __init__.py │ ├── convert_cctorch_turkey.py │ ├── plotting.py │ └── preprocess_focal_mechanism.py ├── seedlink ├── Dockerfile ├── env.yml ├── producer.py ├── producer_iris.py ├── producer_parallel.py ├── readme.md ├── realtime-iris.ipynb └── realtime-stations.txt ├── skaffold.yaml ├── spark ├── Dockerfile ├── env.yml ├── readme.md ├── requirements.txt └── spark_streaming.py ├── tests ├── analysis │ ├── .gitignore │ ├── check_waveforms_v2.ipynb │ ├── check_waveforms_v2.py │ ├── comparison.ipynb │ ├── config.json │ ├── mccc.py │ ├── mccc_plot.ipynb │ └── util.py ├── check_pvc.yaml └── kafka-spark │ ├── TEST - Structured Streaming.ipynb │ ├── consumer.py │ ├── env.yml │ ├── producer.py │ ├── quakeflow logo design 2.jpg │ ├── readme.md │ ├── spark.py │ └── ui_streamlit.py └── ui ├── Dockerfile ├── Procfile ├── app_plotly.py ├── assets ├── app.css ├── demo-button.css └── style.css ├── env.yml ├── gradio └── test_api.py └── streamlit ├── Dockerfile ├── env.yml ├── ui_streamlit.py ├── ui_streamlit_debug.py └── ui_streamlit_iris.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "hostRequirements": { 4 | "cpus": 4 5 | }, 6 | "waitFor": "onCreateCommand", 7 | "updateContentCommand": "sudo apt update && sudo apt install -y libgeos-dev && python3 -m pip install -r requirements.txt 
&& python -m pip install kfp --pre", 8 | // "updateContentCommand": "conda env update --file environment.yml", 9 | "postCreateCommand": "", 10 | "customizations": { 11 | "codespaces": { 12 | "openFiles": [] 13 | }, 14 | "vscode": { 15 | "extensions": [ 16 | "ms-toolsai.jupyter", 17 | "ms-python.python", 18 | "googlecloudtools.cloudcode" 19 | ] 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /.github/environment.yml: -------------------------------------------------------------------------------- 1 | name: mkdocs 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - attrs=22.2.0=pyh71513ae_0 9 | - beautifulsoup4=4.11.1=pyha770c72_0 10 | - bleach=5.0.1=pyhd8ed1ab_0 11 | - ca-certificates=2022.12.7=ha878542_0 12 | - certifi=2022.12.7=pyhd8ed1ab_0 13 | - click=8.1.3=unix_pyhd8ed1ab_2 14 | - defusedxml=0.7.1=pyhd8ed1ab_0 15 | - entrypoints=0.4=pyhd8ed1ab_0 16 | - ghp-import=2.1.0=pyhd8ed1ab_0 17 | - importlib-metadata=6.0.0=pyha770c72_0 18 | - importlib_resources=5.10.2=pyhd8ed1ab_0 19 | - jinja2=3.1.2=pyhd8ed1ab_1 20 | - jsonschema=4.17.3=pyhd8ed1ab_0 21 | - jupyter_client=7.3.4=pyhd8ed1ab_0 22 | - jupyter_core=5.1.3=py38h578d9bd_0 23 | - jupyterlab_pygments=0.2.2=pyhd8ed1ab_0 24 | - jupytext=1.14.4=pyhcff175f_0 25 | - ld_impl_linux-64=2.38=h1181459_1 26 | - libffi=3.4.2=h6a678d5_6 27 | - libgcc-ng=11.2.0=h1234567_1 28 | - libgomp=11.2.0=h1234567_1 29 | - libsodium=1.0.18=h36c2ea0_1 30 | - libstdcxx-ng=11.2.0=h1234567_1 31 | - markdown=3.4.1=pyhd8ed1ab_0 32 | - markdown-it-py=2.1.0=pyhd8ed1ab_0 33 | - markupsafe=2.1.1=py38h0a891b7_1 34 | - mdit-py-plugins=0.3.3=pyhd8ed1ab_0 35 | - mdurl=0.1.0=pyhd8ed1ab_0 36 | - mergedeep=1.3.4=pyhd8ed1ab_0 37 | - mistune=0.8.4=pyh1a96a4e_1006 38 | - mkdocs=1.3.0=pyhd8ed1ab_0 39 | - mkdocs-exclude=1.0.2=pyhd8ed1ab_0 40 | - mkdocs-jupyter=0.21.0=pyhd8ed1ab_0 41 | - mkdocs-material=8.4.1=pyhd8ed1ab_0 42 | - mkdocs-material-extensions=1.0.3=pyhd8ed1ab_2 43 | - nbclient=0.7.2=pyhd8ed1ab_0 44 | - nbconvert=6.5.0=pyhd8ed1ab_0 45 | - nbconvert-core=6.5.0=pyhd8ed1ab_0 46 | - nbconvert-pandoc=6.5.0=pyhd8ed1ab_0 47 | - nbformat=5.7.3=pyhd8ed1ab_0 48 | - ncurses=6.3=h5eee18b_3 49 | - nest-asyncio=1.5.6=pyhd8ed1ab_0 50 | - openssl=1.1.1s=h7f8727e_0 51 | - packaging=23.0=pyhd8ed1ab_0 52 | - pandoc=2.19.2=ha770c72_0 53 | - pandocfilters=1.5.0=pyhd8ed1ab_0 54 | - pip=22.3.1=py38h06a4308_0 55 | - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0 56 | - platformdirs=2.6.2=pyhd8ed1ab_0 57 | - pygments=2.14.0=pyhd8ed1ab_0 58 | - pymdown-extensions=9.9.1=pyhd8ed1ab_0 59 | - pyrsistent=0.18.1=py38h0a891b7_1 60 | - python=3.8.16=h7a1cb2a_2 61 | - python-dateutil=2.8.2=pyhd8ed1ab_0 62 | - python-fastjsonschema=2.16.2=pyhd8ed1ab_0 63 | - python_abi=3.8=2_cp38 64 | - pyyaml=6.0=py38h0a891b7_4 65 | - pyyaml-env-tag=0.1=pyhd8ed1ab_0 66 | - pyzmq=23.0.0=py38hfc09fa9_0 67 | - readline=8.2=h5eee18b_0 68 | - setuptools=65.6.3=py38h06a4308_0 69 | - six=1.16.0=pyh6c4a22f_0 70 | - soupsieve=2.3.2.post1=pyhd8ed1ab_0 71 | - sqlite=3.40.1=h5082296_0 72 | - tinycss2=1.2.1=pyhd8ed1ab_0 73 | - tk=8.6.12=h1ccaba5_0 74 | - toml=0.10.2=pyhd8ed1ab_0 75 | - tornado=6.1=py38h0a891b7_3 76 | - traitlets=5.8.1=pyhd8ed1ab_0 77 | - typing-extensions=4.4.0=hd8ed1ab_0 78 | - typing_extensions=4.4.0=pyha770c72_0 79 | - watchdog=2.2.1=py38h578d9bd_0 80 | - webencodings=0.5.1=py_1 81 | - wheel=0.37.1=pyhd3eb1b0_0 82 | - xz=5.2.10=h5eee18b_1 83 | - yaml=0.2.5=h7f98852_2 84 | - 
zeromq=4.3.4=h9c3ff4c_1 85 | - zipp=3.11.0=pyhd8ed1ab_0 86 | - zlib=1.2.13=h5eee18b_0 87 | prefix: /home/weiqiang/.local/miniconda3/envs/mkdocs 88 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - main 8 | 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | # runs-on: macos-latest 13 | # runs-on: windows-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | submodules: true 18 | 19 | # - uses: actions/setup-python@v4 20 | # with: 21 | # python-version: 3.8 22 | # cache: 'pip' 23 | # - run: pip install -r requirements.txt 24 | # # - run: pip install mkdocs mkdocs-material mkdocs-jupyter mkdocs-exclude 25 | # - run: mkdocs gh-deploy --force 26 | 27 | - uses: conda-incubator/setup-miniconda@v2 28 | with: 29 | python-version: 3.8 30 | miniconda-version: "latest" 31 | activate-environment: mkdocs 32 | environment-file: .github/environment.yml 33 | # - run: conda install mkdocs=1.3.0 mkdocs-material=8.4.1 mkdocs-material-extensions==1.0.3 mkdocs-jupyter=0.21.0 mkdocs-exclude -c conda-forge 34 | 35 | - name: mkdocs 36 | shell: bash -el {0} 37 | run: mkdocs gh-deploy --force 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Spark checkpoints 2 | checkpoint-* 3 | twitter_fig.* 4 | 5 | # log files 6 | log.* 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | .DS_Store 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | *pyc 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 107 | __pypackages__/ 108 | 109 | # Celery stuff 110 | celerybeat-schedule 111 | celerybeat.pid 112 | 113 | # SageMath parsed files 114 | *.sage.py 115 | 116 | # Environments 117 | .env 118 | .venv 119 | env/ 120 | venv/ 121 | ENV/ 122 | env.bak/ 123 | venv.bak/ 124 | 125 | # Spyder project settings 126 | .spyderproject 127 | .spyproject 128 | 129 | # Rope project settings 130 | .ropeproject 131 | 132 | # mkdocs documentation 133 | /site 134 | 135 | # mypy 136 | .mypy_cache/ 137 | .dmypy.json 138 | dmypy.json 139 | 140 | # Pyre type checker 141 | .pyre/ 142 | 143 | # pytype static type analyzer 144 | .pytype/ 145 | 146 | # Cython debug symbols 147 | cython_debug/ 148 | 149 | Trash 150 | 151 | # seismic data 152 | *.mseed 153 | *.pdf 154 | *.png 155 | *.csv 156 | *.pkl 157 | hypoinverse/ 158 | notebooks/*/config.json 159 | Trash 160 | slurm/stations/ 161 | slurm/figures/ 162 | slurm/waveforms/ 163 | slurm/results/ 164 | slurm/templates/ 165 | slurm/relocation/hypodd/ 166 | slurm/relocation/growclust/ 167 | 168 | slurm/*/stations/ 169 | slurm/*/waveforms/ 170 | slurm/*/waveforms/ 171 | slurm/*/results/ 172 | slurm/*/*.xml 173 | .history/ 174 | *.npy -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "DeepDenoiser"] 2 | path = DeepDenoiser 3 | url = https://github.com/wayneweiqiang/DeepDenoiser.git 4 | branch = master 5 | [submodule "PhaseNet"] 6 | path = PhaseNet 7 | url = https://github.com/wayneweiqiang/PhaseNet.git 8 | branch = master 9 | [submodule "GaMMA"] 10 | path = GaMMA 11 | url = https://github.com/wayneweiqiang/GaMMA.git 12 | branch = master 13 | [submodule "EQNet"] 14 | path = EQNet 15 | url = https://github.com/AI4EPS/EQNet.git 16 | [submodule "CCTorch"] 17 | path = CCTorch 18 | url = https://github.com/AI4EPS/CCTorch.git 19 | [submodule "ADLoc"] 20 | path = ADLoc 21 | url = https://github.com/AI4EPS/ADLoc.git 22 | -------------------------------------------------------------------------------- /HypoDD/.gitignore: -------------------------------------------------------------------------------- 1 | HYPODD 2 | HYPODD* 3 | dt*.ct 4 | event*.dat 5 | event*.sel 6 | f77 7 | g77 8 | stations_*.dat 9 | 10 | Hawaii* 11 | Ridgecrest* 12 | PuertoRico* 13 | tmp_* 14 | *.html 15 | test/ 16 | -------------------------------------------------------------------------------- /HypoDD/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN apt-get update && apt-get install -y gfortran && rm -rf /var/lib/apt/lists/* 4 | 5 | WORKDIR /opt 6 | 7 | ARG CACHEBUST=1 8 | 9 | ENV PATH="/opt/:${PATH}" 10 | RUN wget -O HYPODD_1.3.tar.gz http://www.ldeo.columbia.edu/~felixw/HYPODD/HYPODD_1.3.tar.gz 11 | RUN tar -xf HYPODD_1.3.tar.gz 12 | COPY Makefile /opt/HYPODD/src/hypodd 13 | COPY hypoDD.inc /opt/HYPODD/include 14 | RUN ln -s $(which gfortran) f77 15 | RUN ln -s $(which gfortran) g77 16 | RUN make -C HYPODD/src 17 | 18 | RUN mkdir hypodd 19 | RUN cp HYPODD/src/ph2dt/ph2dt hypodd/ph2dt 20 | RUN cp HYPODD/src/hypoDD/hypoDD hypodd/hypoDD 21 | 22 | # RUN ls -l 
/opt/HYPODD/src/hypoDD/hypoDD 23 | # COPY gamma2hypodd.py convert_stations.py /opt/ 24 | # COPY hypoDD_ct.inp hypoDD_cc.inp ph2dt.inp /opt/ 25 | RUN python -m pip install --upgrade numpy pandas tqdm minio && rm -rf /var/cache/apk/* -------------------------------------------------------------------------------- /HypoDD/Makefile: -------------------------------------------------------------------------------- 1 | CMD = hypoDD 2 | CC = gcc 3 | FC = g77 4 | #FC = gfortran 5 | #FC = f77 6 | SRCS = $(CMD).f \ 7 | aprod.f cluster1.f covar.f datum.f \ 8 | delaz.f delaz2.f direct1.f dist.f dtres.f exist.f \ 9 | freeunit.f getdata.f getinp.f ifindi.f \ 10 | indexxi.f juliam.f lsfit_lsqr.f lsfit_svd.f \ 11 | lsqr.f matmult1.f matmult2.f matmult3.f mdian1.f \ 12 | normlz.f partials.f ran.f redist.f refract.f \ 13 | resstat.f scopy.f sdc2.f setorg.f skip.f \ 14 | snrm2.f sort.f sorti.f sscal.f \ 15 | svd.f tiddid.f trialsrc.f trimlen.f \ 16 | ttime.f vmodel.f weighting.f 17 | CSRCS = atoangle_.c atoangle.c datetime_.c hypot_.c rpad_.c sscanf3_.c 18 | OBJS = $(SRCS:%.f=%.o) $(CSRCS:%.c=%.o) 19 | INCLDIR = ../../include 20 | CFLAGS = -O -I$(INCLDIR) 21 | 22 | # Flags for GNU g77 compiler 23 | #FFLAGS = -O -I$(INCLDIR) -fno-silent -Wall -implicit 24 | 25 | # Flags for SUN f77 compiler 26 | FFLAGS = -I$(INCLDIR) 27 | 28 | # Following line needed on HP-UX (hasn't been tested, though). 29 | #LDFLAGS = +U77 30 | 31 | all: $(CMD) 32 | 33 | $(CMD): $(OBJS) 34 | $(FC) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ 35 | 36 | %.o: %.f 37 | $(FC) $(FFLAGS) -c $(@F:.o=.f) -o $@ 38 | 39 | # Extensive lint-like diagnostic listing (SUN f77 only) 40 | hypoDD.lst: $(SRCS) 41 | f77 -e -Xlist -c $(SRCS) 42 | 43 | clean: 44 | -rm -f $(CMD) *.o core a.out *.fln junk 45 | 46 | # Include-file dependencies 47 | 48 | dist.o : $(INCLDIR)/geocoord.inc 49 | redist.o : $(INCLDIR)/geocoord.inc 50 | sdc2.o : $(INCLDIR)/geocoord.inc 51 | setorg.o : $(INCLDIR)/geocoord.inc 52 | 53 | cluster1.o : $(INCLDIR)/hypoDD.inc 54 | dtres.o : $(INCLDIR)/hypoDD.inc 55 | getdata.o : $(INCLDIR)/hypoDD.inc 56 | hypoDD.o : $(INCLDIR)/hypoDD.inc 57 | lsfit_lsqr.o : $(INCLDIR)/hypoDD.inc 58 | lsfit_svd.o : $(INCLDIR)/hypoDD.inc 59 | partials.o : $(INCLDIR)/hypoDD.inc 60 | refract.o : $(INCLDIR)/hypoDD.inc 61 | resstat.o : $(INCLDIR)/hypoDD.inc 62 | skip.o : $(INCLDIR)/hypoDD.inc 63 | tiddid.o : $(INCLDIR)/hypoDD.inc 64 | trialsrc.o : $(INCLDIR)/hypoDD.inc 65 | ttime.o : $(INCLDIR)/hypoDD.inc 66 | vmodel.o : $(INCLDIR)/hypoDD.inc 67 | weighting.o : $(INCLDIR)/hypoDD.inc 68 | 69 | atoangle_.o : $(INCLDIR)/compat.h 70 | atoangle_.o : $(INCLDIR)/f77types.h 71 | datetime_.o : $(INCLDIR)/f77types.h 72 | rpad_.o : $(INCLDIR)/f77types.h 73 | sscanf3_.o : $(INCLDIR)/compat.h 74 | sscanf3_.o : $(INCLDIR)/f77types.h 75 | 76 | -------------------------------------------------------------------------------- /HypoDD/convert_stations.py: -------------------------------------------------------------------------------- 1 | #%% 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | # %% 7 | stations = pd.read_csv('stations.csv', sep="\t") 8 | 9 | # %% 10 | converted_hypoinverse = [] 11 | converted_hypoDD = {} 12 | 13 | for i in tqdm(range(len(stations))): 14 | 15 | network_code, station_code, comp_code, channel_code = stations.iloc[i]['station'].split('.') 16 | station_weight = " " 17 | lat_degree = int(stations.iloc[i]['latitude']) 18 | lat_minute = (stations.iloc[i]['latitude'] - lat_degree) * 60 19 | north = "N" if lat_degree >= 0 else "S" 20 | lng_degree 
= int(stations.iloc[i]['longitude']) 21 | lng_minute = (stations.iloc[i]['longitude'] - lng_degree) * 60 22 | west = "W" if lng_degree <= 0 else "E" 23 | elevation = stations.iloc[i]['elevation(m)'] 24 | line_hypoinverse = f"{station_code:<5} {network_code:<2} {comp_code[:-1]:<1}{channel_code:<3} {station_weight}{abs(lat_degree):2.0f} {abs(lat_minute):7.4f}{north}{abs(lng_degree):3.0f} {abs(lng_minute):7.4f}{west}{elevation:4.0f}\n" 25 | # line_hypoDD = f"{network_code:<2}.{station_code:<5} {stations.iloc[i]['latitude']:.3f}, {stations.iloc[i]['longitude']:.3f}\n" 26 | #line_hypoDD = f"{station_code} {stations.iloc[i]['latitude']:.3f} {stations.iloc[i]['longitude']:.3f}\n" 27 | converted_hypoinverse.append(line_hypoinverse) 28 | #converted_hypoDD.append(line_hypoDD) 29 | converted_hypoDD[f"{station_code}"] = f"{station_code} {stations.iloc[i]['latitude']:.3f} {stations.iloc[i]['longitude']:.3f}\n" 30 | 31 | # %% 32 | out_file = 'stations_hypoinverse.dat' 33 | with open(out_file, 'w') as f: 34 | f.writelines(converted_hypoinverse) 35 | 36 | out_file = 'stations_hypoDD.dat' 37 | # converted_hypoDD = list(set(converted_hypoDD)) 38 | with open(out_file, 'w') as f: 39 | #f.writelines(converted_hypoDD) 40 | for k, v in converted_hypoDD.items(): 41 | f.write(v) 42 | 43 | # %% 44 | -------------------------------------------------------------------------------- /HypoDD/gamma2hypoinverse.py: -------------------------------------------------------------------------------- 1 | #%% 2 | from datetime import datetime 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | # %% 9 | picks = pd.read_csv('gamma_picks.csv', sep="\t") 10 | events = pd.read_csv('gamma_catalog.csv', sep="\t") 11 | 12 | # %% 13 | events["match_id"] = events.apply(lambda x: f'{x["event_idx"]}_{x["file_index"]}', axis=1) 14 | picks["match_id"] = picks.apply(lambda x: f'{x["event_idx"]}_{x["file_index"]}', axis=1) 15 | 16 | # %% 17 | out_file = open("hypoInput.arc", "w") 18 | 19 | picks_by_event = picks.groupby("match_id").groups 20 | 21 | for i in tqdm(range(len(events))): 22 | 23 | event = events.iloc[i] 24 | event_time = datetime.strptime(event["time"], "%Y-%m-%dT%H:%M:%S.%f").strftime("%Y%m%d%H%M%S%f")[:-4] 25 | lat_degree = int(event["latitude"]) 26 | lat_minute = (event["latitude"] - lat_degree) * 60 * 100 27 | south = "S" if lat_degree <= 0 else " " 28 | lng_degree = int(event["longitude"]) 29 | lng_minute = (event["longitude"] - lng_degree) * 60 * 100 30 | east = "E" if lng_degree >= 0 else " " 31 | depth = event["depth(m)"] / 1e3 * 100 32 | event_line = f"{event_time}{abs(lat_degree):2d}{south}{abs(lat_minute):4.0f}{abs(lng_degree):3d}{east}{abs(lng_minute):4.0f}{depth:5.0f}" 33 | out_file.write(event_line + "\n") 34 | 35 | picks_idx = picks_by_event[event["match_id"]] 36 | for j in picks_idx: 37 | pick = picks.iloc[j] 38 | network_code, station_code, comp_code, channel_code = pick['id'].split('.') 39 | phase_type = pick['type'] 40 | phase_weight = min(max(int((1 - pick['prob']) / (1 - 0.3) * 4) - 1, 0), 3) 41 | pick_time = datetime.strptime(pick["timestamp"], "%Y-%m-%dT%H:%M:%S.%f") 42 | phase_time_minute = pick_time.strftime("%Y%m%d%H%M") 43 | phase_time_second = pick_time.strftime("%S%f")[:-4] 44 | tmp_line = f"{station_code:<5}{network_code:<2} {comp_code:<1}{channel_code:<3}" 45 | if phase_type.upper() == 'P': 46 | pick_line = f"{tmp_line:<13} P {phase_weight:<1d}{phase_time_minute} {phase_time_second}" 47 | elif phase_type.upper() == 'S': 48 | pick_line = f"{tmp_line:<13} 
4{phase_time_minute} {'':<12}{phase_time_second} S {phase_weight:<1d}" 49 | else: 50 | raise ValueError(f"Phase type error {phase_type}") 51 | out_file.write(pick_line + "\n") 52 | 53 | out_file.write("\n") 54 | if i > 1e3: 55 | break 56 | 57 | out_file.close() 58 | -------------------------------------------------------------------------------- /HypoDD/hyp.command: -------------------------------------------------------------------------------- 1 | * This is a very simple hypoinverse test command file. 2 | * It uses only a simple station and crust model file, 3 | * with no station delay file or other options. 4 | * Run hypoinverse, then type @test2000.hyp at the command prompt. 5 | 6 | 200 t 2000 0 /enable y2000 formats 7 | H71 3 1 3 /use new hypoinverse station format 8 | DIS 4 50 1 3 /Main Distance weighting 9 | RMS 4 0.16 1.5 3 /Residual weighting 10 | ERR .10 11 | *POS 1.8 12 | MIN 5 /min number of stations 13 | ZTR 8 /trial depth 14 | *WET 1. .5 .2 .1 /weighting by pick quality 15 | *PRE 3, 3 0 0 9 /magnitude 16 | * OUTPUT 17 | ERF T 18 | TOP F 19 | 20 | STA 'stations_hypoinverse.dat' 21 | LET 5 2 0 /Net Sta Chn 22 | TYP Read in crustal model(s): 23 | CRH 1 'vel_model_P.crh' /read crust model for Vp, here depth 0 is relative to the average elevation of stations 24 | CRH 2 'vel_model_S.crh' /read crust model for Vs 25 | SAL 1 2 26 | PHS 'hypoInput.arc' /input phase file 27 | 28 | FIL /automatically set phase format from file 29 | ARC 'hypoOut.arc' /output archive file 30 | PRT 'prtOut.prt' /output print file 31 | SUM 'catOut.sum' /output location summary 32 | *RDM T 33 | CAR 1 34 | *LST 2 35 | LOC /locate the earthquake 36 | STO 37 | -------------------------------------------------------------------------------- /HypoDD/hypoDD.inc: -------------------------------------------------------------------------------- 1 | c hypoDD.inc: Stores parameters that define array dimensions in hypoDD. 2 | c Modify to fit size of problem and available computer memory. 3 | c Parameter Description: 4 | c MAXEVE: Max number of events (must be at least the size of the number 5 | c of events listed in the event file) 6 | c MAXDATA: Max number of observations (must be at least the size of the 7 | c number of observations). 8 | c MAXEVE0: Max number of events used for SVD. If only LSQR is used, 9 | c MAXEVE0 can be set to 2 to free up memory. 10 | c MAXDATA0: Max number of observations used for SVD. If only LSQR is used, 11 | c MAXDATA0 can be set to 1 to free up memory. 12 | c MAXLAY: Max number of model layers. 13 | c MAXSTA: Max number of stations. 14 | c MAXCL: Max number of clusters allowed. 15 | integer*4 MAXEVE, MAXLAY, MAXDATA, MAXSTA, MAXEVE0, MAXDATA0, 16 | & MAXCL 17 | 18 | cc parameters for small size problems (e.g. SUN ULTRA-5, 256 MB RAM) 19 | c parameter(MAXEVE= 7000, 20 | c & MAXDATA= 5000000, 21 | c & MAXEVE0= 30, 22 | c & MAXDATA0= 5000, 23 | c & MAXLAY= 12, 24 | c & MAXSTA= 2000, 25 | c & MAXCL= 20) 26 | 27 | c parameters for medium size problems (e.g. : SUN ULTRA-2, 768 MB RAM) 28 | c parameter(MAXEVE= 8000, 29 | c & MAXDATA= 5000000, 30 | c parameter(MAXEVE= 10800, 31 | c & MAXDATA= 3500000, 32 | c & MAXEVE0= 2, 33 | c & MAXDATA0= 1, 34 | c & MAXLAY= 20, 35 | c & MAXSTA= 2400, 36 | c & MAXCL= 50) 37 | 38 | cc parameters for large problems (e.g. 
SUN BLADE 100, 2 GB RAM): 39 | parameter(MAXEVE= 10800, 40 | & MAXDATA= 9100000, 41 | & MAXEVE0= 50, 42 | & MAXDATA0= 10000, 43 | & MAXLAY= 15, 44 | & MAXSTA= 1300, 45 | & MAXCL= 100) 46 | 47 | cc parameters for very large problems, with cluster1 enabled (e.g. SUN BLADE 100, 2 GB RAM): 48 | c parameter(MAXEVE= 100000, 49 | c & MAXDATA= 10000000, 50 | c & MAXEVE0= 2, 51 | c & MAXDATA0= 1, 52 | c & MAXLAY= 12, 53 | c & MAXSTA= 600, 54 | c & MAXCL= 1) 55 | 56 | cc parameters for customized problems (e.g. SUN BLADE 100, 2 GB RAM): 57 | c parameter(MAXEVE= 27000, 58 | c & MAXDATA= 2900000, 59 | c & MAXEVE0= 2, 60 | c & MAXDATA0= 1, 61 | c & MAXLAY= 12, 62 | c & MAXSTA= 155, 63 | c & MAXCL= 10) 64 | 65 | -------------------------------------------------------------------------------- /HypoDD/hypodd_cc.inp: -------------------------------------------------------------------------------- 1 | * RELOC.INP: 2 | *--- input file selection 3 | * cross correlation diff times: 4 | 5 | * 6 | *catalog P diff times: 7 | dt.ct 8 | * 9 | * event file: 10 | event.sel 11 | * 12 | * station file: 13 | stations.dat 14 | * 15 | *--- output file selection 16 | * original locations: 17 | hypoDD.loc 18 | * relocations: 19 | hypoDD.reloc 20 | * station information: 21 | hypoDD.sta 22 | * residual information: 23 | hypoDD.res 24 | * source paramater information: 25 | hypoDD.src 26 | * 27 | *--- data type selection: 28 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 29 | * IPHA: 1= P; 2= S; 3= P&S 30 | * DIST:max dist [km] between cluster centroid and station 31 | * IDAT IPHA DIST 32 | 2 3 120 33 | * 34 | *--- event clustering: 35 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 36 | * OBSCT: min # of obs/pair for network data (0= no clustering) 37 | * OBSCC OBSCT 38 | 0 8 39 | * 40 | *--- solution control: 41 | * ISTART: 1 = from single source; 2 = from network sources 42 | * ISOLV: 1 = SVD, 2=lsqr 43 | * NSET: number of sets of iteration with specifications following 44 | * ISTART ISOLV NSET 45 | 2 2 4 46 | * 47 | *--- data weighting and re-weighting: 48 | * NITER: last iteration to used the following weights 49 | * WTCCP, WTCCS: weight cross P, S 50 | * WTCTP, WTCTS: weight catalog P, S 51 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 52 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 53 | * DAMP: damping (for lsqr only) 54 | * --- CROSS DATA ----- ----CATALOG DATA ---- 55 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 56 | 4 -9 -9 -9 -9 1 1 8 -9 70 57 | 4 -9 -9 -9 -9 1 1 6 4 70 58 | 4 -9 -9 -9 -9 1 0.8 4 2 70 59 | 4 -9 -9 -9 -9 1 0.8 3 2 70 60 | * 61 | *--- 1D model: 62 | * NLAY: number of model layers 63 | * RATIO: vp/vs ratio 64 | * TOP: depths of top of layer (km) 65 | * VEL: layer velocities (km/s) 66 | * NLAY RATIO 67 | 12 1.82 68 | * TOP 69 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 70 | * VEL 71 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 72 | * 73 | *--- event selection: 74 | * CID: cluster to be relocated (0 = all) 75 | * ID: cuspids of event to be relocated (8 per line) 76 | * CID 77 | 0 78 | * ID 79 | -------------------------------------------------------------------------------- /HypoDD/hypodd_ct.inp: -------------------------------------------------------------------------------- 1 | * RELOC.INP: 2 | *--- input file selection 3 | * cross correlation diff times: 4 | 5 | * 6 | *catalog P diff times: 7 | dt.ct 8 | * 9 | * event file: 10 | event.sel 11 | * 12 | * station file: 13 | 
stations.dat 14 | * 15 | *--- output file selection 16 | * original locations: 17 | hypoDD.loc 18 | * relocations: 19 | hypoDD.reloc 20 | * station information: 21 | hypoDD.sta 22 | * residual information: 23 | hypoDD.res 24 | * source paramater information: 25 | hypoDD.src 26 | * 27 | *--- data type selection: 28 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 29 | * IPHA: 1= P; 2= S; 3= P&S 30 | * DIST:max dist [km] between cluster centroid and station 31 | * IDAT IPHA DIST 32 | 2 3 120 33 | * 34 | *--- event clustering: 35 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 36 | * OBSCT: min # of obs/pair for network data (0= no clustering) 37 | * OBSCC OBSCT 38 | 0 8 39 | * 40 | *--- solution control: 41 | * ISTART: 1 = from single source; 2 = from network sources 42 | * ISOLV: 1 = SVD, 2=lsqr 43 | * NSET: number of sets of iteration with specifications following 44 | * ISTART ISOLV NSET 45 | 2 2 4 46 | * 47 | *--- data weighting and re-weighting: 48 | * NITER: last iteration to used the following weights 49 | * WTCCP, WTCCS: weight cross P, S 50 | * WTCTP, WTCTS: weight catalog P, S 51 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 52 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 53 | * DAMP: damping (for lsqr only) 54 | * --- CROSS DATA ----- ----CATALOG DATA ---- 55 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 56 | 4 -9 -9 -9 -9 1 1 8 -9 70 57 | 4 -9 -9 -9 -9 1 1 6 4 70 58 | 4 -9 -9 -9 -9 1 0.8 4 2 70 59 | 4 -9 -9 -9 -9 1 0.8 3 2 70 60 | * 61 | *--- 1D model: 62 | * NLAY: number of model layers 63 | * RATIO: vp/vs ratio 64 | * TOP: depths of top of layer (km) 65 | * VEL: layer velocities (km/s) 66 | * NLAY RATIO 67 | 12 1.82 68 | * TOP 69 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 70 | * VEL 71 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 72 | * 73 | *--- event selection: 74 | * CID: cluster to be relocated (0 = all) 75 | * ID: cuspids of event to be relocated (8 per line) 76 | * CID 77 | 0 78 | * ID 79 | -------------------------------------------------------------------------------- /HypoDD/ph2dt.inp: -------------------------------------------------------------------------------- 1 | * ph2dt.inp - input control file for program ph2dt 2 | * Input station file: 3 | stations_hypoDD.dat 4 | * Input phase file: 5 | hypoDD.pha 6 | *MINWGHT: min. pick weight allowed [0] 7 | *MAXDIST: max. distance in km between event pair and stations [200] 8 | *MAXSEP: max. hypocentral separation in km [10] 9 | *MAXNGH: max. number of neighbors per event [10] 10 | *MINLNK: min. number of links required to define a neighbor [8] 11 | *MINOBS: min. number of links per pair saved [8] 12 | *MAXOBS: max. 
number of links per pair saved [20] 13 | *MINWGHT MAXDIST MAXSEP MAXNGH MINLNK MINOBS MAXOBS 14 | 0 120 10 50 8 8 100 15 | -------------------------------------------------------------------------------- /HypoDD/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | hypo=../hyp1.40/source/hyp1.40 3 | ph2dt=../HYPODD/src/ph2dt/ph2dt 4 | hypoDD=../HYPODD/src/hypoDD/hypoDD 5 | 6 | ## gamma to hypoDD 7 | python gamma2hypodd.py 8 | # $ph2dt ph2dt.inp 9 | # $hypoDD hypoDD.inp 10 | 11 | # ## gamma to hypoinverse 12 | # python convert_stations.py 13 | # python convert_picks.py 14 | # $hypo < hyp.command 15 | 16 | # ## hypoinverse to hypoDD 17 | # python hypoinverse2hypodd.py 18 | # $ph2dt ph2dt.inp 19 | # $hypoDD hypoDD.inp 20 | -------------------------------------------------------------------------------- /HypoDD/vel_model_P.crh: -------------------------------------------------------------------------------- 1 | MODEL Vp from REAL 2 | 5.30 0.00 3 | 5.65 1.00 4 | 5.93 3.00 5 | 6.20 7.00 6 | 7.50 31.00 7 | 8.11 31.10 8 | -------------------------------------------------------------------------------- /HypoDD/vel_model_S.crh: -------------------------------------------------------------------------------- 1 | MODEL Vs from REAL 2 | 2.75 0.00 3 | 2.80 1.00 4 | 3.10 3.00 5 | 3.40 7.00 6 | 4.00 31.00 7 | 4.49 31.10 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Weiqiang Zhu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/NCEDC/.gitignore: -------------------------------------------------------------------------------- 1 | gs 2 | data 3 | dataset 4 | waveform_h5 5 | waveform_ps_h5 6 | *.h5 7 | FDSNstationXML 8 | tmp_* 9 | mseed_list_NC 10 | mseed_list_SC 11 | 12 | -------------------------------------------------------------------------------- /datasets/NCEDC/extract_csv.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | 4 | import h5py 5 | import numpy as np 6 | import pandas as pd 7 | from tqdm import tqdm 8 | 9 | 10 | # %% 11 | # for folder in ["/nfs/quakeflow_dataset/NC/quakeflow_nc", "/nfs/quakeflow_dataset/NC"]: 12 | # for mode in ["", "_train", "_test"]: 13 | def process(i, folder, mode): 14 | h5_file = f"{folder}/waveform{mode}.h5" 15 | print(f"Processing {h5_file}") 16 | 17 | events_df = [] 18 | picks_df = [] 19 | with h5py.File(h5_file, "r") as f: 20 | event_ids = list(f.keys()) 21 | for event_id in tqdm(event_ids, desc=f"{h5_file}", position=i): 22 | event_attrs = dict(f[event_id].attrs) 23 | events_df.append(event_attrs) 24 | 25 | station_ids = list(f[event_id].keys()) 26 | for station_id in station_ids: 27 | station_attrs = dict(f[event_id][station_id].attrs) 28 | station_attrs["event_id"] = event_id 29 | station_attrs["station_id"] = station_id 30 | picks_df.append(station_attrs) 31 | 32 | events_df = pd.DataFrame(events_df) 33 | picks_df = pd.DataFrame(picks_df) 34 | events_df.to_csv(f"{folder}/events{mode}.csv", index=False) 35 | picks_df.to_csv(f"{folder}/picks{mode}.csv", index=False) 36 | 37 | 38 | # %% 39 | folders = [ 40 | "/nfs/quakeflow_dataset/NC/quakeflow_nc", 41 | "/nfs/quakeflow_dataset/SC/quakeflow_sc", 42 | "/nfs/quakeflow_dataset/NC", 43 | "/nfs/quakeflow_dataset/SC", 44 | ] 45 | mode = ["", "_train", "_test"] 46 | inputs = [(folder, m) for folder in folders for m in mode] 47 | 48 | pbar = tqdm(total=len(inputs)) 49 | callback = lambda *args: pbar.update() 50 | 51 | with mp.Pool(len(inputs)) as pool: 52 | jobs = [] 53 | for i, (folder, mode) in enumerate(inputs): 54 | job = pool.apply_async( 55 | process, 56 | args=( 57 | i, 58 | folder, 59 | mode, 60 | ), 61 | callback=callback, 62 | ) 63 | jobs.append(job) 64 | pool.close() 65 | pool.join() 66 | 67 | results = [job.get() for job in jobs] 68 | -------------------------------------------------------------------------------- /datasets/NCEDC/extract_ps.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | import os 4 | 5 | import h5py 6 | from tqdm import tqdm 7 | 8 | # %% 9 | h5_dir = "waveform_h5" 10 | h5_files = os.listdir(h5_dir) 11 | 12 | # %% 13 | result_path = "waveform_ps_h5" 14 | if not os.path.exists(result_path): 15 | os.makedirs(result_path) 16 | 17 | 18 | # %% 19 | def run(h5_file): 20 | h5_input = os.path.join(h5_dir, h5_file) 21 | h5_output = os.path.join(result_path, h5_file) 22 | pos = 2022 - int(h5_file.split("/")[-1].split(".")[0]) 23 | with h5py.File(h5_input, "r") as fin: 24 | with h5py.File(h5_output, "w") as fout: 25 | for event in tqdm(fin.keys(), desc=h5_file, total=len(fin.keys()), position=pos, leave=True): 26 | # copy event and attributes 27 | gp = fout.create_group(event) 28 | for key in fin[event].attrs.keys(): 29 | gp.attrs[key] = fin[event].attrs[key] 30 | num_station = 0 31 | for station in fin[event].keys(): 32 | if "S" in 
fin[event][station].attrs["phase_type"]: 33 | ds = gp.create_dataset(station, data=fin[event][station]) 34 | for key in fin[event][station].attrs.keys(): 35 | ds.attrs[key] = fin[event][station].attrs[key] 36 | num_station += 1 37 | else: 38 | continue 39 | gp.attrs["nx"] = num_station 40 | 41 | 42 | # %% 43 | if __name__ == "__main__": 44 | # run(0, h5_files[0]) 45 | 46 | ncpu = len(h5_files) 47 | print(f"Using {ncpu} CPUs") 48 | with mp.Pool(ncpu) as p: 49 | p.map(run, h5_files) 50 | -------------------------------------------------------------------------------- /datasets/NCEDC/merge_hdf5.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | 4 | import h5py 5 | import matplotlib.pyplot as plt 6 | from tqdm import tqdm 7 | 8 | # %% 9 | h5_dir = "waveform_ps_h5" 10 | h5_out = "waveform_ps.h5" 11 | h5_train = "waveform_ps_train.h5" 12 | h5_test = "waveform_ps_test.h5" 13 | 14 | # # %% 15 | # h5_dir = "waveform_h5" 16 | # h5_out = "waveform.h5" 17 | # h5_train = "waveform_train.h5" 18 | # h5_test = "waveform_test.h5" 19 | 20 | h5_files = sorted(os.listdir(h5_dir)) 21 | train_files = h5_files[:-1] 22 | test_files = h5_files[-1:] 23 | # train_files = h5_files 24 | # train_files = [x for x in train_files if (x != "2014.h5") and (x not in [])] 25 | # test_files = [] 26 | print(f"train files: {train_files}") 27 | print(f"test files: {test_files}") 28 | 29 | # %% 30 | with h5py.File(h5_out, "w") as fp: 31 | # external linked file 32 | for h5_file in h5_files: 33 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 34 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 35 | if event not in fp: 36 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 37 | else: 38 | print(f"{event} already exists") 39 | continue 40 | 41 | # %% 42 | with h5py.File(h5_train, "w") as fp: 43 | # external linked file 44 | for h5_file in train_files: 45 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 46 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 47 | if event not in fp: 48 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 49 | else: 50 | print(f"{event} already exists") 51 | continue 52 | 53 | # %% 54 | with h5py.File(h5_test, "w") as fp: 55 | # external linked file 56 | for h5_file in test_files: 57 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 58 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 59 | if event not in fp: 60 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 61 | else: 62 | print(f"{event} already exists") 63 | continue 64 | 65 | # %% 66 | -------------------------------------------------------------------------------- /datasets/NCEDC/run.yaml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | 3 | workdir: . 
4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: n2-highmem-16 16 | 17 | # accelerators: P100:4 18 | 19 | cpus: 16+ 20 | # cpus: 64+ 21 | 22 | # disk_size: 300 23 | 24 | # disk_tier: high 25 | 26 | use_spot: True 27 | 28 | # spot_recovery: FAILOVER 29 | 30 | # image_id: docker:zhuwq0/quakeflow:latest 31 | 32 | envs: 33 | JOB: quakeflow_dataset 34 | 35 | file_mounts: 36 | 37 | /scedc-pds: 38 | source: s3://scedc-pds/ 39 | mode: MOUNT 40 | 41 | /ncedc-pds: 42 | source: s3://ncedc-pds/ 43 | mode: MOUNT 44 | 45 | /quakeflow_dataset: 46 | source: gs://quakeflow_dataset/ 47 | mode: MOUNT 48 | 49 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 50 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 51 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 52 | ~/.config/gcloud/application_default_credentials.json: ~/.config/gcloud/application_default_credentials.json 53 | 54 | setup: | 55 | echo "Begin setup." 56 | sudo apt install rclone 57 | pip install fsspec gcsfs 58 | pip install obspy matplotlib 59 | pip install h5py tqdm 60 | 61 | run: | 62 | echo "Begin run." 63 | # python download_catalog.py 64 | python download_waveform.py -------------------------------------------------------------------------------- /datasets/SCEDC/.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | FDSNstationXML 3 | -------------------------------------------------------------------------------- /datasets/SCEDC/download_station.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from datetime import timezone 4 | 5 | import fsspec 6 | import obspy 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | # %% 11 | input_protocol = "s3" 12 | input_bucket = "scedc-pds" 13 | input_fs = fsspec.filesystem(input_protocol, anon=True) 14 | 15 | output_protocol = "gs" 16 | output_token = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 17 | output_bucket = "quakeflow_dataset/SC" 18 | output_fs = fsspec.filesystem(output_protocol, token=output_token) 19 | 20 | # %% 21 | station_path = f"{input_bucket}/FDSNstationXML" 22 | 23 | 24 | # %% 25 | def parse_inventory_csv(inventory): 26 | channel_list = [] 27 | for network in inventory: 28 | for station in network: 29 | for channel in station: 30 | if channel.sensor is None: 31 | sensor_description = "" 32 | else: 33 | sensor_description = channel.sensor.description 34 | channel_list.append( 35 | { 36 | "network": network.code, 37 | "station": station.code, 38 | "location": channel.location_code, 39 | "instrument": channel.code[:-1], 40 | "component": channel.code[-1], 41 | "channel": channel.code, 42 | "longitude": channel.longitude, 43 | "latitude": channel.latitude, 44 | "elevation_m": channel.elevation, 45 | "local_depth_m": channel.depth, 46 | "depth_km": round(-channel.elevation / 1000, 4), 47 | # "depth_km": channel.depth, 48 | "begin_time": ( 49 | channel.start_date.datetime.replace(tzinfo=timezone.utc).isoformat() 50 | if channel.start_date is not None 51 | else None 52 | ), 53 | "end_time": ( 54 | channel.end_date.datetime.replace(tzinfo=timezone.utc).isoformat() 55 | if channel.end_date is not None 56 | else None 57 | ), 58 | "azimuth": channel.azimuth, 59 | "dip": channel.dip, 60 | "sensitivity": ( 61 | channel.response.instrument_sensitivity.value 62 | if channel.response.instrument_sensitivity 63 | else None 64 | ), 65 | "site": station.site.name, 66 | "sensor": 
sensor_description, 67 | } 68 | ) 69 | channel_list = pd.DataFrame(channel_list) 70 | 71 | print(f"Parse {len(channel_list)} channels into csv") 72 | 73 | return channel_list 74 | 75 | 76 | # %% 77 | inv = obspy.Inventory() 78 | for network in input_fs.glob(f"{station_path}/*"): 79 | print(f"Parse {network}") 80 | for xml in tqdm(input_fs.glob(f"{network}/*.xml")): 81 | with input_fs.open(xml) as f: 82 | inv += obspy.read_inventory(f) 83 | 84 | # %% 85 | stations = parse_inventory_csv(inv) 86 | 87 | # %% 88 | for network, sta in stations.groupby("network"): 89 | with output_fs.open(f"{output_bucket}/station/{network}.csv", "wb") as f: 90 | sta.to_csv(f, index=False) 91 | 92 | # %% 93 | -------------------------------------------------------------------------------- /datasets/SCEDC/extract_ps.py: -------------------------------------------------------------------------------- 1 | ../NCEDC/extract_ps.py -------------------------------------------------------------------------------- /datasets/SCEDC/merge_hdf5.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | 4 | import h5py 5 | import matplotlib.pyplot as plt 6 | from tqdm import tqdm 7 | 8 | # %% 9 | h5_dir = "waveform_ps_h5" 10 | h5_out = "waveform_ps.h5" 11 | h5_train = "waveform_ps_train.h5" 12 | h5_test = "waveform_ps_test.h5" 13 | 14 | # # %% 15 | # h5_dir = "waveform_h5" 16 | # h5_out = "waveform.h5" 17 | # h5_train = "waveform_train.h5" 18 | # h5_test = "waveform_test.h5" 19 | 20 | h5_files = sorted(os.listdir(h5_dir)) 21 | h5_files = [x for x in h5_files if (x not in ["2019.h5", "2020.h5"])] 22 | train_files = h5_files[:-1] 23 | test_files = h5_files[-1:] 24 | # train_files = h5_files 25 | # train_files = [x for x in train_files if (x != "2014.h5") and (x not in [])] 26 | # test_files = [] 27 | print(f"train files: {train_files}") 28 | print(f"test files: {test_files}") 29 | 30 | # %% 31 | with h5py.File(h5_out, "w") as fp: 32 | # external linked file 33 | for h5_file in h5_files: 34 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 35 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 36 | if event not in fp: 37 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 38 | else: 39 | print(f"{event} already exists") 40 | continue 41 | 42 | # %% 43 | with h5py.File(h5_train, "w") as fp: 44 | # external linked file 45 | for h5_file in train_files: 46 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 47 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 48 | if event not in fp: 49 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 50 | else: 51 | print(f"{event} already exists") 52 | continue 53 | 54 | # %% 55 | with h5py.File(h5_test, "w") as fp: 56 | # external linked file 57 | for h5_file in test_files: 58 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 59 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 60 | if event not in fp: 61 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 62 | else: 63 | print(f"{event} already exists") 64 | continue 65 | 66 | # %% 67 | -------------------------------------------------------------------------------- /datasets/SCEDC/run.yaml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | 3 | workdir: . 
4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: n2-highmem-16 16 | 17 | # accelerators: P100:4 18 | 19 | cpus: 16+ 20 | # cpus: 64+ 21 | 22 | # disk_size: 300 23 | 24 | # disk_tier: high 25 | 26 | # use_spot: True 27 | 28 | # spot_recovery: FAILOVER 29 | 30 | # image_id: docker:zhuwq0/quakeflow:latest 31 | 32 | envs: 33 | JOB: quakeflow_dataset 34 | 35 | file_mounts: 36 | 37 | /scedc-pds: 38 | source: s3://scedc-pds/ 39 | mode: MOUNT 40 | 41 | /ncedc-pds: 42 | source: s3://ncedc-pds/ 43 | mode: MOUNT 44 | 45 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 46 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 47 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 48 | ~/.config/gcloud/application_default_credentials.json: ~/.config/gcloud/application_default_credentials.json 49 | 50 | setup: | 51 | echo "Begin setup." 52 | sudo apt install rclone 53 | pip install fsspec gcsfs s3fs 54 | pip install obspy matplotlib 55 | pip install h5py tqdm 56 | 57 | run: | 58 | echo "Begin run." 59 | # python download_catalog.py 60 | python download_waveform.py 61 | -------------------------------------------------------------------------------- /datasets/SCEDC/split_large_files.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | import os 4 | from glob import glob 5 | 6 | import h5py 7 | import numpy as np 8 | import pandas as pd 9 | from tqdm import tqdm 10 | 11 | # %% 12 | data_path = "waveform_ps_h5" 13 | result_path = "data" 14 | file_list = sorted(glob(f"{data_path}/*.h5")) 15 | # %% 16 | file_size = {file: os.path.getsize(file)/1e9 for file in file_list} 17 | 18 | # %% 19 | MAX_SIZE = 45 # GB 20 | for file, size in file_size.items(): 21 | if size > MAX_SIZE: 22 | # split into smaller files 23 | NUM_FILES = int(np.ceil(size / MAX_SIZE)) 24 | with h5py.File(file, "r") as f: 25 | event_ids = list(f.keys()) 26 | for event_id in tqdm(event_ids, desc=f"Processing {file}"): 27 | index = int(event_id[-1]) % NUM_FILES 28 | # with h5py.File(f"{result_path}/{file.split('/')[-1].replace('.h5', '')}_{index}.h5", "a") as g: 29 | with h5py.File(f"{data_path}/{file.split('/')[-1].replace('.h5', '')}_{index}.h5", "a") as g: 30 | if event_id in g: 31 | print(f"Event {event_id} already exists in {file.split('/')[-1].replace('.h5', '')}_{index}.h5") 32 | continue 33 | # copy 34 | f.copy(event_id, g) 35 | # else: 36 | # print(f"Copying {file} to {result_path}") 37 | # os.system(f"cp {file} {result_path}") -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | Ridgecrest_demo 2 | Ridgecrest_oneweek 3 | Tahoe 4 | SmithValley 5 | Antilles 6 | 7 | condaenv.*.txt 8 | 9 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # [QuakeFlow: A Scalable Machine-learning-based Earthquake Monitoring Workflow with Cloud Computing](https://ai4eps.github.io/QuakeFlow/) 4 | [![documentation](https://github.com/ai4eps/QuakeFlow/actions/workflows/docs.yml/badge.svg)](https://ai4eps.github.io/QuakeFlow/) 5 | 6 | ## Overview 7 | 8 | ![](https://raw.githubusercontent.com/ai4eps/QuakeFlow/master/docs/assets/quakeflow_diagram.png) 9 | 10 | QuakeFlow is a scalable deep-learning-based earthquake monitoring system with cloud computing. 
11 | It applies the state-of-the-art deep learning/machine learning models for earthquake detection. 12 | With auto-scaling enabled on Kubernetes, our system can balance the computational load against the available computing resources. 13 | 14 | 15 | 16 | ## Current Modules 17 | 18 | ### Models 19 | - [DeepDenoiser](https://ai4eps.github.io/DeepDenoiser/): [(paper)](https://arxiv.org/abs/1811.02695) [(example)](https://ai4eps.github.io/DeepDenoiser/example_interactive/) 20 | - [PhaseNet](https://ai4eps.github.io/PhaseNet/): [(paper)](https://arxiv.org/abs/1803.03211) [(example)](https://ai4eps.github.io/PhaseNet/example_interactive/) 21 | - [GaMMA](https://ai4eps.github.io/GaMMA/): [(paper)](https://arxiv.org/abs/2109.09008) [(example)](https://ai4eps.github.io/GaMMA/example_interactive/) 22 | - [HypoDD](https://www.ldeo.columbia.edu/~felixw/hypoDD.html) [(paper)](https://pubs.geoscienceworld.org/ssa/bssa/article-abstract/90/6/1353/120565/A-Double-Difference-Earthquake-Location-Algorithm?redirectedFrom=fulltext) [(example)](https://github.com/ai4eps/QuakeFlow/blob/master/HypoDD/gamma2hypodd.py) 23 | - More models to be added. Contributions are highly welcome! 24 | 25 | ### Data stream 26 | - [Plotly](https://dash.gallery/Portal/): [ui.quakeflow.com](http://ui.quakeflow.com) 27 | - [Kafka](https://www.confluent.io/what-is-apache-kafka/) 28 | - [Spark Streaming](https://spark.apache.org/docs/latest/streaming-programming-guide.html) 29 | 30 | ### Data process 31 | - [Colab example](https://colab.research.google.com/drive/19dC8-Vq0mv1Q9K-OS8VJf3xNEweKv4SN) 32 | - [Kubeflow](https://www.kubeflow.org/): [(example)](https://ai4eps.github.io/QuakeFlow/kubeflow/workflow/) 33 | 34 | ![](https://raw.githubusercontent.com/wayneweiqiang/QuakeFlow/master/docs/assets/quakeflow.gif) 35 | 36 | ## Deployment 37 | 38 | QuakeFlow can be deployed on any cloud platform with a Kubernetes service. 39 | 40 | - For Google Cloud Platform (GCP), check out the [GCP README](gcp_readme.md). 41 | - For on-premise servers, check out the [Kubernetes README](k8s_readme.md). 
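For a quick start on GCP, the sketch below condenses the first deployment steps from [gcp_readme.md](gcp_readme.md); the cluster name, zone, and machine type are the example values used there, so adjust them to your own project, and follow that document for pod autoscaling, service exposure, and Kafka setup:

```
# Create an autoscaling GKE cluster (example name/zone/machine type from gcp_readme.md)
gcloud container clusters create quakeflow-cluster --zone="us-west1-a" --scopes="cloud-platform" --image-type="ubuntu" --machine-type="n1-standard-2" --num-nodes=2 --enable-autoscaling --min-nodes 1 --max-nodes 4

# Point kubectl at the new cluster and deploy the QuakeFlow services
gcloud container clusters get-credentials quakeflow-cluster
kubectl apply -f quakeflow-gcp.yaml   # manifest is under kubernetes/ in this repository
```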
42 | 43 | 53 | 54 | -------------------------------------------------------------------------------- /docs/assets/inference_pipeline_plotly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/inference_pipeline_plotly.png -------------------------------------------------------------------------------- /docs/assets/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/logo.jpg -------------------------------------------------------------------------------- /docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/logo.png -------------------------------------------------------------------------------- /docs/assets/quakeflow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/quakeflow.gif -------------------------------------------------------------------------------- /docs/assets/quakeflow_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/quakeflow_diagram.png -------------------------------------------------------------------------------- /docs/data.md: -------------------------------------------------------------------------------- 1 | # Downloading Data using Obspy -------------------------------------------------------------------------------- /docs/data_format.md: -------------------------------------------------------------------------------- 1 | # Standard Data Formats of QuakeFlow 2 | 3 | - Raw data: 4 | - Waveform (MSEED): 5 | - Year/Jday/Hour/Network.Station.Location.Channel.mseed 6 | - Station (xml): 7 | - Network.Station.xml 8 | - Events (CSV): 9 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 10 | - Picks (CSV) 11 | - columns: station_id (network.station.location.channel) phase_time, phase_type, phase_score, event_id 12 | - Phase picking: 13 | - Picks (CSV): 14 | - columns: station_id (network.station.location.channel) phase_time, phase_type, phase_score, phase_polarity 15 | - Phase association: 16 | - Events (CSV): 17 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 18 | - Picks (CSV): 19 | - columns: station_id (network.station.location.channel), phase_time, phase_type, phase_score, phase_polarity, event_id 20 | - Earthquake location: 21 | - Events (CSV): 22 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 23 | - Earthquake relocation: 24 | - Events (CSV): 25 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 26 | - Focal mechanism: 27 | - Focal mechanism (CSV): 28 | - columns: strike1, dip1, rake1, strike2, dip2, rake2, event_id -------------------------------------------------------------------------------- /docs/deepdenoiser.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/deepdenoiser.md 
-------------------------------------------------------------------------------- /docs/earthquake_location.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/earthquake_location.md -------------------------------------------------------------------------------- /docs/gamma.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/gamma.md -------------------------------------------------------------------------------- /docs/gcp_readme.md: -------------------------------------------------------------------------------- 1 | # Quick README (not a detailed guide) 2 | 3 | 4 | 1. Create a cluster on GCP with node autoscaling 5 | 6 | ``` 7 | gcloud container clusters create quakeflow-cluster --zone="us-west1-a" --scopes="cloud-platform" --image-type="ubuntu" --machine-type="n1-standard-2" --num-nodes=2 --enable-autoscaling --min-nodes 1 --max-nodes 4 8 | ``` 9 | 10 | 2. Switch to the correct context 11 | 12 | ``` 13 | gcloud container clusters get-credentials quakeflow-cluster 14 | ``` 15 | 16 | 3. Deploy the services on the cluster 17 | 18 | ``` 19 | kubectl apply -f quakeflow-gcp.yaml 20 | ``` 21 | 22 | 4. Set up the APIs 23 | 24 | 4.1 Add pod autoscaling 25 | ``` 26 | kubectl autoscale deployment phasenet-api --cpu-percent=80 --min=1 --max=10 27 | kubectl autoscale deployment gmma-api --cpu-percent=80 --min=1 --max=10 28 | ``` 29 | 30 | 4.2 Expose the APIs 31 | ``` 32 | kubectl expose deployment phasenet-api --type=LoadBalancer --name=phasenet-service 33 | kubectl expose deployment gmma-api --type=LoadBalancer --name=gmma-service 34 | kubectl expose deployment quakeflow-ui --type=LoadBalancer --name=quakeflow-ui 35 | ``` 36 | 37 | 5. Install Kafka 38 | 39 | 5.1 Install 40 | ``` 41 | helm install quakeflow-kafka bitnami/kafka 42 | ``` 43 | 44 | 5.2 Create topics (the bootstrap server matches the `quakeflow-kafka` Helm release installed above) 45 | ``` 46 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never --command -- bash -c "kafka-topics.sh --create --topic phasenet_picks --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic gmma_events --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic waveform_raw --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 47 | ``` 48 | 49 | 5.3 Check status 50 | ``` 51 | helm status quakeflow-kafka 52 | ``` 53 | 54 | 55 | 6. Rolling-restart the deployments 56 | ``` 57 | kubectl rollout restart deployments 58 | ``` 59 | 60 | 7. 
Install Dashboard 61 | ``` 62 | kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml 63 | ``` 64 | 65 | Run the following command and then visit http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/ 66 | ``` 67 | kubectl proxy 68 | ``` 69 | 70 | If you are asked to provide a token, get one with the following command 71 | ``` 72 | gcloud config config-helper --format=json | jq -r '.credential.access_token' 73 | ``` 74 | -------------------------------------------------------------------------------- /docs/k8s_readme.md: -------------------------------------------------------------------------------- 1 | # Quick README (not a detailed guide) 2 | 3 | ## All-in-one script 4 | You need to preinstall [helm](https://helm.sh/), [kubectl](https://kubernetes.io/docs/tasks/tools/), [docker](https://docs.docker.com/engine/install/) and [minikube](https://minikube.sigs.k8s.io/docs/start/) (or any other local Kubernetes framework). 5 | 6 | Then deploy everything with the following script! 7 | 8 | ``` 9 | $ git clone --recurse-submodules -j8 git@github.com:wayneweiqiang/QuakeFlow.git 10 | $ sh deploy_local.sh 11 | ``` 12 | 13 | ## Prebuilt Kafka 14 | 15 | 1. Install 16 | ``` 17 | helm repo add bitnami https://charts.bitnami.com/bitnami 18 | helm install quakeflow-kafka bitnami/kafka 19 | ``` 20 | 21 | 2. Create topics 22 | ``` 23 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never --command -- bash -c "kafka-topics.sh --create --topic phasenet_picks --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic gmma_events --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic waveform_raw --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 24 | ``` 25 | 26 | 3. Check status 27 | ``` 28 | helm status quakeflow-kafka 29 | ``` 30 | 31 | ## Our own containers 32 | 33 | 1. Switch to the minikube environment 34 | ``` 35 | eval $(minikube docker-env) 36 | ``` 37 | 38 | 1.1. Fix metrics-server for auto-scaling (only for Docker) 39 | https://stackoverflow.com/questions/54106725/docker-kubernetes-mac-autoscaler-unable-to-find-metrics 40 | 41 | ``` 42 | kubectl apply -f metrics-server.yaml 43 | ``` 44 | 45 | 2. Build the Docker images; see the docs for each container 46 | 47 | ``` 48 | docker build --tag quakeflow-spark:1.0 . 49 | ... 50 | ``` 51 | 52 | 3. Create everything 53 | ``` 54 | kubectl apply -f quakeflow-delpoyment.yaml 55 | ``` 56 | 57 | 3.1 Add autoscaling 58 | ``` 59 | kubectl autoscale deployment phasenet-api --cpu-percent=80 --min=1 --max=10 60 | kubectl autoscale deployment gmma-api --cpu-percent=80 --min=1 --max=10 61 | ``` 62 | 63 | 3.2 Expose API 64 | ``` 65 | kubectl expose deployment phasenet-api --type=LoadBalancer --name=phasenet-service 66 | ``` 67 | 68 | 4. Check the pods 69 | ``` 70 | kubectl get pods 71 | ``` 72 | 73 | 5. Check the logs (an example) 74 | ``` 75 | kubectl logs quakeflow-spark-7699cd45d8-mvv6r 76 | ``` 77 | 78 | 6. Delete a single deployment 79 | ``` 80 | kubectl delete deploy quakeflow-spark 81 | ``` 82 | 83 | 7. 
Delete everything 84 | ``` 85 | kubectl delete -f quakeflow-delpoyment.yaml 86 | ``` 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /docs/kubeflow: -------------------------------------------------------------------------------- 1 | ../kubeflow -------------------------------------------------------------------------------- /docs/phasenet.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/phasenet.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - cartopy 7 | - obspy 8 | prefix: /opt/conda 9 | -------------------------------------------------------------------------------- /examples/california/.gitignore: -------------------------------------------------------------------------------- 1 | eikonal/ 2 | local/ 3 | tests/ 4 | mseed_list_NC/ 5 | mseed_list_SC/ 6 | tmp_NC/ 7 | tmp_SC/ 8 | NC/ 9 | SC/ 10 | .history/ 11 | local/ 12 | tests/ 13 | figures/ 14 | benchmark/ 15 | Cal/ 16 | cctorch_figures/ 17 | cctorch_figures2/ 18 | cctorch2_figures/ 19 | __pycache__/ 20 | -------------------------------------------------------------------------------- /examples/california/.skyignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | local/ 3 | tests/ 4 | figures/ 5 | benchmark/ 6 | Cal/ 7 | cctorch_figures/ 8 | cctorch_figures2/ 9 | cctorch2_figures/ 10 | 11 | -------------------------------------------------------------------------------- /examples/california/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_args(): 5 | parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data") 6 | parser.add_argument("--num_nodes", type=int, default=1) 7 | parser.add_argument("--node_rank", type=int, default=0) 8 | parser.add_argument("--year", type=int, default=2023) 9 | parser.add_argument("--root_path", type=str, default="local") 10 | parser.add_argument("--region", type=str, default="Cal") 11 | parser.add_argument("--bucket", type=str, default="quakeflow_catalog") 12 | parser.add_argument("--config", type=str, default="local/Mendocino/config.json") 13 | return parser.parse_args() 14 | -------------------------------------------------------------------------------- /examples/california/monitor.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import argparse 3 | import json 4 | import logging 5 | import os 6 | import time 7 | from tqdm import tqdm 8 | import fsspec 9 | 10 | logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s") 11 | 12 | # %% 13 | NUM_NODES = 32 # < 2004 14 | # NUM_NODES = 128 # >= 2004 15 | 16 | ###### Hardcoded ####### 17 | token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 18 | with open(token_json, "r") as fp: 19 | token = json.load(fp) 20 | fs = fsspec.filesystem("gs", token=token) 21 | 22 | # for year in tqdm(range(1986, 1999)[::-1]): 23 | # cmd = f"python run_phasenet.py --region NC --year {year} --num_nodes 1" 24 | # os.system(cmd) 25 | 26 | # raise 27 | 28 | for year in range(1999, 2005)[::-1]: 29 | 30 | cmds = [ 31 | f"python 
submit_phasenet.py --region NC --branch ncedc --year {year} --num_nodes {NUM_NODES}", 32 | f"python submit_phasenet.py --region SC --branch scedc --year {year} --num_nodes {NUM_NODES}", 33 | ] 34 | 35 | # while True: 36 | if True: 37 | for cmd in cmds: 38 | logging.info(f"Running: {cmd}") 39 | os.system(cmd) 40 | logging.info("Sleeping for 1 minutes...") 41 | time.sleep(60) 42 | 43 | finish = True 44 | for REGION in ["NC", "SC"]: 45 | for NODE_RANK in range(NUM_NODES): 46 | mseed_file = ( 47 | f"gs://quakeflow_catalog/{REGION}/phasenet/mseed_list/{year}_{NODE_RANK:03d}_{NUM_NODES:03d}.txt" 48 | ) 49 | if fs.exists(mseed_file): 50 | with fs.open(mseed_file, "r") as fp: 51 | mseed_list = fp.readlines() 52 | if len(mseed_list) > 0: 53 | print(f"{mseed_file}, {len(mseed_list) = }") 54 | finish = False 55 | break 56 | if finish: 57 | break 58 | -------------------------------------------------------------------------------- /examples/california/refresh.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import sky 3 | from tqdm import tqdm 4 | 5 | # %% 6 | status = sky.status() 7 | 8 | # %% 9 | for cluster in tqdm(sky.status()[::-1]): 10 | try: 11 | print(f"Refreshing {cluster['name']}...") 12 | sky.status(cluster_names=[cluster["name"]], refresh=True) 13 | if not cluster["to_down"]: 14 | sky.autostop(cluster["name"], idle_minutes=10, down=True) 15 | except Exception as e: 16 | print(e) 17 | 18 | # %% 19 | -------------------------------------------------------------------------------- /examples/california/run_gamma.yaml: -------------------------------------------------------------------------------- 1 | name: dev 2 | 3 | workdir: . 4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp # aws 10 | region: us-west1 # gcp 11 | # region: us-west-2 # aws 12 | # instance_type: n2-highmem-16 13 | # accelerators: P100:1 14 | # cpus: 16+ 15 | cpus: 16 16 | # disk_size: 300 17 | # disk_tier: high 18 | # use_spot: True 19 | # spot_recovery: FAILOVER 20 | # image_id: docker:zhuwq0/quakeflow:latest 21 | 22 | # # envs: 23 | # JOB: quakeflow 24 | # NCPU: 1 25 | # ROOT_PATH: /data 26 | # MODEL_NAME: phasenet_plus 27 | # WANDB_API_KEY: cb014c63ac451036ca406582b41d32ae83154289 28 | 29 | file_mounts: 30 | 31 | # /data/waveforms: 32 | # name: waveforms 33 | # source: waveforms_combined 34 | # mode: MOUNT 35 | 36 | # /dataset/stations: 37 | # name: stations 38 | # source: stations 39 | # mode: COPY 40 | 41 | # /data/waveforms: waveforms_combined 42 | # /dataset/stations: stations 43 | 44 | # ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | # ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | # ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | 48 | /opt/GaMMA: ../../GaMMA 49 | 50 | setup: | 51 | echo "Begin setup." 
52 | echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc 53 | pip install h5py tqdm wandb pandas numpy scipy 54 | pip install fsspec gcsfs 55 | pip install obspy pyproj 56 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 57 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 58 | pip install -e /opt/GaMMA 59 | 60 | run: | 61 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 62 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 63 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 64 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 65 | ls -al /opt 66 | ls -al /data 67 | fi 68 | python run_gamma.py --num_node $num_nodes --node_rank $SKYPILOT_NODE_RANK 69 | # torchrun \ 70 | # --nproc_per_node=${nproc_per_node} \ 71 | # --node_rank=${SKYPILOT_NODE_RANK} \ 72 | # --nnodes=$num_nodes \ 73 | # --master_addr=$master_addr \ 74 | # --master_port=8008 \ 75 | # train.py --model $MODEL_NAME --batch-size=256 --hdf5-file /dataset/train.h5 --test-hdf5-file /dataset/test.h5 \ 76 | # --workers 12 --stack-event --flip-polarity --drop-channel --output /checkpoint/$MODEL_NAME --wandb --wandb-project $MODEL_NAME --resume True -------------------------------------------------------------------------------- /examples/california/run_growclust_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import argparse 3 | import os 4 | from datetime import datetime 5 | 6 | import pandas as pd 7 | from tqdm import tqdm 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data") 12 | parser.add_argument("--num_nodes", type=int, default=1) 13 | parser.add_argument("--node_rank", type=int, default=0) 14 | parser.add_argument("--year", type=int, default=2023) 15 | parser.add_argument("--root_path", type=str, default="local") 16 | parser.add_argument("--region", type=str, default="Cal") 17 | parser.add_argument("--bucket", type=str, default="quakeflow_catalog") 18 | return parser.parse_args() 19 | 20 | 21 | # %% 22 | args = parse_args() 23 | root_path = args.root_path 24 | region = args.region 25 | 26 | # %% 27 | result_path = f"{region}/growclust" 28 | if not os.path.exists(f"{root_path}/{result_path}"): 29 | os.makedirs(f"{root_path}/{result_path}") 30 | 31 | # %% 32 | # stations_json = f"{region}/results/data/stations.json" 33 | # stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") 34 | station_csv = f"{region}/cctorch/cctorch_stations.csv" 35 | stations = pd.read_csv(f"{root_path}/{station_csv}") 36 | stations.set_index("station_id", inplace=True) 37 | 38 | 39 | lines = [] 40 | for i, row in stations.iterrows(): 41 | # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 42 | line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 43 | lines.append(line) 44 | 45 | with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: 46 | fp.writelines(lines) 47 | 48 | 49 | # %% 50 | # events_csv = f"{region}/results/phase_association/events.csv" 51 | # events_csv = f"{region}/adloc/ransac_events.csv" 52 | events_csv = f"{region}/cctorch/cctorch_events.csv" 53 | # event_file = f"{region}/cctorch/events.csv" 54 | events = pd.read_csv(f"{root_path}/{events_csv}") 55 | # event_df = event_df[event_df["gamma_score"] > 10] 56 | # event_index = [f"{x:06d}" for x in event_df["event_index"]] 57 | # 
events["time"] = pd.to_datetime(events["time"]) 58 | events["time"] = pd.to_datetime(events["event_time"]) 59 | if "magnitude" not in events.columns: 60 | events["magnitude"] = 0.0 61 | 62 | events[["year", "month", "day", "hour", "minute", "second"]] = ( 63 | events["time"] 64 | # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) 65 | .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) 66 | .apply(pd.Series) 67 | .apply(pd.to_numeric) 68 | ) 69 | 70 | lines = [] 71 | for i, row in events.iterrows(): 72 | # yr mon day hr min sec lat lon dep mag eh ez rms evid 73 | line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" 74 | lines.append(line) 75 | 76 | with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: 77 | fp.writelines(lines) 78 | 79 | # %% 80 | os.system(f"bash run_growclust_cc.sh {root_path} {region}") 81 | -------------------------------------------------------------------------------- /examples/california/run_growclust_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/growclust" ]; then 13 | mkdir -p $root_path/$region/growclust 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc 17 | cd $root_path/$region/growclust 18 | mkdir -p TT OUT 19 | 20 | if [ ! -d "GrowClust" ]; then 21 | git clone https://github.com/zhuwq0/GrowClust.git 22 | make -C GrowClust/SRC/ 23 | fi 24 | 25 | cat < growclust.inp 26 | **** Example GrowClust Control File ***** 27 | ******** Daniel Trugman, 2016 ********** 28 | ******************************************* 29 | * 30 | ******************************************* 31 | ************* Event list **************** 32 | ******************************************* 33 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 34 | 1 35 | * fin_evlist (event list file name) 36 | evlist.txt 37 | * 38 | ******************************************* 39 | ************ Station list ************* 40 | ******************************************* 41 | * stlist_fmt (0 = SEED channel, 1 = station name) 42 | 1 43 | * fin_stlist (station list file name) 44 | stlist.txt 45 | * 46 | ******************************************* 47 | ************* XCOR data *************** 48 | ******************************************* 49 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 50 | 1 12 51 | * fin_xcordat 52 | dt.cc 53 | * 54 | ******************************************* 55 | *** Velocity Model / Travel Time Tables *** 56 | ******************************************* 57 | * fin_vzmdl (input vz model file) 58 | vzmodel.txt 59 | * fout_vzfine (output, interpolated vz model file) 60 | TT/vzfine.txt 61 | * fout_pTT (output travel time table, P phase) 62 | TT/tt.pg 63 | * fout_sTT (output travel time table, S phase) 64 | TT/tt.sg 65 | * 66 | ****************************************** 67 | ***** Travel Time Table Parameters ****** 68 | ****************************************** 69 | * vpvs_factor rayparam_min (-1 = default) 70 | 1.732 0.0 71 | * tt_dep0 tt_dep1 tt_ddep 72 | 0. 71. 1. 73 | * tt_del0 tt_del1 tt_ddel 74 | 0. 500. 2. 
75 | * 76 | ****************************************** 77 | ***** GrowClust Algorithm Parameters ***** 78 | ****************************************** 79 | * rmin delmax rmsmax 80 | 0.1 120 1.0 81 | * rpsavgmin, rmincut ngoodmin iponly 82 | 0 0.1 8 0 83 | * 84 | ****************************************** 85 | ************ Output files **************** 86 | ****************************************** 87 | * nboot nbranch_min 88 | 0 1 89 | * fout_cat (relocated catalog) 90 | OUT/out.growclust_cc_cat 91 | * fout_clust (relocated cluster file) 92 | OUT/out.growclust_cc_clust 93 | * fout_log (program log) 94 | OUT/out.growclust_cc_log 95 | * fout_boot (bootstrap distribution) 96 | OUT/out.growclust_cc_boot 97 | ****************************************** 98 | ****************************************** 99 | EOF 100 | 101 | cat < vzmodel.txt 102 | 0.0 5.30 0.00 103 | 1.0 5.65 0.00 104 | 3.0 5.93 0.00 105 | 5.0 6.20 0.00 106 | 7.0 6.20 0.00 107 | 9.0 6.20 0.00 108 | 11.0 6.20 0.00 109 | 13.0 6.20 0.00 110 | 17.0 6.20 0.00 111 | 21.0 6.20 0.00 112 | 31.00 7.50 0.00 113 | 31.10 8.11 0.00 114 | 100.0 8.11 0.00 115 | EOF 116 | 117 | ./GrowClust/SRC/growclust growclust.inp 118 | cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt 119 | cd $WORKING_DIR 120 | -------------------------------------------------------------------------------- /examples/california/run_hypodd_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # from args import parse_args 3 | ## 4 | import argparse 5 | import json 6 | import os 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data") 14 | parser.add_argument("--num_nodes", type=int, default=1) 15 | parser.add_argument("--node_rank", type=int, default=0) 16 | parser.add_argument("--year", type=int, default=2023) 17 | parser.add_argument("--root_path", type=str, default="local") 18 | parser.add_argument("--region", type=str, default="Cal") 19 | parser.add_argument("--bucket", type=str, default="quakeflow_catalog") 20 | return parser.parse_args() 21 | 22 | 23 | # %% 24 | args = parse_args() 25 | root_path = args.root_path 26 | region = args.region 27 | 28 | # with open(f"{root_path}/{region}/config.json", "r") as fp: 29 | # config = json.load(fp) 30 | config = json.load(open("config.json", "r")) 31 | 32 | # %% 33 | data_path = f"{region}/cctorch" 34 | result_path = f"{region}/hypodd" 35 | if not os.path.exists(f"{root_path}/{result_path}"): 36 | os.makedirs(f"{root_path}/{result_path}") 37 | 38 | # %% 39 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") 40 | 41 | station_lines = {} 42 | for i, row in stations.iterrows(): 43 | station_id = row["station_id"] 44 | network_code, station_code, comp_code, channel_code = station_id.split(".") 45 | # tmp_code = f"{station_code}{channel_code}" 46 | tmp_code = f"{station_code}" 47 | station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n" 48 | 49 | 50 | with open(f"{root_path}/{result_path}/stations.dat", "w") as f: 51 | for line in sorted(station_lines.values()): 52 | f.write(line) 53 | 54 | # %% 55 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv") 56 | events["time"] = pd.to_datetime(events["event_time"], format="mixed") 57 | 58 | event_lines = [] 59 | 60 | for i, row in events.iterrows(): 61 | event_index = row["event_index"] 62 | origin = row["time"] 63 | magnitude = row["magnitude"] 64 | x_err = 0.0 65 
| z_err = 0.0 66 | time_err = 0.0 67 | dx, dy, dz = 0.0, 0.0, 0.0 68 | # dx = np.random.uniform(-0.01, 0.01) 69 | # dy = np.random.uniform(-0.01, 0.01) 70 | # dz = np.random.uniform(0, 10) 71 | # dz = 0 72 | event_lines.append( 73 | f"{origin.year:4d}{origin.month:02d}{origin.day:02d} " 74 | f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} " 75 | # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} " 76 | f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} " 77 | f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n" 78 | ) 79 | 80 | with open(f"{root_path}/{result_path}/events.dat", "w") as f: 81 | f.writelines(event_lines) 82 | 83 | # %% 84 | os.system(f"bash run_hypodd_cc.sh {root_path} {region}") 85 | -------------------------------------------------------------------------------- /examples/california/run_hypodd_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/hypodd" ]; then 13 | mkdir -p $root_path/$region/hypodd 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc 17 | cd $root_path/$region/hypodd 18 | 19 | if [ ! -d "HypoDD" ]; then 20 | git clone https://github.com/zhuwq0/HypoDD.git 21 | export PATH=$PATH:$PWD/HypoDD 22 | make -C HypoDD/src/ 23 | fi 24 | 25 | cat < cc.inp 26 | * RELOC.INP: 27 | *--- input file selection 28 | * cross correlation diff times: 29 | dt.cc 30 | * 31 | *catalog P diff times: 32 | 33 | * 34 | * event file: 35 | events.dat 36 | * 37 | * station file: 38 | stations.dat 39 | * 40 | *--- output file selection 41 | * original locations: 42 | hypodd_cc.loc 43 | * relocations: 44 | hypodd_cc.reloc 45 | * station information: 46 | hypodd.sta 47 | * residual information: 48 | hypodd.res 49 | * source paramater information: 50 | hypodd.src 51 | * 52 | *--- data type selection: 53 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 54 | * IPHA: 1= P; 2= S; 3= P&S 55 | * DIST:max dist [km] between cluster centroid and station 56 | * IDAT IPHA DIST 57 | 1 3 120 58 | * 59 | *--- event clustering: 60 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 61 | * OBSCT: min # of obs/pair for network data (0= no clustering) 62 | * OBSCC OBSCT 63 | 0 0 64 | * 65 | *--- solution control: 66 | * ISTART: 1 = from single source; 2 = from network sources 67 | * ISOLV: 1 = SVD, 2=lsqr 68 | * NSET: number of sets of iteration with specifications following 69 | * ISTART ISOLV NSET 70 | 2 2 4 71 | * 72 | *--- data weighting and re-weighting: 73 | * NITER: last iteration to used the following weights 74 | * WTCCP, WTCCS: weight cross P, S 75 | * WTCTP, WTCTS: weight catalog P, S 76 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 77 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 78 | * DAMP: damping (for lsqr only) 79 | * --- CROSS DATA ----- ----CATALOG DATA ---- 80 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 81 | 4 1 1 -9 -9 -9 -9 -9 -9 70 82 | 4 1 1 6 -9 -9 -9 -9 -9 70 83 | 4 1 0.8 3 4 -9 -9 -9 -9 70 84 | 4 1 0.8 2 2 -9 -9 -9 -9 70 85 | * 86 | *--- 1D model: 87 | * NLAY: number of model layers 88 | * RATIO: vp/vs ratio 89 | * TOP: depths of top of layer (km) 90 | * VEL: layer velocities (km/s) 91 | * NLAY RATIO 
92 | 12 1.73 93 | * TOP 94 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 95 | * VEL 96 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 97 | * 98 | *--- event selection: 99 | * CID: cluster to be relocated (0 = all) 100 | * ID: cuspids of event to be relocated (8 per line) 101 | * CID 102 | 0 103 | * ID 104 | EOF 105 | 106 | ./HypoDD/src/hypoDD/hypoDD cc.inp 107 | cd $WORKING_DIR -------------------------------------------------------------------------------- /examples/california/run_phasenet.yaml: -------------------------------------------------------------------------------- 1 | name: dev 2 | 3 | workdir: . 4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp # aws 10 | region: us-west1 # gcp 11 | # region: us-west-2 # aws 12 | # instance_type: n2-highmem-16 13 | # accelerators: P100:1 14 | # cpus: 16+ 15 | cpus: 16 16 | # disk_size: 300 17 | # disk_tier: high 18 | # use_spot: True 19 | # spot_recovery: FAILOVER 20 | # image_id: docker:zhuwq0/quakeflow:latest 21 | 22 | envs: 23 | # REGION: SC 24 | # BRANCH: scedc 25 | REGION: NC 26 | BRANCH: ncedc 27 | YEAR: 2022 28 | 29 | file_mounts: 30 | 31 | # /data/waveforms: 32 | # name: waveforms 33 | # source: waveforms_combined 34 | # mode: MOUNT 35 | 36 | # /dataset/stations: 37 | # name: stations 38 | # source: stations 39 | # mode: COPY 40 | 41 | # /data/waveforms: waveforms_combined 42 | # /dataset/stations: stations 43 | 44 | # ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | # ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | # ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | 48 | # /opt/GaMMA: ../../GaMMA 49 | # /opt/PhaseNet: ../../PhaseNet 50 | 51 | setup: | 52 | echo "Begin setup." 53 | echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc 54 | pip install h5py tqdm wandb pandas numpy scipy 55 | pip install fsspec gcsfs s3fs 56 | pip install obspy pyproj 57 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 58 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 59 | ### PhaseNet 60 | pip install tensorflow==2.14.0 61 | [ ! -d "PhaseNet" ] && git clone https://github.com/AI4EPS/PhaseNet.git 62 | cd PhaseNet && git checkout $BRANCH && git pull origin $BRANCH && cd .. 
63 | ### GaMMA 64 | # pip install -e /opt/GaMMA 65 | 66 | run: | 67 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 68 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 69 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 70 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 71 | ls -al /opt 72 | ls -al /data 73 | fi 74 | python run_phasenet.py --model_path PhaseNet --num_node $num_nodes --node_rank $SKYPILOT_NODE_RANK --region $REGION --year $YEAR -------------------------------------------------------------------------------- /examples/california/set_config_ncedc.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import json 3 | import os 4 | import sys 5 | 6 | root_path = "local" 7 | region = "ncedc" 8 | if len(sys.argv) > 1: 9 | root_path = sys.argv[1] 10 | region = sys.argv[2] 11 | 12 | config = { 13 | "minlatitude": 32, 14 | "maxlatitude": 43, 15 | "minlongitude": -126.0, 16 | "maxlongitude": -114.0, 17 | "num_nodes": 1, 18 | "sampling_rate": 100, 19 | "degree2km": 111.1949, 20 | "channel": "HH*,BH*,EH*,HN*", 21 | } 22 | 23 | with open(f"{root_path}/{region}/config.json", "w") as fp: 24 | json.dump(config, fp, indent=2) -------------------------------------------------------------------------------- /examples/california/submit_template.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | import sky 6 | 7 | 8 | # NUM_NODES = 8 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--num_nodes", type=int, default=16) 12 | parser.add_argument("--year", type=int, default=2023) 13 | parser.add_argument("--region", type=str, default="CA") 14 | return parser.parse_args() 15 | 16 | 17 | args = parse_args() 18 | NUM_NODES = args.num_nodes 19 | YEAR = args.year 20 | REGION = args.region 21 | 22 | task = sky.Task( 23 | name="cut_template", 24 | setup=""" 25 | echo "Begin setup." 
26 | echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc 27 | pip install -U h5py tqdm wandb pandas scipy scikit-learn numpy==1.26.4 28 | pip install -U fsspec gcsfs s3fs 29 | pip install -U obspy pyproj 30 | pip install -e /opt/ADLoc 31 | """, 32 | run=""" 33 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 34 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 35 | if [ "$SKYPILOT_NODE_RANK" == "0" ]; then 36 | ls -al /opt 37 | ls -al /data 38 | ls -al ./ 39 | cat config.json 40 | fi 41 | python cut_templates_cc.py --num_node $NUM_NODES --node_rank $NODE_RANK --year $YEAR --config config.json 42 | """, 43 | workdir=".", 44 | num_nodes=1, 45 | envs={"NUM_NODES": NUM_NODES, "NODE_RANK": 0, "YEAR": YEAR}, 46 | ) 47 | 48 | task.set_file_mounts( 49 | { 50 | "/opt/ADLoc": "../../ADLoc", 51 | "config.json": "local/Mendocino/config.json", 52 | # "config.json": "local/Cal/config.json", 53 | }, 54 | ) 55 | # task.set_storage_mounts({ 56 | # '/remote/imagenet/': sky.Storage(name='my-bucket', 57 | # source='/local/imagenet'), 58 | # }) 59 | task.set_resources( 60 | sky.Resources( 61 | cloud=sky.GCP(), 62 | region="us-west1", # GCP 63 | # region="us-west-2", # AWS 64 | accelerators=None, 65 | cpus=16, 66 | disk_tier="low", 67 | disk_size=50, # GB 68 | memory="64+", 69 | use_spot=True, 70 | ), 71 | ) 72 | 73 | # for NODE_RANK in range(NUM_NODES): 74 | # task.update_envs({"NODE_RANK": NODE_RANK}) 75 | # cluster_name = f"cctorch-{NODE_RANK:02d}" 76 | # print(f"Launching cluster {cluster_name}-{NUM_NODES}...") 77 | # sky.jobs.launch( 78 | # task, 79 | # name=f"{cluster_name}", 80 | # ) 81 | 82 | jobs = [] 83 | try: 84 | sky.status(refresh="AUTO") 85 | except Exception as e: 86 | print(e) 87 | 88 | # task.update_envs({"NODE_RANK": 0}) 89 | # job_id = sky.launch(task, cluster_name="template", fast=True) 90 | # # job_id = sky.exec(task, cluster_name="template") 91 | # status = sky.stream_and_get(job_id) 92 | # # sky.tail_logs(cluster_name="cctorch8", job_id=job_id, follow=True) 93 | # print(f"Job ID: {job_id}, status: {status}") 94 | 95 | # raise 96 | 97 | job_idx = 1 98 | requests_ids = [] 99 | for NODE_RANK in range(NUM_NODES): 100 | # for NODE_RANK in range(30): 101 | 102 | task.update_envs({"NODE_RANK": NODE_RANK}) 103 | cluster_name = f"template-{NODE_RANK:03d}" 104 | 105 | requests_ids.append(sky.jobs.launch(task, name=f"{cluster_name}")) 106 | 107 | print(f"Running cut_template on (rank={NODE_RANK}, num_node={NUM_NODES}) of {cluster_name}") 108 | 109 | job_idx += 1 110 | 111 | for request_id in requests_ids: 112 | print(sky.get(request_id)) 113 | -------------------------------------------------------------------------------- /examples/california/tests/clustering.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.cluster import DBSCAN 6 | 7 | # %% 8 | v = 6.0 9 | 10 | # scale_x = 4 11 | # eps = 8 12 | # min_samples = 8 13 | 14 | scale_x = 1 15 | eps = 10 16 | min_samples = 3 17 | 18 | # %% 19 | picks = pd.read_csv("phasenet_picks_20230919_1h.csv", parse_dates=["phase_time"]) 20 | # picks = pd.read_csv("phasenet_picks_20230820_1h.csv", parse_dates=["phase_time"]) 21 | 22 | # %% 23 | stations = pd.read_json("stations.json", orient="index") 24 | stations["station_id"] = stations.index 25 | 26 | # %% 27 | picks = picks.merge(stations, on="station_id") 28 | picks["t_s"] = (picks["phase_time"] - picks["phase_time"].min()).dt.total_seconds() 29 | 30 | # %% 31 | 
dbscan = DBSCAN(eps=eps, min_samples=min_samples) 32 | dbscan.fit(picks[["t_s", "x_km", "y_km"]].values / np.array([1, scale_x * v, v])) 33 | 34 | # %% 35 | picks["cluster"] = dbscan.labels_ 36 | # %% 37 | mapping_color = lambda x: f"C{x}" if x >= 0 else "black" 38 | plt.figure(figsize=(20, 5)) 39 | plt.scatter(picks["t_s"], picks["x_km"], c=picks["cluster"].apply(mapping_color), s=0.3) 40 | plt.title(f"Number of picks: {len(picks)}") 41 | plt.show() 42 | 43 | # %% 44 | picks_selected = picks.copy() 45 | dbscan = DBSCAN(eps=1, min_samples=1) 46 | dbscan.fit(picks_selected[["t_s", "x_km", "y_km"]].values / np.array([1, scale_x * v, v])) 47 | picks_selected["cluster"] = dbscan.labels_ 48 | picks_selected = ( 49 | picks_selected.groupby("cluster").agg({"t_s": "median", "x_km": "median", "y_km": "median"}).reset_index() 50 | ) 51 | 52 | # %% 53 | print(f"{len(picks) = }, {len(picks_selected) = }") 54 | 55 | # %% 56 | # mapping_color = lambda x: f"C{x}" if x >= 0 else "black" 57 | # plt.figure(figsize=(20, 5)) 58 | # plt.scatter(picks_selected["t_s"], picks_selected["x_km"], c=picks_selected["cluster"].apply(mapping_color), s=0.3) 59 | # plt.title(f"Number of picks: {len(picks_selected)}") 60 | # plt.show() 61 | 62 | # %% 63 | dbscan = DBSCAN(eps=eps, min_samples=min_samples) 64 | dbscan.fit(picks_selected[["t_s", "x_km", "y_km"]].values / np.array([1, scale_x * v, v])) 65 | 66 | # %% 67 | picks_selected["cluster"] = dbscan.labels_ 68 | # %% 69 | mapping_color = lambda x: f"C{x}" if x >= 0 else "black" 70 | plt.figure(figsize=(20, 5)) 71 | plt.scatter(picks_selected["t_s"], picks_selected["x_km"], c=picks_selected["cluster"].apply(mapping_color), s=0.3) 72 | plt.title(f"Number of picks: {len(picks_selected)}") 73 | plt.show() 74 | -------------------------------------------------------------------------------- /examples/forge/load_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import io 3 | import multiprocessing as mp 4 | import os 5 | 6 | import fsspec 7 | from obspy import read 8 | from tqdm import tqdm 9 | 10 | 11 | # %% 12 | def process_url(url): 13 | with fsspec.open(url, mode="rb") as fp: 14 | content = fp.read() 15 | 16 | file_like_object = io.BytesIO(content) 17 | 18 | st = read(file_like_object) 19 | 20 | 21 | # %% 22 | if __name__ == "__main__": 23 | # %% 24 | # https://constantine.seis.utah.edu/datasets.html 25 | os.system("curl -o urls.txt https://constantine.seis.utah.edu/files/get_all_slb.sh") 26 | 27 | # %% 28 | urls = [] 29 | with open("urls.txt") as f: 30 | for line in f: 31 | if line.startswith("wget"): 32 | urls.append(line.split()[-1]) 33 | 34 | # # %% 35 | # for url in tqdm(urls): 36 | # with fsspec.open(url, mode="rb") as fp: 37 | # content = fp.read() 38 | 39 | # file_like_object = io.BytesIO(content) 40 | 41 | # st = read(file_like_object) 42 | # # print(st) 43 | # # raise 44 | 45 | # %% 46 | ncpu = mp.cpu_count() * 2 47 | print(f"Number of CPUs: {ncpu}") 48 | pbar = tqdm(total=len(urls)) 49 | with mp.Pool(ncpu) as pool: 50 | for url in urls: 51 | pool.apply_async( 52 | func=process_url, 53 | args=(url,), 54 | callback=lambda _: pbar.update(1), 55 | error_callback=lambda e: print(e), 56 | ) 57 | pool.close() 58 | pool.join() 59 | 60 | # %% 61 | -------------------------------------------------------------------------------- /examples/japan/.gitignore: -------------------------------------------------------------------------------- 1 | local/ 2 | 
-------------------------------------------------------------------------------- /examples/japan/merge_picks.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | import os 4 | from glob import glob 5 | 6 | import pandas as pd 7 | from tqdm import tqdm 8 | 9 | 10 | def merge_csv(csv_files, pick_file): 11 | keep_header = True 12 | for csv_file in csv_files: 13 | if os.stat(csv_file).st_size == 0: 14 | continue 15 | if keep_header: 16 | cmd = f"cat {csv_file} > {pick_file}" 17 | keep_header = False 18 | else: 19 | cmd = f"tail -n +2 {csv_file} >> {pick_file}" 20 | os.system(cmd) 21 | 22 | 23 | # %% 24 | if __name__ == "__main__": 25 | # %% 26 | csv_path = "local/hinet/phasenet/csvs" 27 | pick_path = "local/hinet/phasenet/picks" 28 | if not os.path.exists(pick_path): 29 | os.makedirs(pick_path) 30 | 31 | # %% 32 | jdays = sorted(os.listdir(csv_path)) 33 | 34 | # %% 35 | ncpu = min(32, mp.cpu_count()) 36 | ctx = mp.get_context("spawn") 37 | pbar = tqdm(total=len(jdays)) 38 | 39 | # %% 40 | jobs = [] 41 | with ctx.Pool(ncpu) as pool: 42 | 43 | # %% 44 | for jday in jdays: 45 | csv_files = [] 46 | for hour in sorted(os.listdir(f"{csv_path}/{jday}")): 47 | tmp = glob(f"{csv_path}/{jday}/{hour}/*.csv") 48 | csv_files.extend(tmp) 49 | 50 | year, jday = jday.split("-") 51 | if not os.path.exists(f"local/hinet/phasenet/picks/{year}"): 52 | os.makedirs(f"local/hinet/phasenet/picks/{year}") 53 | pick_file = f"local/hinet/phasenet/picks/{year}/{jday}.csv" 54 | 55 | # merge_csv(csv_files, pick_file) 56 | job = pool.apply_async(merge_csv, (csv_files, pick_file), callback=lambda _: pbar.update(1)) 57 | jobs.append(job) 58 | 59 | pool.close() 60 | pool.join() 61 | 62 | for job in jobs: 63 | out = job.get() 64 | if out is not None: 65 | print(out) 66 | -------------------------------------------------------------------------------- /examples/japan/run_cctorch.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | 4 | import torch 5 | 6 | # %% 7 | # root_path = args.root_path 8 | # region = args.region 9 | root_path = "local" 10 | region = "hinet" 11 | 12 | data_path = f"{region}/cctorch" 13 | result_path = f"{region}/cctorch/ccpairs" 14 | if not os.path.exists(f"{root_path}/{result_path}"): 15 | os.makedirs(f"{root_path}/{result_path}") 16 | 17 | 18 | ## based on GPU memory 19 | 20 | batch = 1_024 21 | block_size1 = 1000_000 22 | block_size2 = 1000_000 23 | 24 | 25 | base_cmd = ( 26 | f"../../CCTorch/run.py --pair_list={root_path}/{region}/cctorch/pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap " 27 | f"--data_list1={root_path}/{region}/cctorch/cctorch_picks.csv " 28 | f"--events_csv={root_path}/{region}/cctorch/cctorch_events.csv --picks_csv={root_path}/{region}/cctorch/cctorch_picks.csv --stations_csv={root_path}/{region}/cctorch/cctorch_stations.csv " 29 | f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" 30 | ) 31 | 32 | num_gpu = torch.cuda.device_count() 33 | if num_gpu == 0: 34 | if os.uname().sysname == "Darwin": 35 | cmd = f"python {base_cmd} --device=cpu" 36 | else: 37 | cmd = f"python {base_cmd} --device=cpu" 38 | else: 39 | cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}" 40 | print(cmd) 41 | os.system(cmd) 42 | 43 | # %% 44 | os.chdir(f"{root_path}/{region}/cctorch") 45 | source_file = 
f"ccpairs/CC_{num_gpu:03d}_dt.cc" 46 | target_file = f"dt.cc" 47 | print(f"{source_file} -> {target_file}") 48 | if os.path.lexists(target_file): 49 | os.remove(target_file) 50 | os.symlink(source_file, target_file) 51 | 52 | source_file = f"ccpairs/CC_{num_gpu:03d}.csv" 53 | target_file = f"dtcc.csv" 54 | print(f"{source_file} -> {target_file}") 55 | if os.path.lexists(target_file): 56 | os.remove(target_file) 57 | os.symlink(source_file, target_file) 58 | -------------------------------------------------------------------------------- /examples/japan/run_growclust_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from datetime import datetime 4 | 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | # %% 9 | root_path = "local" 10 | region = "hinet" 11 | result_path = f"{region}/growclust" 12 | if not os.path.exists(f"{root_path}/{result_path}"): 13 | os.makedirs(f"{root_path}/{result_path}") 14 | 15 | # %% 16 | # stations_json = f"{region}/results/data/stations.json" 17 | # stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") 18 | # station_csv = f"{region}/adloc/ransac_stations.csv" 19 | station_csv = f"{region}/cctorch/cctorch_stations.csv" 20 | stations = pd.read_csv(f"{root_path}/{station_csv}") 21 | stations["station"] = stations["station_id"].apply(lambda x: x.split(".")[2]) 22 | stations.set_index("station_id", inplace=True) 23 | 24 | 25 | lines = [] 26 | for i, row in stations.iterrows(): 27 | # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 28 | line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 29 | lines.append(line) 30 | 31 | with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: 32 | fp.writelines(lines) 33 | 34 | 35 | # %% 36 | # events_csv = f"{region}/results/phase_association/events.csv" 37 | # events_csv = f"{region}/adloc/ransac_events.csv" 38 | events_csv = f"{region}/cctorch/cctorch_events.csv" 39 | # event_file = f"{region}/cctorch/events.csv" 40 | events = pd.read_csv(f"{root_path}/{events_csv}") 41 | # event_df = event_df[event_df["gamma_score"] > 10] 42 | # event_index = [f"{x:06d}" for x in event_df["event_index"]] 43 | events["time"] = pd.to_datetime(events["event_time"]) 44 | if "magnitude" not in events.columns: 45 | events["magnitude"] = 0.0 46 | 47 | events[["year", "month", "day", "hour", "minute", "second"]] = ( 48 | events["time"] 49 | # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) 50 | .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) 51 | .apply(pd.Series) 52 | .apply(pd.to_numeric) 53 | ) 54 | 55 | lines = [] 56 | for i, row in events.iterrows(): 57 | # yr mon day hr min sec lat lon dep mag eh ez rms evid 58 | line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" 59 | lines.append(line) 60 | 61 | with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: 62 | fp.writelines(lines) 63 | 64 | # %% 65 | os.system(f"bash run_growclust_cc.sh {root_path} {region}") 66 | -------------------------------------------------------------------------------- /examples/japan/run_growclust_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 
5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/growclust" ]; then 13 | mkdir -p $root_path/$region/growclust 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc 17 | cd $root_path/$region/growclust 18 | mkdir -p TT OUT 19 | 20 | if [ ! -d "GrowClust" ]; then 21 | git clone git@github.com:zhuwq0/GrowClust.git 22 | make -C GrowClust/SRC/ 23 | fi 24 | 25 | cat < growclust.inp 26 | **** Example GrowClust Control File ***** 27 | ******** Daniel Trugman, 2016 ********** 28 | ******************************************* 29 | * 30 | ******************************************* 31 | ************* Event list **************** 32 | ******************************************* 33 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 34 | 1 35 | * fin_evlist (event list file name) 36 | evlist.txt 37 | * 38 | ******************************************* 39 | ************ Station list ************* 40 | ******************************************* 41 | * stlist_fmt (0 = SEED channel, 1 = station name) 42 | 1 43 | * fin_stlist (station list file name) 44 | stlist.txt 45 | * 46 | ******************************************* 47 | ************* XCOR data *************** 48 | ******************************************* 49 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 50 | 1 12 51 | * fin_xcordat 52 | dt.cc 53 | * 54 | ******************************************* 55 | *** Velocity Model / Travel Time Tables *** 56 | ******************************************* 57 | * fin_vzmdl (input vz model file) 58 | vzmodel.txt 59 | * fout_vzfine (output, interpolated vz model file) 60 | TT/vzfine.txt 61 | * fout_pTT (output travel time table, P phase) 62 | TT/tt.pg 63 | * fout_sTT (output travel time table, S phase) 64 | TT/tt.sg 65 | * 66 | ****************************************** 67 | ***** Travel Time Table Parameters ****** 68 | ****************************************** 69 | * vpvs_factor rayparam_min (-1 = default) 70 | 1.732 0.0 71 | * tt_dep0 tt_dep1 tt_ddep 72 | 0. 71. 1. 73 | * tt_del0 tt_del1 tt_ddel 74 | 0. 500. 2. 
75 | * 76 | ****************************************** 77 | ***** GrowClust Algorithm Parameters ***** 78 | ****************************************** 79 | * rmin delmax rmsmax 80 | 0.1 120 1.0 81 | * rpsavgmin, rmincut ngoodmin iponly 82 | 0 0 8 0 83 | * 84 | ****************************************** 85 | ************ Output files **************** 86 | ****************************************** 87 | * nboot nbranch_min 88 | 0 1 89 | * fout_cat (relocated catalog) 90 | OUT/out.growclust_cc_cat 91 | * fout_clust (relocated cluster file) 92 | OUT/out.growclust_cc_clust 93 | * fout_log (program log) 94 | OUT/out.growclust_cc_log 95 | * fout_boot (bootstrap distribution) 96 | OUT/out.growclust_cc_boot 97 | ****************************************** 98 | ****************************************** 99 | EOF 100 | 101 | cat < vzmodel.txt 102 | 0.0 5.30 0.00 103 | 1.0 5.65 0.00 104 | 3.0 5.93 0.00 105 | 5.0 6.20 0.00 106 | 7.0 6.20 0.00 107 | 9.0 6.20 0.00 108 | 11.0 6.20 0.00 109 | 13.0 6.20 0.00 110 | 17.0 6.20 0.00 111 | 21.0 6.20 0.00 112 | 31.00 7.50 0.00 113 | 31.10 8.11 0.00 114 | 100.0 8.11 0.00 115 | EOF 116 | 117 | ./GrowClust/SRC/growclust growclust.inp 118 | cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt 119 | cd $WORKING_DIR 120 | -------------------------------------------------------------------------------- /examples/japan/run_hypodd_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | # %% 9 | root_path = "local" 10 | region = "hinet" 11 | 12 | with open(f"{root_path}/{region}/config.json", "r") as fp: 13 | config = json.load(fp) 14 | 15 | # %% 16 | data_path = f"{region}/cctorch" 17 | result_path = f"{region}/hypodd" 18 | if not os.path.exists(f"{root_path}/{result_path}"): 19 | os.makedirs(f"{root_path}/{result_path}") 20 | 21 | # %% 22 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") 23 | 24 | station_lines = {} 25 | for i, row in stations.iterrows(): 26 | station_id = row["station_id"] 27 | # network_code, station_code, comp_code, channel_code = station_id.split(".") 28 | station_code = station_id.split(".")[2] 29 | # tmp_code = f"{station_code}{channel_code}" 30 | tmp_code = f"{station_code}" 31 | station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n" 32 | 33 | 34 | with open(f"{root_path}/{result_path}/stations.dat", "w") as f: 35 | for line in sorted(station_lines.values()): 36 | f.write(line) 37 | 38 | # %% 39 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv") 40 | events["time"] = pd.to_datetime(events["event_time"], format="mixed") 41 | 42 | event_lines = [] 43 | 44 | mean_latitude = events["latitude"].mean() 45 | mean_longitude = events["longitude"].mean() 46 | for i, row in events.iterrows(): 47 | event_index = row["event_index"] 48 | origin = row["time"] 49 | magnitude = row["magnitude"] 50 | x_err = 0.0 51 | z_err = 0.0 52 | time_err = 0.0 53 | dx, dy, dz = 0.0, 0.0, 0.0 54 | dx = np.random.uniform(-0.01, 0.01) 55 | dy = np.random.uniform(-0.01, 0.01) 56 | # dz = np.random.uniform(0, 10) 57 | dz = 0 58 | event_lines.append( 59 | f"{origin.year:4d}{origin.month:02d}{origin.day:02d} " 60 | f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} " 61 | # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} " 62 | f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} " 
63 | f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n" 64 | ) 65 | 66 | with open(f"{root_path}/{result_path}/events.dat", "w") as f: 67 | f.writelines(event_lines) 68 | 69 | # %% 70 | os.system(f"bash run_hypodd_cc.sh {root_path} {region}") 71 | -------------------------------------------------------------------------------- /examples/japan/run_hypodd_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/hypodd" ]; then 13 | mkdir -p $root_path/$region/hypodd 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc 17 | cd $root_path/$region/hypodd 18 | 19 | if [ ! -d "HypoDD" ]; then 20 | git clone git@github.com:zhuwq0/HypoDD.git 21 | export PATH=$PATH:$PWD/HypoDD 22 | make -C HypoDD/src/ 23 | fi 24 | 25 | cat < cc.inp 26 | * RELOC.INP: 27 | *--- input file selection 28 | * cross correlation diff times: 29 | dt.cc 30 | * 31 | *catalog P diff times: 32 | 33 | * 34 | * event file: 35 | events.dat 36 | * 37 | * station file: 38 | stations.dat 39 | * 40 | *--- output file selection 41 | * original locations: 42 | hypodd_cc.loc 43 | * relocations: 44 | hypodd_cc.reloc 45 | * station information: 46 | hypodd.sta 47 | * residual information: 48 | hypodd.res 49 | * source paramater information: 50 | hypodd.src 51 | * 52 | *--- data type selection: 53 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 54 | * IPHA: 1= P; 2= S; 3= P&S 55 | * DIST:max dist [km] between cluster centroid and station 56 | * IDAT IPHA DIST 57 | 1 3 120 58 | * 59 | *--- event clustering: 60 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 61 | * OBSCT: min # of obs/pair for network data (0= no clustering) 62 | * OBSCC OBSCT 63 | 0 0 64 | * 65 | *--- solution control: 66 | * ISTART: 1 = from single source; 2 = from network sources 67 | * ISOLV: 1 = SVD, 2=lsqr 68 | * NSET: number of sets of iteration with specifications following 69 | * ISTART ISOLV NSET 70 | 2 2 4 71 | * 72 | *--- data weighting and re-weighting: 73 | * NITER: last iteration to used the following weights 74 | * WTCCP, WTCCS: weight cross P, S 75 | * WTCTP, WTCTS: weight catalog P, S 76 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 77 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 78 | * DAMP: damping (for lsqr only) 79 | * --- CROSS DATA ----- ----CATALOG DATA ---- 80 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 81 | 4 1 1 -9 -9 -9 -9 -9 -9 170 82 | 4 1 1 6 -9 -9 -9 -9 -9 170 83 | 4 1 0.8 3 4 -9 -9 -9 -9 170 84 | 4 1 0.8 2 2 -9 -9 -9 -9 170 85 | * 86 | *--- 1D model: 87 | * NLAY: number of model layers 88 | * RATIO: vp/vs ratio 89 | * TOP: depths of top of layer (km) 90 | * VEL: layer velocities (km/s) 91 | * NLAY RATIO 92 | 12 1.73 93 | * TOP 94 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 95 | * VEL 96 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 97 | * 98 | *--- event selection: 99 | * CID: cluster to be relocated (0 = all) 100 | * ID: cuspids of event to be relocated (8 per line) 101 | * CID 102 | 0 103 | * ID 104 | EOF 105 | 106 | ./HypoDD/src/hypoDD/hypoDD cc.inp 107 | cd $WORKING_DIR -------------------------------------------------------------------------------- /examples/japan/set_config.py: 
-------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | 4 | config = { 5 | "minlatitude": 36.8, 6 | "maxlatitude": 38.2, 7 | "minlongitude": 136.2, 8 | "maxlongitude": 138.3, 9 | "starttime": "2024-01-01T00:00:00", 10 | "endtime": "2024-02-29T23:00:00", 11 | } 12 | 13 | # %% 14 | with open("local/hinet/config.json", "w") as f: 15 | json.dump(config, f, indent=2) 16 | 17 | # %% 18 | -------------------------------------------------------------------------------- /examples/seafoam/load_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | 4 | import fsspec 5 | import h5py 6 | from tqdm import tqdm 7 | 8 | 9 | # %% 10 | def read_hdf5(file, key_path): 11 | fs = fsspec.filesystem("gcs", token=key_path) 12 | with fs.open(file, "rb") as f: 13 | with h5py.File(f, "r") as hf: 14 | print(file.split("/")[-1], hf["Acquisition/Raw[0]/RawData"]) 15 | 16 | 17 | # %% 18 | if __name__ == "__main__": 19 | 20 | # %% 21 | token_file = ""  # path to a GCS service-account key file (left blank here) 22 | 23 | fs = fsspec.filesystem("gcs", token=token_file) 24 | 25 | folders = fs.ls("berkeley-mbari-das/") 26 | 27 | hdf5_files = [] 28 | for folder in folders: 29 | if folder.split("/")[-1] in ["ContextData", "MBARI_cable_geom_dx10m.csv"]: 30 | continue 31 | years = fs.ls(folder) 32 | for year in tqdm(years, desc=folder): 33 | jdays = fs.ls(year) 34 | for jday in jdays: 35 | files = fs.ls(jday) 36 | for file in files: 37 | if file.endswith(".h5"): 38 | hdf5_files.append(file) 39 | 40 | # %% 41 | # for file in hdf5_files: 42 | # read_hdf5(file, token_file) 43 | 44 | ctx = mp.get_context("spawn") 45 | pbar = tqdm(total=len(hdf5_files)) 46 | ncpu = 8 47 | with ctx.Pool(ncpu) as pool: 48 | jobs = [] 49 | for file in hdf5_files: 50 | jobs.append(pool.apply_async(read_hdf5, (file, token_file), callback=lambda _: pbar.update()))  # pass the token file and keep the job handle 51 | pool.close() 52 | pool.join() 53 | 54 | for job in jobs: 55 | result = job.get() 56 | if result: 57 | print(result) 58 | 59 | pbar.close() 60 | 61 | # %% 62 | -------------------------------------------------------------------------------- /kubeflow/.gitignore: -------------------------------------------------------------------------------- 1 | *zip 2 | share 3 | Hawaii 4 | Ridgecrest* 5 | PuertoRico 6 | Demo 7 | HYPODD 8 | LongValley* 9 | SierraNegra 10 | -------------------------------------------------------------------------------- /kubeflow/Dockerfile: -------------------------------------------------------------------------------- 1 | # FROM continuumio/miniconda3 2 | 3 | FROM ubuntu:18.04 4 | 5 | ENV PATH="/root/miniconda3/bin:${PATH}" 6 | ARG PATH="/root/miniconda3/bin:${PATH}" 7 | RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/* 8 | 9 | RUN wget \ 10 | https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 11 | && mkdir /root/.conda \ 12 | && bash Miniconda3-latest-Linux-x86_64.sh -b \ 13 | && rm -f Miniconda3-latest-Linux-x86_64.sh 14 | RUN conda --version 15 | 16 | # Setup env variables 17 | ENV PYTHONUNBUFFERED=1 18 | 19 | WORKDIR /app 20 | COPY env.yml /app 21 | RUN conda env update -f=env.yml -n base 22 | RUN pip install --no-cache-dir --upgrade -i https://pypi.anaconda.org/zhuwq0/simple gmma 23 | RUN conda clean --all 24 | #SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] 25 | -------------------------------------------------------------------------------- /kubeflow/README.md: -------------------------------------------------------------------------------- 1 | ## 
QuakeFlow Demo Install 2 | 3 | 4 | ``` 5 | git clone -b factorize https://github.com/wayneweiqiang/PhaseNet.git 6 | git clone https://github.com/wayneweiqiang/GMMA.git 7 | conda env create quakeflow --file=env.yml --force 8 | conda activate quakeflow 9 | ``` -------------------------------------------------------------------------------- /kubeflow/debug_pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: dataaccess 5 | spec: 6 | containers: 7 | - name: alpine 8 | image: alpine:latest 9 | command: ['sleep', 'infinity'] 10 | volumeMounts: 11 | - name: mypvc 12 | mountPath: /data 13 | volumes: 14 | - name: mypvc 15 | persistentVolumeClaim: 16 | claimName: mypvc -------------------------------------------------------------------------------- /kubeflow/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.8 7 | - numpy 8 | - scipy 9 | - matplotlib 10 | - pandas 11 | - scikit-learn 12 | - tqdm 13 | - obspy 14 | - pymongo 15 | - tensorflow 16 | - pip 17 | - pip: 18 | - minio 19 | - kfp 20 | - gmma 21 | - --extra-index-url https://pypi.anaconda.org/zhuwq0/simple 22 | 23 | -------------------------------------------------------------------------------- /kubeflow/rsync.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: dataaccess52 5 | spec: 6 | containers: 7 | - name: alpine 8 | image: zhuwq0/waveform-env:1.1 9 | command: ['sleep', 'infinity'] 10 | volumeMounts: 11 | - name: mypvc 12 | mountPath: /data 13 | volumes: 14 | - name: mypvc 15 | persistentVolumeClaim: 16 | claimName: quakeflow-w8gfg-data-volume-52 17 | -------------------------------------------------------------------------------- /kubeflow/waveforms/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN apt-get update && apt-get install -y vim ssh rsync && rm -rf /var/lib/apt/lists/* 4 | 5 | RUN python -m pip install --upgrade obspy && rm -rf /var/cache/apk/* 6 | 7 | WORKDIR /opt 8 | 9 | # RUN ssh-keygen -q -t rsa -N "" <<< $"\ny" >/dev/null 2>&1 10 | # RUN ssh-copy-id zhuwq@wintermute.gps.caltech.edu 11 | 12 | # ARG CACHEBUST=1 -------------------------------------------------------------------------------- /kubernetes/deploy_gcp.sh: -------------------------------------------------------------------------------- 1 | # Deploy Kafka with Helm, create client and add topics 2 | helm repo add bitnami https://charts.bitnami.com/bitnami 3 | helm install quakeflow-kafka bitnami/kafka 4 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never \ 5 | --command -- bash -c "kafka-topics.sh --create --topic phasenet_picks --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic gmma_events --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic waveform_raw --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic phasenet_waveform --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 6 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never 
\ 7 | --command -- bash -c "kafka-configs.sh --alter --entity-type topics --entity-name phasenet_picks --add-config 'retention.ms=-1' --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-configs.sh --alter --entity-type topics --entity-name gmma_events --add-config 'retention.ms=-1' --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 8 | ## For external access: 9 | # helm upgrade quakeflow-kafka bitnami/kafka --set externalAccess.enabled=true,externalAccess.autoDiscovery.enabled=true,rbac.create=true 10 | ## Check topic configs: 11 | # kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never \ 12 | # --command -- bash -c "kafka-topics.sh --describe --topics-with-overrides --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 13 | 14 | # Deploy MongoDB 15 | helm install quakeflow-mongodb --set auth.rootPassword=quakeflow123,auth.username=quakeflow,auth.password=quakeflow123,auth.database=quakeflow,architecture=replicaset,persistence.size=100Gi 16 | bitnami/mongodb 17 | 18 | # Deploy to Kubernetes 19 | kubectl apply -f quakeflow-gcp.yaml 20 | 21 | # Add autoscaling 22 | kubectl autoscale deployment phasenet-api --cpu-percent=50 --min=1 --max=365 23 | kubectl autoscale deployment gamma-api --cpu-percent=200 --min=1 --max=365 24 | kubectl autoscale deployment deepdenoiser-api --cpu-percent=50 --min=1 --max=10 25 | 26 | # Expose APIs 27 | # kubectl expose deployment phasenet-api --type=LoadBalancer --name=phasenet-service 28 | # kubectl expose deployment gamma-api --type=LoadBalancer --name=gmma-service 29 | # kubectl expose deployment quakeflow-streamlit --type=LoadBalancer --name=streamlit-ui 30 | # kubectl expose deployment quakeflow-ui --type=LoadBalancer --name=quakeflow-ui 31 | 32 | # Add MINIO storage 33 | # helm install quakeflow-minio --set accessKey.password=minio --set secretKey.password=minio123 --set persistence.size=1T bitnami/minio 34 | -------------------------------------------------------------------------------- /kubernetes/quakeflow-autoscaling.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v2beta2 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: phasenet-api 5 | spec: 6 | scaleTargetRef: 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | name: phasenet-api 10 | minReplicas: 1 11 | maxReplicas: 365 12 | metrics: 13 | - type: Resource 14 | resource: 15 | name: cpu 16 | target: 17 | type: Utilization 18 | averageUtilization: 80 19 | - type: Resource 20 | resource: 21 | name: memory 22 | target: 23 | type: AverageValue 24 | averageValue: 1500Mi 25 | # - type: Object 26 | # object: 27 | # metric: 28 | # name: requests-per-second 29 | # describedObject: 30 | # # apiVersion: v1 31 | # # kind: Service 32 | # # name: phasenet-api 33 | # apiVersion: networking.k8s.io/v1beta1 34 | # kind: Ingress 35 | # name: quakeflow-ingress 36 | # target: 37 | # type: Value 38 | # value: 10 39 | # Uncomment these lines if you create the custom packets_per_second metric and 40 | # configure your app to export the metric. 
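Note (illustrative aside, not part of the repository): deploy_gcp.sh above creates the phasenet_picks, gmma_events, waveform_raw, and phasenet_waveform topics, but no Kafka client example is included in this snapshot. A minimal sketch of publishing and reading picks on one of those topics, assuming the kafka-python package (not pinned in the repo) and the same in-cluster bootstrap address used by the helm deployment; the pick payload values are placeholders:

```
import json

from kafka import KafkaConsumer, KafkaProducer

BOOTSTRAP = "quakeflow-kafka.default.svc.cluster.local:9092"  # same address as in deploy_gcp.sh

# Publish one example pick; field names mirror the Pick model used by the demo FastAPI services.
producer = KafkaProducer(
    bootstrap_servers=BOOTSTRAP,
    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
)
producer.send(
    "phasenet_picks",
    {"station_id": "CI.LRL..HNZ", "phase_time": "2024-01-01T00:00:01.000", "phase_type": "P", "phase_score": 0.9},
)
producer.flush()

# Read picks back from the beginning of the topic.
consumer = KafkaConsumer(
    "phasenet_picks",
    bootstrap_servers=BOOTSTRAP,
    auto_offset_reset="earliest",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
)
for message in consumer:
    print(message.value)
    break
```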
41 | # - type: Pods 42 | # pods: 43 | # metric: 44 | # name: packets_per_second 45 | # target: 46 | # type: AverageValue 47 | # averageValue: 100 48 | --- 49 | apiVersion: autoscaling/v2beta2 50 | kind: HorizontalPodAutoscaler 51 | metadata: 52 | name: gamma-api 53 | spec: 54 | scaleTargetRef: 55 | apiVersion: apps/v1 56 | kind: Deployment 57 | name: gamma-api 58 | minReplicas: 1 59 | maxReplicas: 365 60 | metrics: 61 | - type: Resource 62 | resource: 63 | name: cpu 64 | target: 65 | type: Utilization 66 | averageUtilization: 300 67 | - type: Resource 68 | resource: 69 | name: memory 70 | target: 71 | type: AverageValue 72 | averageValue: 500Mi 73 | # Uncomment these lines if you create the custom packets_per_second metric and 74 | # configure your app to export the metric. 75 | # - type: Pods 76 | # pods: 77 | # metric: 78 | # name: packets_per_second 79 | # target: 80 | # type: AverageValue 81 | # averageValue: 100 82 | --- 83 | apiVersion: autoscaling/v2beta2 84 | kind: HorizontalPodAutoscaler 85 | metadata: 86 | name: deepdenoiser-api 87 | spec: 88 | scaleTargetRef: 89 | apiVersion: apps/v1 90 | kind: Deployment 91 | name: deepdenoiser-api 92 | minReplicas: 1 93 | maxReplicas: 365 94 | metrics: 95 | - type: Resource 96 | resource: 97 | name: cpu 98 | target: 99 | type: Utilization 100 | averageUtilization: 50 101 | - type: Resource 102 | resource: 103 | name: memory 104 | target: 105 | type: AverageValue 106 | averageValue: 1000Mi 107 | # Uncomment these lines if you create the custom packets_per_second metric and 108 | # configure your app to export the metric. 109 | # - type: Pods 110 | # pods: 111 | # metric: 112 | # name: packets_per_second 113 | # target: 114 | # type: AverageValue 115 | # averageValue: 100 -------------------------------------------------------------------------------- /kubernetes/quakeflow-ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: quakeflow-ingress 5 | annotations: 6 | kubernetes.io/ingress.global-static-ip-name: "quakeflow-static-ip" 7 | spec: 8 | # defaultBackend: 9 | # service: 10 | # name: phasenet-api 11 | # port: 12 | # number: 8000 13 | rules: 14 | - host: phasenet.quakeflow.com 15 | http: 16 | paths: 17 | - path: /* 18 | pathType: ImplementationSpecific 19 | backend: 20 | service: 21 | name: phasenet-api 22 | port: 23 | number: 8000 24 | - host: gamma.quakeflow.com 25 | http: 26 | paths: 27 | - path: /* 28 | pathType: ImplementationSpecific 29 | backend: 30 | service: 31 | name: gamma-api 32 | port: 33 | number: 8001 34 | - host: deepdenoiser.quakeflow.com 35 | http: 36 | paths: 37 | - path: /* 38 | pathType: ImplementationSpecific 39 | backend: 40 | service: 41 | name: deepdenoiser-api 42 | port: 43 | number: 8002 44 | - host: ui.quakeflow.com 45 | http: 46 | paths: 47 | - path: /* 48 | pathType: ImplementationSpecific 49 | backend: 50 | service: 51 | name: quakeflow-ui 52 | port: 53 | number: 8005 54 | --- 55 | apiVersion: cloud.google.com/v1 56 | kind: BackendConfig 57 | metadata: 58 | name: backendconfig 59 | spec: 60 | timeoutSec: 600 61 | # connectionDraining: 62 | # drainingTimeoutSec: 600 63 | --- -------------------------------------------------------------------------------- /kubernetes/replay/real_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # 
https://docs.obspy.org/packages/autogen/obspy.clients.seedlink.easyseedlink.create_client.html#obspy.clients.seedlink.easyseedlink.create_client 3 | from obspy.clients.seedlink.easyseedlink import create_client 4 | 5 | 6 | # %% 7 | def handle_data(trace): 8 | print("Received new data:") 9 | print(trace) 10 | print() 11 | 12 | 13 | # %% 14 | client = create_client("rtserve.iris.washington.edu:18000", handle_data) 15 | client.select_stream("CI", "LRL", "HNZ") 16 | client.run() 17 | 18 | # %% 19 | -------------------------------------------------------------------------------- /kubernetes/replay/replay_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import fsspec 3 | import pandas as pd 4 | import obspy 5 | import matplotlib.pyplot as plt 6 | 7 | # %% 8 | protocal = "gs" 9 | bucket = "quakeflow_share" 10 | folder = "demo/obspy" 11 | 12 | # %% Seismic stations used in the demo 13 | stations = pd.read_csv(f"{protocal}://{bucket}/{folder}/stations.csv") 14 | plt.figure() 15 | plt.scatter(stations["longitude"], stations["latitude"], marker="^", label="stations") 16 | plt.show() 17 | 18 | # %% Read replay waveforms 19 | fs = fsspec.filesystem(protocal) 20 | mseeds = fs.glob(f"{bucket}/{folder}/waveforms/*/*.mseed") 21 | 22 | # %% 23 | for mseed in mseeds: 24 | print(mseed) 25 | with fs.open(mseed, "rb") as f: 26 | st = obspy.read(f) 27 | 28 | # plot a few seconds 29 | tmp = st.slice(starttime=st[0].stats.starttime, endtime=st[0].stats.starttime + 20) 30 | tmp.plot() 31 | break 32 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "QuakeFlow" 2 | site_description: 'QuakeFlow: a Scalable Deep-learning-based Earthquake Monitoring Workflow with Cloud Computing' 3 | site_author: 'Weiqiang Zhu' 4 | docs_dir: docs/ 5 | repo_name: 'AI4EPS/QuakeFlow' 6 | repo_url: 'https://github.com/ai4eps/QuakeFlow' 7 | nav: 8 | - Overview: README.md 9 | - Pipeline: kubeflow/workflow.ipynb 10 | - Standard Formats: data_format.md 11 | # - Visualization: plot_catalog.ipynb 12 | # - FastAPI: fastapi.ipynb 13 | # - Deploy on cluster: k8s_readme.md 14 | # - Deploy on cloud: gcp_readme.md 15 | theme: 16 | name: 'material' 17 | plugins: 18 | - mkdocs-jupyter: 19 | ignore_h1_titles: True 20 | - exclude: 21 | glob: 22 | - PhaseNet/* 23 | - GaMMA/* 24 | - "*_demo" 25 | extra: 26 | analytics: 27 | provider: google 28 | property: G-69DX3B35RK 29 | -------------------------------------------------------------------------------- /quakeflow/demo/association/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # copy the requirements file used for dependencies 11 | COPY requirements.txt . 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 
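Note (illustrative aside, not part of the repository): the handle_data callback in real_data.py above only prints each incoming SeedLink trace. A hedged sketch of how that handler could serialize live traces into the same {"data", "timestamp"} JSON structure the replay services write to Redis, so live and replayed waveforms share one format:

```
import json

# Hypothetical variant of handle_data() from real_data.py; trace is an obspy Trace.
def handle_data(trace):
    payload = {
        "data": trace.data.tolist(),
        "timestamp": trace.times("timestamp").tolist(),
    }
    print(trace.id, len(payload["data"]), "new samples")
    # e.g. forward to Redis, mirroring data/app.py:
    # redis_client.xadd(trace.id, {k: json.dumps(v) for k, v in payload.items()})
```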
18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /quakeflow/demo/association/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from pydantic import BaseModel 3 | 4 | app = FastAPI() 5 | 6 | 7 | # Define a request body model 8 | class Pick(BaseModel): 9 | station_id: list 10 | phase_time: list 11 | phase_type: list 12 | phase_score: list 13 | phase_amplitude: list 14 | phase_polarity: list 15 | 16 | 17 | # Define an endpoint to make predictions 18 | @app.post("/predict") 19 | def predict(request: Pick): 20 | print(f"Associating on {len(request.station_id)} picks.", flush=True) 21 | return { 22 | "events": { 23 | "time": [], 24 | "latitude": [], 25 | "longitude": [], 26 | "depth_km": [], 27 | }, 28 | "picks": { 29 | "station_id": [], 30 | "phase_time": [], 31 | "phase_type": [], 32 | "phase_score": [], 33 | "phase_amplitude": [], 34 | "phase_polarity": [], 35 | }, 36 | } 37 | -------------------------------------------------------------------------------- /quakeflow/demo/association/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | obspy 3 | uvicorn 4 | pandas -------------------------------------------------------------------------------- /quakeflow/demo/data/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # Copy the requirements file used for dependencies 11 | COPY requirements.txt . 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 
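Note (illustrative aside, not part of the repository): the association app above accepts a Pick request body and currently returns empty event/pick lists (it is a stub). A minimal client sketch, assuming the service is reachable either through the in-cluster association-api Service on port 80 (see service.yaml further below) or locally via uvicorn on port 8080; the pick values are placeholders:

```
import requests

picks = {
    "station_id": ["NC.ABC..HHZ"],  # placeholder values
    "phase_time": ["2024-01-01T00:00:01.000"],
    "phase_type": ["P"],
    "phase_score": [0.9],
    "phase_amplitude": [1.0e-6],
    "phase_polarity": ["U"],
}

# In-cluster: http://association-api/predict; local test: http://localhost:8080/predict
resp = requests.post("http://localhost:8080/predict", json=picks, timeout=60)
resp.raise_for_status()
print(resp.json()["events"])
```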
18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] 21 | -------------------------------------------------------------------------------- /quakeflow/demo/data/app.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import logging 4 | import threading 5 | import time 6 | from datetime import datetime 7 | from random import randint 8 | 9 | import fsspec 10 | import obspy 11 | import pandas as pd 12 | import redis 13 | from fastapi import FastAPI 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | 17 | 18 | app = FastAPI() 19 | 20 | # %% 21 | PROTOCAL = "gs" 22 | BUCKET = "quakeflow_share" 23 | FOLDER = "demo" 24 | REDIS_HOST = "quakeflow-redis-master.default.svc.cluster.local" 25 | try: 26 | redis_client = redis.Redis(host=REDIS_HOST, port=6379, decode_responses=True) 27 | redis_client.ping() 28 | except: 29 | redis_client = redis.Redis(host="localhost", port=6379, decode_responses=True) 30 | 31 | 32 | def replay(): 33 | fs = fsspec.filesystem(PROTOCAL) 34 | mseeds = fs.glob(f"{BUCKET}/{FOLDER}/waveforms/????-???/??/*.mseed") 35 | 36 | waveforms = {} 37 | station_ids = [] 38 | min_timestamp = None 39 | print("Reading waveforms: ", end="", flush=True) 40 | for i, mseed in enumerate(mseeds): 41 | print(mseed.split("/")[-1], end=" ", flush=True) 42 | with fs.open(mseed, "rb") as f: 43 | st = obspy.read(f) 44 | st = st.merge(fill_value="latest") 45 | st = st.resample(100) 46 | tr = st[0] 47 | if min_timestamp is None: 48 | min_timestamp = tr.times("timestamp")[0] 49 | waveforms[tr.id] = { 50 | "data": tr.data.tolist(), 51 | # "timestamp": (tr.times("timestamp") - min_timestamp).tolist(), 52 | "timestamp": tr.times("timestamp").tolist(), 53 | } 54 | station_ids.append(tr.id) 55 | if i > 40: 56 | break 57 | print("\nFinished reading waveforms.", flush=True) 58 | 59 | index = {x: 0 for x in station_ids} 60 | while True: 61 | for i, sid in enumerate(station_ids): 62 | # print(sid, end=" ", flush=True) 63 | window_size = randint(80, 120) 64 | data = waveforms[sid]["data"][index[sid] : index[sid] + window_size] 65 | timestamp = waveforms[sid]["timestamp"][index[sid] : index[sid] + window_size] 66 | if len(data) < window_size: # wrap around 67 | index[sid] = 0 68 | data = waveforms[sid]["data"][:window_size] 69 | timestamp = waveforms[sid]["timestamp"][:window_size] 70 | redis_client.xadd( 71 | sid, 72 | { 73 | "data": json.dumps(data), 74 | "timestamp": json.dumps(timestamp), 75 | }, 76 | ) 77 | redis_client.xtrim(sid, maxlen=60000) 78 | index[sid] += window_size 79 | # print(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")) 80 | time.sleep(1) 81 | 82 | 83 | threading.Thread(target=replay, daemon=True).start() 84 | 85 | 86 | @app.get("/") 87 | def read_root(): 88 | return {"message": "Replaying waveforms."} 89 | -------------------------------------------------------------------------------- /quakeflow/demo/data/requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | fastapi 3 | fsspec 4 | gcsfs 5 | obspy 6 | uvicorn 7 | pandas -------------------------------------------------------------------------------- /quakeflow/demo/hub/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 
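Note (illustrative aside, not part of the repository): data/app.py above pushes {"data", "timestamp"} entries onto one Redis stream per station with XADD and trims each stream to 60,000 entries, but no downstream reader is included in this snapshot. A sketch of consuming those entries with XREAD, assuming redis-py and a known stream (station) id:

```
import json

import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)
stream = "NC.ABC..HHZ"  # hypothetical stream name; the replay service uses the obspy trace id
last_id = "$"           # "$" = only entries added after we start reading

while True:
    for _, entries in r.xread({stream: last_id}, block=2000, count=10):
        for entry_id, fields in entries:
            data = json.loads(fields["data"])
            timestamp = json.loads(fields["timestamp"])
            print(entry_id, len(data), "samples ending at", timestamp[-1])
            last_id = entry_id
```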
2 | FROM python:3.9-alpine 3 | 4 | # Set the working directory to /app 5 | WORKDIR /app 6 | 7 | # copy the requirements file used for dependencies 8 | COPY requirements.txt . 9 | 10 | # Install any needed packages specified in requirements.txt 11 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 12 | 13 | # Copy the rest of the working directory contents into the container at /app 14 | COPY . . 15 | 16 | # Run app.py when the container launches 17 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] 18 | -------------------------------------------------------------------------------- /quakeflow/demo/hub/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | redis 3 | fsspec 4 | gcsfs 5 | fastapi 6 | requests 7 | uvicorn 8 | pandas 9 | debugpy # Required for debugging 10 | -------------------------------------------------------------------------------- /quakeflow/demo/location/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # copy the requirements file used for dependencies 11 | COPY requirements.txt . 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /quakeflow/demo/location/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from pydantic import BaseModel 3 | 4 | app = FastAPI() 5 | 6 | 7 | # Define a request body model 8 | class Pick(BaseModel): 9 | station_id: list 10 | phase_time: list 11 | phase_type: list 12 | phase_score: list 13 | phase_amplitude: list 14 | phase_polarity: list 15 | 16 | 17 | # Define an endpoint to make predictions 18 | @app.post("/predict") 19 | def predict(request: Pick): 20 | print(f"Locating on {len(request.station_id)} picks.", flush=True) 21 | return { 22 | "time": [], 23 | "latitude": [], 24 | "longitude": [], 25 | "depth_km": [], 26 | "num_p_picks": [], 27 | "num_s_picks": [], 28 | } 29 | -------------------------------------------------------------------------------- /quakeflow/demo/location/requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | fastapi 3 | fsspec 4 | gcsfs 5 | obspy 6 | uvicorn 7 | pandas -------------------------------------------------------------------------------- /quakeflow/demo/picking/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # copy the requirements file used for dependencies 11 | COPY requirements.txt . 
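Note (illustrative aside, not part of the repository): the hub image here only installs dependencies; its app.py is not part of this snapshot. A rough, assumed sketch of how the hub could chain the three stub services together, using the request/response models defined in the picking, association, and location apps and the Service names from deployment.yaml/service.yaml (the URLs and the helper function are illustrative, not the actual hub logic):

```
import requests

PICKING_URL = "http://picking-api/predict"          # in-cluster Service names; adjust for local runs
ASSOCIATION_URL = "http://association-api/predict"
LOCATION_URL = "http://location-api/predict"


def run_pipeline(waveform_batch: dict):
    """waveform_batch follows the picking Data model: {"id": [...], "vec": [...], "timestamp": [...]}."""
    picks = requests.post(PICKING_URL, json=waveform_batch, timeout=60).json()
    associated = requests.post(ASSOCIATION_URL, json=picks, timeout=60).json()
    located = requests.post(LOCATION_URL, json=associated["picks"], timeout=60).json()
    return associated["events"], located
```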
12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /quakeflow/demo/picking/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from pydantic import BaseModel 3 | 4 | app = FastAPI() 5 | 6 | 7 | # Define a request body model 8 | class Data(BaseModel): 9 | id: list 10 | vec: list 11 | timestamp: list 12 | 13 | 14 | # Define an endpoint to make predictions 15 | @app.post("/predict") 16 | def predict(request: Data): 17 | print(f"Picking on {len(request.id)} stations.", flush=True) 18 | return { 19 | "station_id": [], 20 | "phase_time": [], 21 | "phase_type": [], 22 | "phase_score": [], 23 | "phase_amplitude": [], 24 | "phase_polarity": [], 25 | } 26 | -------------------------------------------------------------------------------- /quakeflow/demo/picking/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | obspy 3 | uvicorn 4 | pandas -------------------------------------------------------------------------------- /quakeflow/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: quakeflow-hub 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: quakeflow-hub 10 | template: 11 | metadata: 12 | labels: 13 | app: quakeflow-hub 14 | spec: 15 | containers: 16 | - name: server 17 | image: zhuwq0/quakeflow-hub 18 | ports: 19 | - containerPort: 8080 20 | env: 21 | - name: PORT 22 | value: "8080" 23 | readinessProbe: 24 | tcpSocket: 25 | port: 8080 26 | initialDelaySeconds: 5 27 | --- 28 | apiVersion: apps/v1 29 | kind: Deployment 30 | metadata: 31 | name: quakeflow-data 32 | spec: 33 | replicas: 1 34 | selector: 35 | matchLabels: 36 | app: quakeflow-data 37 | template: 38 | metadata: 39 | labels: 40 | app: quakeflow-data 41 | spec: 42 | containers: 43 | - name: server 44 | image: zhuwq0/quakeflow-data 45 | ports: 46 | - containerPort: 8080 47 | env: 48 | - name: PORT 49 | value: "8080" 50 | readinessProbe: 51 | tcpSocket: 52 | port: 8080 53 | initialDelaySeconds: 5 54 | --- 55 | apiVersion: apps/v1 56 | kind: Deployment 57 | metadata: 58 | name: picking-api 59 | spec: 60 | replicas: 1 61 | selector: 62 | matchLabels: 63 | app: picking-api 64 | template: 65 | metadata: 66 | labels: 67 | app: picking-api 68 | spec: 69 | containers: 70 | - name: server 71 | image: zhuwq0/picking-api 72 | ports: 73 | - containerPort: 8080 74 | env: 75 | - name: PORT 76 | value: "8080" 77 | readinessProbe: 78 | tcpSocket: 79 | port: 8080 80 | initialDelaySeconds: 5 81 | --- 82 | apiVersion: apps/v1 83 | kind: Deployment 84 | metadata: 85 | name: association-api 86 | spec: 87 | replicas: 1 88 | selector: 89 | matchLabels: 90 | app: association-api 91 | template: 92 | metadata: 93 | labels: 94 | app: association-api 95 | spec: 96 | containers: 97 | - name: server 98 | image: zhuwq0/association-api 99 | ports: 100 | - containerPort: 8080 101 | env: 102 | - name: PORT 103 | value: "8080" 104 | readinessProbe: 105 | tcpSocket: 106 | port: 8080 107 | initialDelaySeconds: 5 108 | --- 109 
| apiVersion: apps/v1 110 | kind: Deployment 111 | metadata: 112 | name: location-api 113 | spec: 114 | replicas: 1 115 | selector: 116 | matchLabels: 117 | app: location-api 118 | template: 119 | metadata: 120 | labels: 121 | app: location-api 122 | spec: 123 | containers: 124 | - name: server 125 | image: zhuwq0/location-api 126 | ports: 127 | - containerPort: 8080 128 | env: 129 | - name: PORT 130 | value: "8080" 131 | readinessProbe: 132 | tcpSocket: 133 | port: 8080 134 | initialDelaySeconds: 5 -------------------------------------------------------------------------------- /quakeflow/helm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | helm install quakeflow-redis --set auth.enabled=false oci://registry-1.docker.io/bitnamicharts/redis -------------------------------------------------------------------------------- /quakeflow/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Sensor Data Visualization 7 | 8 | 9 | 10 |
11 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /quakeflow/replay_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import redis 3 | import json 4 | import time 5 | from random import randint 6 | import fsspec 7 | import pandas as pd 8 | import obspy 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | 12 | # %% 13 | PROTOCAL = "gs" 14 | BUCKET = "quakeflow_share" 15 | FOLDER = "demo/obspy" 16 | 17 | stations = pd.read_csv(f"{PROTOCAL}://{BUCKET}/{FOLDER}/stations.csv") 18 | fs = fsspec.filesystem(PROTOCAL) 19 | mseeds = fs.glob(f"{BUCKET}/{FOLDER}/waveforms/*/*.mseed") 20 | 21 | # %% 22 | waveforms = {} 23 | station_ids = [] 24 | min_timestamp = None 25 | print("Reading waveforms: ", end="", flush=True) 26 | for i, mseed in enumerate(mseeds): 27 | print(mseed.split("/")[-1], end=" ", flush=True) 28 | with fs.open(mseed, "rb") as f: 29 | st = obspy.read(f) 30 | st = st.merge(fill_value="latest") 31 | st = st.resample(100) 32 | tr = st[0] 33 | if min_timestamp is None: 34 | min_timestamp = tr.times("timestamp")[0] 35 | waveforms[tr.id] = { 36 | "data": tr.data.tolist(), 37 | # "timestamp": (tr.times("timestamp") - min_timestamp).tolist(), 38 | "timestamp": tr.times("timestamp").tolist(), 39 | } 40 | station_ids.append(tr.id) 41 | if i > 40: 42 | break 43 | print("\nFinished reading waveforms.", flush=True) 44 | with open("station_ids.json", "w") as f: 45 | json.dump(station_ids, f) 46 | 47 | 48 | # %% 49 | r = redis.Redis(host="localhost", port=6379, db=0) 50 | 51 | index = {x: 0 for x in station_ids} 52 | while True: 53 | for i, sid in enumerate(station_ids): 54 | print(sid, end=" ", flush=True) 55 | window_size = randint(80, 120) 56 | r.xadd( 57 | sid, 58 | { 59 | "data": json.dumps(waveforms[sid]["data"][index[sid] : index[sid] + window_size]), 60 | "timestamp": json.dumps(waveforms[sid]["timestamp"][index[sid] : index[sid] + window_size]), 61 | }, 62 | ) 63 | r.xtrim(sid, maxlen=60000) 64 | index[sid] += window_size 65 | print() 66 | time.sleep(1) 67 | 68 | # %% 69 | -------------------------------------------------------------------------------- /quakeflow/service.yaml: -------------------------------------------------------------------------------- 1 | # This Service manifest defines: 2 | # - a load balancer for pods matching label "app: python-hello-world" 3 | # - exposing the application to the public Internet (type:LoadBalancer) 4 | # - routes port 80 of the load balancer to the port 8080 of the Pods. 
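Note (illustrative aside, not part of the repository): replay_data.py above writes the list of replayed streams to station_ids.json and trims each stream to 60,000 entries. A small, assumed monitoring sketch (redis-py) for confirming that the replay is running by checking stream lengths and the newest entry id:

```
import json

import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)

with open("station_ids.json") as f:  # written by replay_data.py
    station_ids = json.load(f)

for sid in station_ids:
    length = r.xlen(sid)                # capped at 60000 by xtrim in the replay loop
    newest = r.xrevrange(sid, count=1)
    newest_id = newest[0][0] if newest else "empty"
    print(f"{sid}: {length} entries, newest id {newest_id}")
```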
5 | # Syntax reference https://kubernetes.io/docs/concepts/configuration/overview/ 6 | # apiVersion: v1 7 | # kind: Service 8 | # metadata: 9 | # name: quakeflow-external 10 | # spec: 11 | # type: LoadBalancer 12 | # selector: 13 | # app: quakeflow-hub 14 | # ports: 15 | # - name: http 16 | # port: 80 17 | # targetPort: 8080 18 | --- 19 | apiVersion: v1 20 | kind: Service 21 | metadata: 22 | name: picking-api 23 | spec: 24 | type: LoadBalancer 25 | selector: 26 | app: picking-api 27 | ports: 28 | - name: http 29 | port: 80 30 | targetPort: 8080 31 | --- 32 | apiVersion: v1 33 | kind: Service 34 | metadata: 35 | name: association-api 36 | spec: 37 | type: LoadBalancer 38 | selector: 39 | app: association-api 40 | ports: 41 | - name: http 42 | port: 80 43 | targetPort: 8080 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: location-api 49 | spec: 50 | type: LoadBalancer 51 | selector: 52 | app: location-api 53 | ports: 54 | - name: http 55 | port: 80 56 | targetPort: 8080 57 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | obspy 2 | cartopy -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | yaml/ 2 | debug/ 3 | debug_* 4 | demo 5 | local 6 | debug 7 | figures 8 | templates 9 | win32tools 10 | __pycache__ 11 | 2019-185 12 | BayArea 13 | Shelly2020.txt 14 | *.png 15 | *.pth 16 | *.csv 17 | *.txt 18 | *.tar.gz 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /scripts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y git wget 5 | RUN apt-get install -y libgeos++-dev 6 | 7 | ENV PATH="/root/miniconda3/bin:${PATH}" 8 | ARG PATH="/root/miniconda3/bin:${PATH}" 9 | ENV MINICONDA_VERSION=Miniconda3-py310_23.11.0-2-Linux-x86_64.sh 10 | RUN wget "https://repo.anaconda.com/miniconda/${MINICONDA_VERSION}" && \ 11 | mkdir /root/.conda && \ 12 | bash $MINICONDA_VERSION -b && \ 13 | rm -f $MINICONDA_VERSION && \ 14 | conda --version 15 | 16 | ENV PYTHONUNBUFFERED=1 17 | ## plotting 18 | RUN conda install -c conda-forge pygmt 19 | RUN pip install --no-cache-dir cartopy plotly 20 | ## machine learning 21 | RUN pip install --no-cache-dir numpy scipy matplotlib pandas scikit-learn 22 | RUN pip install --no-cache-dir obspy pyproj 23 | RUN pip install --no-cache-dir fsspec gcsfs s3fs 24 | RUN pip install --no-cache-dir kfp 25 | RUN pip install --no-cache-dir git+https://github.com/AI4EPS/GaMMA.git 26 | 27 | RUN apt-get clean && \ 28 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 29 | RUN conda clean --all --yes 30 | RUN rm -rf /root/.cache/pip -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | mkir relocation 3 | cd relocation 4 | git clone git@github.com:zhuwq0/GrowClust.git 5 | git clone git@github.com:zhuwq0/HypoDD.git 6 | cd .. 
7 | ``` 8 | ``` 9 | python download_waveform.py 10 | python run_phasenet.py 11 | python run_gamma.py 12 | python convert_hypodd.py && bash run_hypodd_ct.sh 13 | python convert_growclust.py && bash run_growclust_ct.sh 14 | python cut_templates.py && python run_cctorch.py 15 | python convert_hypodd.py --dtcc && bash run_hypodd_cc.sh 16 | python convert_growclust.py --dtcc && bash run_growclust_cc.sh 17 | python run_template_macthing.py 18 | ``` -------------------------------------------------------------------------------- /scripts/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--region", type=str, default="demo", help="region") 7 | parser.add_argument("--root_path", type=str, default="local", help="root path") 8 | 9 | ## Cloud 10 | parser.add_argument("--protocol", type=str, default="file", help="protocol (file, gs, s3)") 11 | parser.add_argument("--bucket", type=str, default=None, help="bucket name") 12 | parser.add_argument("--token", type=str, default=None, help="token") 13 | 14 | # parser.add_argument("--bucket", type=str, default="quakeflow_catalog", help="bucket name") 15 | # parser.add_argument("--protocol", type=str, default="gs", help="protocol (file, gs, s3)") 16 | # parser.add_argument("--token", type=str, default="application_default_credentials.json", help="token") 17 | 18 | ## Parallel 19 | parser.add_argument("--num_nodes", type=int, default=1, help="number of nodes") 20 | parser.add_argument("--node_rank", type=int, default=0, help="node rank") 21 | 22 | ## Model 23 | parser.add_argument("--model", type=str, default="phasenet", help="model") 24 | 25 | ## PhaseNet 26 | parser.add_argument("--overwrite", action="store_true", help="overwrite existing results") 27 | 28 | ## ADLOC 29 | parser.add_argument("--iter", type=int, default=0, help="iteration") 30 | 31 | ## CCTorch 32 | parser.add_argument("--dtct_pair", action="store_true", help="run convert_dtcc.py") 33 | 34 | return parser.parse_args() 35 | -------------------------------------------------------------------------------- /scripts/convert_dtcc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | import pickle 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from args import parse_args 9 | from tqdm import tqdm 10 | 11 | # %% 12 | args = parse_args() 13 | root_path = args.root_path 14 | region = args.region 15 | 16 | with open(f"{root_path}/{region}/config.json", "r") as fp: 17 | config = json.load(fp) 18 | 19 | # %% 20 | data_path = f"{region}/cctorch" 21 | result_path = f"{region}/adloc_dd" 22 | if not os.path.exists(f"{result_path}"): 23 | os.makedirs(f"{result_path}") 24 | 25 | # %% 26 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") 27 | stations["station_id"] = stations["station"] 28 | stations = stations.groupby("station_id").first().reset_index() 29 | 30 | # %% 31 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv", dtype={"event_index": str}) 32 | events["time"] = pd.to_datetime(events["event_time"], format="mixed") 33 | 34 | # %% 35 | stations["idx_sta"] = np.arange(len(stations)) # reindex in case the index does not start from 0 or is not continuous 36 | events["idx_eve"] = np.arange(len(events)) # reindex in case the index does not start from 0 or is not continuous 37 | mapping_phase_type_int = {"P": 0, "S": 1} 38 | 39 | # %% 40 | 
with open(f"{root_path}/{data_path}/dt.cc", "r") as f: 41 | lines = f.readlines() 42 | 43 | # %% 44 | event_index1 = [] 45 | event_index2 = [] 46 | station_index = [] 47 | phase_type = [] 48 | phase_score = [] 49 | phase_dtime = [] 50 | 51 | stations.set_index("station_id", inplace=True) 52 | events.set_index("event_index", inplace=True) 53 | 54 | for line in tqdm(lines): 55 | if line[0] == "#": 56 | evid1, evid2, _ = line[1:].split() 57 | else: 58 | stid, dt, weight, phase = line.split() 59 | event_index1.append(events.loc[evid1, "idx_eve"]) 60 | event_index2.append(events.loc[evid2, "idx_eve"]) 61 | station_index.append(stations.loc[stid, "idx_sta"]) 62 | phase_type.append(mapping_phase_type_int[phase]) 63 | phase_score.append(weight) 64 | phase_dtime.append(dt) 65 | 66 | 67 | dtypes = np.dtype( 68 | [ 69 | ("idx_eve1", np.int32), 70 | ("idx_eve2", np.int32), 71 | ("idx_sta", np.int32), 72 | ("phase_type", np.int32), 73 | ("phase_score", np.float32), 74 | ("phase_dtime", np.float32), 75 | ] 76 | ) 77 | pairs_array = np.memmap( 78 | f"{root_path}/{result_path}/pair_dt.dat", 79 | mode="w+", 80 | shape=(len(phase_dtime),), 81 | dtype=dtypes, 82 | ) 83 | pairs_array["idx_eve1"] = event_index1 84 | pairs_array["idx_eve2"] = event_index2 85 | pairs_array["idx_sta"] = station_index 86 | pairs_array["phase_type"] = phase_type 87 | pairs_array["phase_score"] = phase_score 88 | pairs_array["phase_dtime"] = phase_dtime 89 | with open(f"{root_path}/{result_path}/pair_dtypes.pkl", "wb") as f: 90 | pickle.dump(dtypes, f) 91 | 92 | 93 | # %% 94 | events.to_csv(f"{root_path}/{result_path}/pair_events.csv", index=True, index_label="event_index") 95 | stations.to_csv(f"{root_path}/{result_path}/pair_stations.csv", index=True, index_label="station_id") 96 | -------------------------------------------------------------------------------- /scripts/create_filelist.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from glob import glob 4 | 5 | # %% 6 | protocol = "file" 7 | token = None 8 | 9 | ## get from command line 10 | root_path = "local" 11 | region = "demo" 12 | if len(os.sys.argv) > 1: 13 | root_path = os.sys.argv[1] 14 | region = os.sys.argv[2] 15 | print(f"root_path: {root_path}") 16 | print(f"region: {region}") 17 | 18 | # %% 19 | result_path = f"{region}/phasenet_das" 20 | if not os.path.exists(f"{root_path}/{result_path}"): 21 | os.makedirs(f"{root_path}/{result_path}", exist_ok=True) 22 | 23 | # %% 24 | folder_depth = 2 25 | csv_list = sorted(glob(f"{root_path}/{result_path}/picks_phasenet_das/????-??-??/*.csv")) 26 | csv_list = ["/".join(x.split("/")[-folder_depth:]) for x in csv_list] 27 | 28 | # %% 29 | hdf5_list = sorted(glob(f"{root_path}/{region}/????-??-??/*.h5")) 30 | num_to_process = 0 31 | with open(f"{root_path}/{result_path}/filelist.csv", "w") as fp: 32 | # fp.write("\n".join(hdf5_list)) 33 | for line in hdf5_list: 34 | csv_name = "/".join(line.split("/")[-folder_depth:]).replace(".h5", ".csv") 35 | if csv_name not in csv_list: 36 | fp.write(f"{line}\n") 37 | num_to_process += 1 38 | 39 | print(f"filelist.csv created in {root_path}/{result_path}: {num_to_process} / {len(hdf5_list)} to process") 40 | -------------------------------------------------------------------------------- /scripts/debug_growclust.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from pathlib import Path 3 | import h5py 4 | import scipy 5 | from tqdm import tqdm 6 | import numpy as np 7 | import json 8 
| import pandas as pd 9 | from datetime import datetime 10 | 11 | # %% 12 | output_path = Path("relocation/growclust/") 13 | if not output_path.exists(): 14 | output_path.mkdir(parents=True) 15 | 16 | # %% 17 | dt_ct = Path("relocation/hypodd/dt.ct") 18 | 19 | lines = [] 20 | with open(dt_ct, "r") as fp: 21 | for line in tqdm(fp): 22 | if line.startswith("#"): 23 | ev1, ev2 = line.split()[1:3] 24 | lines.append(f"# {ev1} {ev2} 0.000\n") 25 | else: 26 | station, t1, t2, score, phase = line.split() 27 | #station = station[:-2] 28 | # if station in ["WAS2", "FUR", "RRX"]: 29 | # continue 30 | lines.append(f"{station} {float(t1)-float(t2):.5f} {score} {phase}\n") 31 | 32 | # %% 33 | with open(output_path / "dt.ct", "w") as fp: 34 | fp.writelines(lines) 35 | 36 | -------------------------------------------------------------------------------- /scripts/download_event_hinet.py: -------------------------------------------------------------------------------- 1 | # %% 2 | !pip install HinetPy 3 | # !wget https://github.com/AI4EPS/software/releases/download/win32tools/win32tools.tar.gz 4 | ! [ -e win32tools.tar.gz ] || wget https://github.com/AI4EPS/software/releases/download/win32tools/win32tools.tar.gz 5 | !tar -xvf win32tools.tar.gz 6 | !cd win32tools && make 7 | 8 | 9 | # %% 10 | from HinetPy import Client, win32 11 | import os 12 | 13 | os.environ["PATH"] += os.pathsep + os.path.abspath("win32tools/catwin32.src") + os.pathsep + os.path.abspath("win32tools/win2sac.src") 14 | 15 | # %% 16 | waveform_path = "local/wavefroms/" 17 | 18 | # %% 19 | client = Client("", "") 20 | 21 | data, ctable = client.get_continuous_waveform("0101", "201001010000", 20, outdir=f"{waveform_path}/cnt") 22 | 23 | 24 | # %% 25 | # data = "2010010100000101VM.cnt" 26 | # ctable = "01_01_20100101.euc.ch" 27 | 28 | win32.extract_sac(data, ctable, outdir="local/wavefroms") 29 | win32.extract_sacpz(ctable) -------------------------------------------------------------------------------- /scripts/load_cloud_picks.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | import fsspec 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | # %% 11 | if __name__ == "__main__": 12 | 13 | # %% 14 | protocol = "gs" 15 | token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 16 | with open(token_json, "r") as fp: 17 | token = json.load(fp) 18 | 19 | bucket = "quakeflow_catalog" 20 | folder = "NC/phasenet_merged" ## NCEDC 21 | # folder = "SC/phasenet_merged" ## SCEDC 22 | 23 | fs = fsspec.filesystem(protocol, token=token) 24 | 25 | def load_csv(jday): 26 | with fs.open(jday, "r") as fp: 27 | return pd.read_csv(fp, dtype=str) 28 | 29 | # %% 30 | years = range(2023, 2024) 31 | 32 | for year in years: 33 | jdays = fs.glob(f"{bucket}/{folder}/{year}/????.???.csv") 34 | 35 | with ThreadPoolExecutor(max_workers=32) as executor: 36 | picks = list( 37 | tqdm(executor.map(load_csv, jdays), total=len(jdays), desc=f"Loading {bucket}/{folder}/{year}") 38 | ) 39 | 40 | # %% 41 | picks = pd.concat(picks) 42 | picks.to_csv("phasenet_picks.csv", index=False) 43 | 44 | # %% 45 | picks = pd.read_csv("phasenet_picks.csv") 46 | print(f"Loaded {len(picks):,} picks") 47 | -------------------------------------------------------------------------------- /scripts/load_cloud_templates.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | 
import os 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | import fsspec 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import pandas as pd 10 | from tqdm import tqdm 11 | 12 | # %% 13 | if __name__ == "__main__": 14 | 15 | # %% 16 | result_path = "results/" 17 | if not os.path.exists(result_path): 18 | os.makedirs(result_path) 19 | 20 | # %% 21 | protocol = "gs" 22 | token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 23 | with open(token_json, "r") as fp: 24 | token = json.load(fp) 25 | 26 | bucket = "quakeflow_catalog" 27 | folder = "Cal/cctorch" 28 | 29 | fs = fsspec.filesystem(protocol, token=token) 30 | 31 | # %% 32 | def plot_templates(templates, events, picks): 33 | templates = templates - np.nanmean(templates, axis=(-1), keepdims=True) 34 | std = np.std(templates, axis=(-1), keepdims=True) 35 | std[std == 0] = 1.0 36 | templates = templates / std 37 | 38 | plt.figure(figsize=(10, 10)) 39 | plt.imshow(templates[:, -1, 0, :], origin="lower", aspect="auto", vmin=-0.3, vmax=0.3, cmap="RdBu_r") 40 | plt.colorbar() 41 | plt.show() 42 | 43 | # %% 44 | years = [2023] 45 | 46 | for year in years: 47 | num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365 48 | 49 | for jday in range(1, num_jday + 1): 50 | 51 | if not fs.exists(f"{bucket}/{folder}/{year}/template_{jday:03d}.dat"): 52 | continue 53 | 54 | with fs.open(f"{bucket}/{folder}/{year}/cctorch_picks_{jday:03d}.csv", "r") as fp: 55 | picks = pd.read_csv(fp, dtype=str) 56 | with fs.open(f"{bucket}/{folder}/{year}/cctorch_events_{jday:03d}.csv", "r") as fp: 57 | events = pd.read_csv(fp, dtype=str) 58 | with fs.open(f"{bucket}/{folder}/{year}/config_{jday:03d}.json", "r") as fp: 59 | config = json.load(fp) 60 | template_file = fs.open(f"{bucket}/{folder}/{year}/template_{jday:03d}.dat", "rb") 61 | templates = np.frombuffer(template_file.read(), dtype=np.float32).reshape(tuple(config["template_shape"])) 62 | template_file.close() 63 | 64 | print(f"events: {len(events):,} ") 65 | print(f"picks: {len(picks):,} ") 66 | print(f"templates: {templates.shape}") 67 | 68 | picks.to_csv(f"{result_path}/picks_{year:04d}_{jday:03d}.csv", index=False) 69 | events.to_csv(f"{result_path}/events_{year:04d}_{jday:03d}.csv", index=False) 70 | np.save(f"{result_path}/templates_{year:04d}_{jday:03d}.npy", templates) 71 | 72 | plot_templates(templates, events, picks) 73 | 74 | # break 75 | 76 | # %% 77 | -------------------------------------------------------------------------------- /scripts/merge_gamma_picks.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import multiprocessing as mp 4 | import os 5 | from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed 6 | from datetime import datetime, timedelta, timezone 7 | from threading import Lock, Thread 8 | 9 | import fsspec 10 | import numpy as np 11 | import pandas as pd 12 | import pyproj 13 | from obspy import read_inventory 14 | from obspy.clients.fdsn import Client 15 | from sklearn.cluster import DBSCAN 16 | from tqdm import tqdm 17 | from args import parse_args 18 | from glob import glob 19 | 20 | 21 | # %% 22 | if __name__ == "__main__": 23 | 24 | args = parse_args() 25 | root_path = args.root_path 26 | region = args.region 27 | 28 | data_path = f"{region}/gamma" 29 | result_path = f"{region}/gamma" 30 | 31 | # %% 32 | # protocol = "gs" 33 | # token_json = 
f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 34 | # with open(token_json, "r") as fp: 35 | # token = json.load(fp) 36 | # fs = fsspec.filesystem(protocol, token=token) 37 | 38 | # %% 39 | event_csvs = sorted(glob(f"{root_path}/{data_path}/????/????.???.events.csv")) 40 | 41 | # %% 42 | events = [] 43 | picks = [] 44 | for event_csv in tqdm(event_csvs, desc="Load event csvs"): 45 | pick_csv = event_csv.replace("events.csv", "picks.csv") 46 | year, jday = event_csv.split("/")[-1].split(".")[:2] 47 | events_ = pd.read_csv(event_csv, dtype=str) 48 | picks_ = pd.read_csv(pick_csv, dtype=str) 49 | events_["year"] = year 50 | events_["jday"] = jday 51 | picks_["year"] = year 52 | picks_["jday"] = jday 53 | events.append(events_) 54 | picks.append(picks_) 55 | 56 | events = pd.concat(events, ignore_index=True) 57 | picks = pd.concat(picks, ignore_index=True) 58 | 59 | events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"] 60 | picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"] 61 | 62 | events["event_index"] = np.arange(len(events)) 63 | picks = picks.drop("event_index", axis=1) 64 | picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id", how="left") 65 | 66 | events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True) 67 | picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True) 68 | 69 | events.to_csv(f"{root_path}/{result_path}/gamma_events.csv", index=False) 70 | picks.to_csv(f"{root_path}/{result_path}/gamma_picks.csv", index=False) 71 | 72 | # %% 73 | -------------------------------------------------------------------------------- /scripts/quakeflow_job.yaml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | 3 | workdir: ./ 4 | 5 | num_nodes: 2 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: 16 | 17 | accelerators: V100:1 18 | 19 | cpus: 4+ 20 | 21 | use_spot: True 22 | # spot_recovery: none 23 | 24 | # image_id: docker:zhuwq0/quakeflow:latest 25 | 26 | envs: 27 | JOB: quakeflow 28 | NCPU: 1 29 | ROOT: /data/local 30 | REGION: demo 31 | 32 | file_mounts: 33 | 34 | /data: 35 | # source: s3://scedc-pds 36 | # source: gs://quakeflow_dataset 37 | source: gs://quakeflow_share/ 38 | mode: MOUNT 39 | 40 | /quakeflow_dataset: 41 | source: gs://quakeflow_dataset/ 42 | mode: MOUNT 43 | 44 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | ~/EQNet: ../EQNet 48 | 49 | setup: | 50 | echo "Begin setup." 
51 | sudo apt install rclone 52 | pip3 install fsspec gcsfs kfp==2.3 53 | pip3 install obspy pyproj 54 | pip3 install cartopy 55 | pip3 install h5py tqdm wandb 56 | pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 57 | mkdir ~/data && rclone mount range:/ ~/data --daemon 58 | 59 | run: | 60 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 61 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 62 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 63 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 64 | ls -al /data 65 | python set_config.py $ROOT $REGION 66 | python download_catalog.py $ROOT $REGION 67 | python download_station.py $ROOT $REGION 68 | fi 69 | torchrun \ 70 | --nproc_per_node=${nproc_per_node} \ 71 | --node_rank=${SKYPILOT_NODE_RANK} \ 72 | --nnodes=$num_nodes \ 73 | --master_addr=$master_addr \ 74 | --master_port=8008 \ 75 | download_waveform.py $ROOT $REGION 76 | torchrun \ 77 | --nproc_per_node=${nproc_per_node} \ 78 | --node_rank=${SKYPILOT_NODE_RANK} \ 79 | --nnodes=$num_nodes \ 80 | --master_addr=$master_addr \ 81 | --master_port=8008 \ 82 | run_phasenet_v2.py $ROOT $REGION 83 | -------------------------------------------------------------------------------- /scripts/run_eqnet.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from pathlib import Path 3 | import os 4 | import torch 5 | 6 | # %% 7 | # region = "Hawaii_Loa" 8 | # region = "South_Pole" 9 | # region = "Kilauea" 10 | region = "Kilauea_debug" 11 | root_path = Path(region) 12 | data_path = root_path / "obspy" 13 | result_path = root_path / "eqnet" 14 | if not result_path.exists(): 15 | result_path.mkdir() 16 | 17 | # %% 18 | mseed_path = data_path / "waveforms" 19 | mseeds = sorted(list(mseed_path.rglob("*.mseed"))) 20 | file_list = [] 21 | for f in mseeds: 22 | file_list.append(str(f).split(".mseed")[0][:-1]+"*.mseed") 23 | 24 | file_list = sorted(list(set(file_list))) 25 | 26 | # %% 27 | with open(result_path / "mseed_list.txt", "w") as fp: 28 | fp.write("\n".join(file_list)) 29 | 30 | # %% 31 | num_gpu = torch.cuda.device_count() 32 | 33 | # %% 34 | # os.system(f"torchrun --standalone --nproc_per_node 4 ../EQNet/predict.py --model phasenet --add_polarity --add_event --data_path ./ --data_list mseed_list.txt --response_xml '{root_path}/stations/*xml' --result_path ./eqnet_picks --batch_size=1 --format mseed") 35 | os.system(f"torchrun --standalone --nproc_per_node {num_gpu} ../EQNet/predict.py --model phasenet --add_polarity --add_event --data_path ./ --data_list {result_path}/mseed_list.txt --response_xml {data_path}/inventory.xml --result_path {result_path}/results --batch_size=1 --format mseed") 36 | 37 | os.system(f"cp {result_path}/results/picks_phasenet_raw.csv {result_path}/picks.csv") 38 | -------------------------------------------------------------------------------- /scripts/run_growclust_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from datetime import datetime 4 | 5 | import pandas as pd 6 | from args import parse_args 7 | from tqdm import tqdm 8 | 9 | args = parse_args() 10 | 11 | # %% 12 | root_path = args.root_path 13 | region = args.region 14 | result_path = f"{region}/growclust" 15 | if not os.path.exists(f"{root_path}/{result_path}"): 16 | os.makedirs(f"{root_path}/{result_path}") 17 | 18 | # %% 19 | # stations_json = f"{region}/results/data/stations.json" 20 | # 
stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") 21 | station_csv = f"{region}/cctorch/cctorch_stations.csv" 22 | stations = pd.read_csv(f"{root_path}/{station_csv}") 23 | stations.set_index("station_id", inplace=True) 24 | 25 | 26 | lines = [] 27 | for i, row in stations.iterrows(): 28 | # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 29 | line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 30 | lines.append(line) 31 | 32 | with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: 33 | fp.writelines(lines) 34 | 35 | 36 | # %% 37 | # events_csv = f"{region}/results/phase_association/events.csv" 38 | # events_csv = f"{region}/adloc/ransac_events.csv" 39 | events_csv = f"{region}/cctorch/cctorch_events.csv" 40 | # event_file = f"{region}/cctorch/events.csv" 41 | events = pd.read_csv(f"{root_path}/{events_csv}") 42 | # event_df = event_df[event_df["gamma_score"] > 10] 43 | # event_index = [f"{x:06d}" for x in event_df["event_index"]] 44 | # events["time"] = pd.to_datetime(events["time"]) 45 | events["time"] = pd.to_datetime(events["event_time"]) 46 | if "magnitude" not in events.columns: 47 | events["magnitude"] = 0.0 48 | 49 | events[["year", "month", "day", "hour", "minute", "second"]] = ( 50 | events["time"] 51 | # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) 52 | .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) 53 | .apply(pd.Series) 54 | .apply(pd.to_numeric) 55 | ) 56 | 57 | lines = [] 58 | for i, row in events.iterrows(): 59 | # yr mon day hr min sec lat lon dep mag eh ez rms evid 60 | line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" 61 | lines.append(line) 62 | 63 | with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: 64 | fp.writelines(lines) 65 | 66 | # %% 67 | os.system(f"bash run_growclust_cc.sh {root_path} {region}") 68 | -------------------------------------------------------------------------------- /scripts/run_growclust_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/growclust" ]; then 13 | mkdir -p $root_path/$region/growclust 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc 17 | cd $root_path/$region/growclust 18 | mkdir -p TT OUT 19 | 20 | if [ ! 
-d "GrowClust" ]; then 21 | git clone https://github.com/zhuwq0/GrowClust.git 22 | make -C GrowClust/SRC/ 23 | fi 24 | 25 | cat < growclust.inp 26 | **** Example GrowClust Control File ***** 27 | ******** Daniel Trugman, 2016 ********** 28 | ******************************************* 29 | * 30 | ******************************************* 31 | ************* Event list **************** 32 | ******************************************* 33 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 34 | 1 35 | * fin_evlist (event list file name) 36 | evlist.txt 37 | * 38 | ******************************************* 39 | ************ Station list ************* 40 | ******************************************* 41 | * stlist_fmt (0 = SEED channel, 1 = station name) 42 | 1 43 | * fin_stlist (station list file name) 44 | stlist.txt 45 | * 46 | ******************************************* 47 | ************* XCOR data *************** 48 | ******************************************* 49 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 50 | 1 12 51 | * fin_xcordat 52 | dt.cc 53 | * 54 | ******************************************* 55 | *** Velocity Model / Travel Time Tables *** 56 | ******************************************* 57 | * fin_vzmdl (input vz model file) 58 | vzmodel.txt 59 | * fout_vzfine (output, interpolated vz model file) 60 | TT/vzfine.txt 61 | * fout_pTT (output travel time table, P phase) 62 | TT/tt.pg 63 | * fout_sTT (output travel time table, S phase) 64 | TT/tt.sg 65 | * 66 | ****************************************** 67 | ***** Travel Time Table Parameters ****** 68 | ****************************************** 69 | * vpvs_factor rayparam_min (-1 = default) 70 | 1.732 0.0 71 | * tt_dep0 tt_dep1 tt_ddep 72 | 0. 71. 1. 73 | * tt_del0 tt_del1 tt_ddel 74 | 0. 500. 2. 75 | * 76 | ****************************************** 77 | ***** GrowClust Algorithm Parameters ***** 78 | ****************************************** 79 | * rmin delmax rmsmax 80 | 0.1 120 1.0 81 | * rpsavgmin, rmincut ngoodmin iponly 82 | 0 0.1 8 0 83 | * 84 | ****************************************** 85 | ************ Output files **************** 86 | ****************************************** 87 | * nboot nbranch_min 88 | 0 1 89 | * fout_cat (relocated catalog) 90 | OUT/out.growclust_cc_cat 91 | * fout_clust (relocated cluster file) 92 | OUT/out.growclust_cc_clust 93 | * fout_log (program log) 94 | OUT/out.growclust_cc_log 95 | * fout_boot (bootstrap distribution) 96 | OUT/out.growclust_cc_boot 97 | ****************************************** 98 | ****************************************** 99 | EOF 100 | 101 | cat < vzmodel.txt 102 | 0.0 5.30 0.00 103 | 1.0 5.65 0.00 104 | 3.0 5.93 0.00 105 | 5.0 6.20 0.00 106 | 7.0 6.20 0.00 107 | 9.0 6.20 0.00 108 | 11.0 6.20 0.00 109 | 13.0 6.20 0.00 110 | 17.0 6.20 0.00 111 | 21.0 6.20 0.00 112 | 31.00 7.50 0.00 113 | 31.10 8.11 0.00 114 | 100.0 8.11 0.00 115 | EOF 116 | 117 | ./GrowClust/SRC/growclust growclust.inp 118 | cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt 119 | cd $WORKING_DIR 120 | -------------------------------------------------------------------------------- /scripts/run_growclust_ct.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | WORKING_DIR=$PWD 3 | if [ $# -eq 2 ]; then 4 | root_path=$1 5 | region=$2 6 | else 7 | root_path="local" 8 | region="demo" 9 | fi 10 | 11 | if [ ! 
-d "$root_path/$region/growclust" ]; then 12 | mkdir -p $root_path/$region/growclust 13 | fi 14 | 15 | cd $root_path/$region/growclust 16 | mkdir -p TT OUT 17 | 18 | if [ ! -d "GrowClust" ]; then 19 | git clone https://github.com/zhuwq0/GrowClust.git 20 | make -C GrowClust/SRC/ 21 | fi 22 | 23 | cat < growclust.inp 24 | **** Example GrowClust Control File ***** 25 | ******** Daniel Trugman, 2016 ********** 26 | ******************************************* 27 | * 28 | ******************************************* 29 | ************* Event list **************** 30 | ******************************************* 31 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 32 | 1 33 | * fin_evlist (event list file name) 34 | evlist.txt 35 | * 36 | ******************************************* 37 | ************ Station list ************* 38 | ******************************************* 39 | * stlist_fmt (0 = SEED channel, 1 = station name) 40 | 1 41 | * fin_stlist (station list file name) 42 | stlist.txt 43 | * 44 | ******************************************* 45 | ************* XCOR data *************** 46 | ******************************************* 47 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 48 | 1 12 49 | * fin_xcordat 50 | dt.ct 51 | * 52 | ******************************************* 53 | *** Velocity Model / Travel Time Tables *** 54 | ******************************************* 55 | * fin_vzmdl (input vz model file) 56 | vzmodel.txt 57 | * fout_vzfine (output, interpolated vz model file) 58 | TT/vzfine.txt 59 | * fout_pTT (output travel time table, P phase) 60 | TT/tt.pg 61 | * fout_sTT (output travel time table, S phase) 62 | TT/tt.sg 63 | * 64 | ****************************************** 65 | ***** Travel Time Table Parameters ****** 66 | ****************************************** 67 | * vpvs_factor rayparam_min (-1 = default) 68 | 1.732 0.0 69 | * tt_dep0 tt_dep1 tt_ddep 70 | 0. 81. 1. 71 | * tt_del0 tt_del1 tt_ddel 72 | 0. 500. 2. 
73 | *
74 | ******************************************
75 | ***** GrowClust Algorithm Parameters *****
76 | ******************************************
77 | * rmin delmax rmsmax
78 | 0.6 120 1.0
79 | * rpsavgmin, rmincut ngoodmin iponly
80 | 0 0.6 8 0
81 | *
82 | ******************************************
83 | ************ Output files ****************
84 | ******************************************
85 | * nboot nbranch_min
86 | 0 1
87 | * fout_cat (relocated catalog)
88 | OUT/out.growclust_ct_cat
89 | * fout_clust (relocated cluster file)
90 | OUT/out.growclust_ct_clust
91 | * fout_log (program log)
92 | OUT/out.growclust_ct_log
93 | * fout_boot (bootstrap distribution)
94 | OUT/out.growclust_ct_boot
95 | ******************************************
96 | ******************************************
97 | EOF
98 | 
99 | cat <<EOF > vzmodel.txt
100 | 0.0 5.30 0.00
101 | 1.0 5.65 0.00
102 | 3.0 5.93 0.00
103 | 5.0 6.20 0.00
104 | 7.0 6.20 0.00
105 | 9.0 6.20 0.00
106 | 11.0 6.20 0.00
107 | 13.0 6.20 0.00
108 | 17.0 6.20 0.00
109 | 21.0 6.20 0.00
110 | 31.00 7.50 0.00
111 | 31.10 8.11 0.00
112 | 100.0 8.11 0.00
113 | EOF
114 | 
115 | ./GrowClust/SRC/growclust growclust.inp
116 | cp OUT/out.growclust_ct_cat growclust_ct_catalog.txt
117 | cd $WORKING_DIR
118 | 
-------------------------------------------------------------------------------- /scripts/run_hypodd_cc.py: --------------------------------------------------------------------------------
1 | # %%
2 | import json
3 | import os
4 | 
5 | import numpy as np
6 | import pandas as pd
7 | from args import parse_args
8 | 
9 | # %%
10 | args = parse_args()
11 | root_path = args.root_path
12 | region = args.region
13 | 
14 | with open(f"{root_path}/{region}/config.json", "r") as fp:
15 |     config = json.load(fp)
16 | 
17 | # %%
18 | data_path = f"{region}/cctorch"
19 | result_path = f"{region}/hypodd"
20 | if not os.path.exists(f"{root_path}/{result_path}"):
21 |     os.makedirs(f"{root_path}/{result_path}")
22 | 
23 | # %%
24 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv")
25 | 
26 | station_lines = {}
27 | for i, row in stations.iterrows():
28 |     station_id = row["station_id"]
29 |     network_code, station_code, comp_code, channel_code = station_id.split(".")
30 |     # tmp_code = f"{station_code}{channel_code}"
31 |     tmp_code = f"{station_code}"
32 |     station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n"
33 | 
34 | 
35 | with open(f"{root_path}/{result_path}/stations.dat", "w") as f:
36 |     for line in sorted(station_lines.values()):
37 |         f.write(line)
38 | 
39 | # %%
40 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv")
41 | events["time"] = pd.to_datetime(events["event_time"], format="mixed")
42 | 
43 | event_lines = []
44 | 
45 | for i, row in events.iterrows():
46 |     event_index = row["event_index"]
47 |     origin = row["time"]
48 |     magnitude = row["magnitude"]
49 |     x_err = 0.0
50 |     z_err = 0.0
51 |     time_err = 0.0
52 |     dx, dy, dz = 0.0, 0.0, 0.0
53 |     # dx = np.random.uniform(-0.01, 0.01)
54 |     # dy = np.random.uniform(-0.01, 0.01)
55 |     # dz = np.random.uniform(0, 10)
56 |     # dz = 0
57 |     event_lines.append(
58 |         f"{origin.year:4d}{origin.month:02d}{origin.day:02d} "
59 |         f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} "
60 |         # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} "
61 |         f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} "
62 |         f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n"
63 |     )
64 | 
65 | with open(f"{root_path}/{result_path}/events.dat", "w") as f:
66 |     f.writelines(event_lines)
67 | 
68 | # %%
69 | os.system(f"bash run_hypodd_cc.sh {root_path} {region}")
70 | 
-------------------------------------------------------------------------------- /scripts/run_hypodd_cc.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -x
3 | WORKING_DIR=$PWD
4 | if [ $# -eq 2 ]; then
5 | root_path=$1
6 | region=$2
7 | else
8 | root_path="local"
9 | region="demo"
10 | fi
11 | 
12 | if [ ! -d "$root_path/$region/hypodd" ]; then
13 | mkdir -p $root_path/$region/hypodd
14 | fi
15 | 
16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc
17 | cd $root_path/$region/hypodd
18 | 
19 | if [ ! -d "HypoDD" ]; then
20 | git clone https://github.com/zhuwq0/HypoDD.git
21 | export PATH=$PATH:$PWD/HypoDD
22 | make -C HypoDD/src/
23 | fi
24 | 
25 | cat <<EOF > cc.inp
26 | * RELOC.INP:
27 | *--- input file selection
28 | * cross correlation diff times:
29 | dt.cc
30 | *
31 | *catalog P diff times:
32 | 
33 | *
34 | * event file:
35 | events.dat
36 | *
37 | * station file:
38 | stations.dat
39 | *
40 | *--- output file selection
41 | * original locations:
42 | hypodd_cc.loc
43 | * relocations:
44 | hypodd_cc.reloc
45 | * station information:
46 | hypodd.sta
47 | * residual information:
48 | hypodd.res
49 | * source parameter information:
50 | hypodd.src
51 | *
52 | *--- data type selection:
53 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat
54 | * IPHA: 1= P; 2= S; 3= P&S
55 | * DIST:max dist [km] between cluster centroid and station
56 | * IDAT IPHA DIST
57 | 1 3 120
58 | *
59 | *--- event clustering:
60 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering)
61 | * OBSCT: min # of obs/pair for network data (0= no clustering)
62 | * OBSCC OBSCT
63 | 0 0
64 | *
65 | *--- solution control:
66 | * ISTART: 1 = from single source; 2 = from network sources
67 | * ISOLV: 1 = SVD, 2=lsqr
68 | * NSET: number of sets of iteration with specifications following
69 | * ISTART ISOLV NSET
70 | 2 2 4
71 | *
72 | *--- data weighting and re-weighting:
73 | * NITER: last iteration to use the following weights
74 | * WTCCP, WTCCS: weight cross P, S
75 | * WTCTP, WTCTS: weight catalog P, S
76 | * WRCC, WRCT: residual threshold in sec for cross, catalog data
77 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs
78 | * DAMP: damping (for lsqr only)
79 | * --- CROSS DATA ----- ----CATALOG DATA ----
80 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP
81 | 4 1 1 -9 -9 -9 -9 -9 -9 70
82 | 4 1 1 6 -9 -9 -9 -9 -9 70
83 | 4 1 0.8 3 4 -9 -9 -9 -9 70
84 | 4 1 0.8 2 2 -9 -9 -9 -9 70
85 | *
86 | *--- 1D model:
87 | * NLAY: number of model layers
88 | * RATIO: vp/vs ratio
89 | * TOP: depths of top of layer (km)
90 | * VEL: layer velocities (km/s)
91 | * NLAY RATIO
92 | 12 1.73
93 | * TOP
94 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10
95 | * VEL
96 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11
97 | *
98 | *--- event selection:
99 | * CID: cluster to be relocated (0 = all)
100 | * ID: cuspids of event to be relocated (8 per line)
101 | * CID
102 | 0
103 | * ID
104 | EOF
105 | 
106 | ./HypoDD/src/hypoDD/hypoDD cc.inp
107 | cd $WORKING_DIR
-------------------------------------------------------------------------------- /scripts/run_phasenet_das.yaml: --------------------------------------------------------------------------------
1 | name: quakeflow
2 | 
3 | workdir: .
4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: n2-highmem-16 16 | 17 | # accelerators: V100:1 18 | 19 | cpus: 8+ 20 | 21 | use_spot: True 22 | 23 | # image_id: docker:zhuwq0/quakeflow:latest 24 | 25 | envs: 26 | JOB: quakeflow 27 | NCPU: 1 28 | ROOT_PATH: /data 29 | RESULT_PATH: phasenet_das 30 | 31 | file_mounts: 32 | 33 | /data: 34 | # source: s3://scedc-pds/ 35 | # source: gs://quakeflow_dataset/ 36 | # source: gs://quakeflow_share/ 37 | source: gs://das_arcata/ 38 | mode: MOUNT 39 | 40 | /quakeflow_dataset: 41 | source: gs://quakeflow_dataset/ 42 | mode: MOUNT 43 | 44 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | # EQNet: ../EQNet 48 | 49 | setup: | 50 | echo "Begin setup." 51 | sudo apt install rclone 52 | pip3 install fsspec gcsfs kfp==2.3 53 | pip3 install obspy pyproj 54 | pip3 install cartopy 55 | pip3 install h5py tqdm wandb 56 | pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 57 | # pip3 install torch torchvision torchaudio 58 | # mkdir ~/data && rclone mount range:/ ~/data --daemon 59 | 60 | run: | 61 | [ -d "EQNet" ] && rm -r "EQNet" 62 | git clone https://github.com/AI4EPS/EQNet.git 63 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 64 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 65 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 66 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 67 | ls -al /data 68 | python create_filelist.py ${ROOT_PATH} "" 69 | fi 70 | 71 | torchrun \ 72 | --nproc_per_node=${nproc_per_node} \ 73 | --node_rank=${SKYPILOT_NODE_RANK} \ 74 | --nnodes=$num_nodes \ 75 | --master_addr=$master_addr \ 76 | --master_port=8008 \ 77 | EQNet/predict.py --model phasenet_das --format=h5 --data_list=${ROOT_PATH}/${RESULT_PATH}/filelist.csv --result_path=${ROOT_PATH}/${RESULT_PATH} --batch_size 1 --workers 6 --folder_depth=2 --system optasense 78 | 79 | -------------------------------------------------------------------------------- /scripts/run_phasenet_v2.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | import sys 5 | from collections import defaultdict 6 | from glob import glob 7 | from typing import Dict, List 8 | 9 | import fsspec 10 | import numpy as np 11 | from args import parse_args 12 | 13 | 14 | def run_phasenet( 15 | root_path: str, 16 | region: str, 17 | config: Dict, 18 | node_rank: int = 0, 19 | num_nodes: int = 1, 20 | overwrite: bool = False, 21 | model_path: str = "../PhaseNet/", 22 | protocol: str = "file", 23 | bucket: str = "", 24 | token: Dict = None, 25 | ) -> str: 26 | 27 | # %% 28 | fs = fsspec.filesystem(protocol=protocol, token=token) 29 | 30 | # %% 31 | result_path = f"{region}/phasenet" 32 | if not os.path.exists(f"{root_path}/{result_path}"): 33 | os.makedirs(f"{root_path}/{result_path}") 34 | 35 | # %% 36 | waveform_dir = f"{region}/waveforms" 37 | # mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/??/*.mseed")) 38 | # subdir = 3 39 | mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/*.mseed")) 40 | subdir = 2 41 | 42 | # %% 43 | mseed_3c = defaultdict(list) 44 | for mseed in mseed_list: 45 | key = "/".join(mseed.replace(".mseed", "").split("/")[-subdir - 1 :]) 46 | key = key[:-1] ## remove the channel suffix 47 | 
mseed_3c[key].append(mseed) 48 | print(f"Number of mseed files: {len(mseed_3c)}") 49 | 50 | # %% 51 | if not overwrite: 52 | # processed = sorted(glob(f"{root_path}/{result_path}/picks/????/???/??/*.csv")) 53 | processed = sorted(glob(f"{root_path}/{result_path}/picks/????/???/*.csv")) 54 | processed = ["/".join(f.replace(".csv", "").split("/")[-subdir - 1 :]) for f in processed] 55 | processed = [p[:-1] for p in processed] ## remove the channel suffix 56 | print(f"Number of processed files: {len(processed)}") 57 | 58 | keys = sorted(list(set(mseed_3c.keys()) - set(processed))) 59 | print(f"Number of unprocessed files: {len(keys)}") 60 | keys = list(np.array_split(keys, num_nodes)[node_rank]) 61 | print(f"Node {node_rank:03d}/{num_nodes:03d}: processing {len(keys)} files") 62 | 63 | if len(keys) == 0: 64 | return 0 65 | 66 | mseed_3c = [",".join(sorted(mseed_3c[k])) for k in keys] 67 | 68 | # %% 69 | mseed_file = f"{root_path}/{result_path}/mseed_list_{node_rank:03d}_{num_nodes:03d}.csv" 70 | with open(mseed_file, "w") as fp: 71 | fp.write("\n".join(mseed_3c)) 72 | 73 | # %% 74 | inventory_path = f"{root_path}/{region}/obspy/inventory" 75 | 76 | # %% 77 | os.system( 78 | f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={mseed_file} --response_xml={inventory_path} --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks_{node_rank:03d}_{num_nodes:03d} --batch_size=1 --subdir_level={subdir}" 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | 84 | args = parse_args() 85 | root_path = args.root_path 86 | region = args.region 87 | num_nodes = args.num_nodes 88 | node_rank = args.node_rank 89 | 90 | with open(f"{root_path}/{region}/config.json", "r") as fp: 91 | config = json.load(fp) 92 | 93 | os.system("cd ../PhaseNet && git checkout quakeflow && git pull") 94 | run_phasenet(root_path=root_path, region=region, config=config) 95 | 96 | if num_nodes == 1: 97 | os.system(f"python merge_phasenet_picks.py --region {region}") 98 | 99 | # %% 100 | -------------------------------------------------------------------------------- /scripts/set_config.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | from typing import Dict 5 | 6 | import fsspec 7 | from args import parse_args 8 | 9 | 10 | def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: str, token: Dict) -> Dict: 11 | 12 | fs = fsspec.filesystem(protocol, token=token) 13 | if not os.path.exists(root_path): 14 | os.makedirs(root_path) 15 | data_dir = f"{region}" 16 | if not os.path.exists(f"{root_path}/{data_dir}"): 17 | os.makedirs(f"{root_path}/{data_dir}", exist_ok=True) 18 | for subfolder in [ 19 | "network", 20 | "waveforms", 21 | "picking", 22 | "association", 23 | "location", 24 | "relocation", 25 | "mechanism", 26 | ]: 27 | if not os.path.exists(f"{root_path}/{data_dir}/results/{subfolder}"): 28 | os.makedirs(f"{root_path}/{data_dir}/results/{subfolder}", exist_ok=True) 29 | 30 | config_region = {} 31 | ## default values 32 | config_region["num_nodes"] = 1 33 | ## submodules config 34 | if "obspy" in config: 35 | config_region["obspy"] = config["obspy"] 36 | if "phasenet" in config: 37 | config_region["phasenet"] = config["phasenet"] 38 | if "gamma" in config: 39 | config_region["gamma"] = config["gamma"] 40 | if "adloc" in config: 41 | config_region["adloc"] = config["adloc"] 42 | if "cctorch" in config: 43 | 
config_region["cctorch"] = config["cctorch"] 44 | if "adtomo" in config: 45 | config_region["adtomo"] = config["adtomo"] 46 | if "region" in config: 47 | if region in config["region"]: 48 | config_region.update(config["region"][region]) 49 | 50 | with open(f"{root_path}/{data_dir}/config.json", "w") as fp: 51 | json.dump(config_region, fp, indent=4) 52 | 53 | if protocol != "file": 54 | fs.put(f"{root_path}/{data_dir}/config.json", f"{bucket}/{data_dir}/config.json") 55 | print(json.dumps(config_region, indent=4)) 56 | 57 | return config_region 58 | 59 | 60 | if __name__ == "__main__": 61 | 62 | args = parse_args() 63 | root_path = args.root_path 64 | region = args.region 65 | protocol = args.protocol 66 | bucket = args.bucket 67 | token = args.token 68 | 69 | with open("config.json", "r") as fp: 70 | config = json.load(fp) 71 | 72 | set_config(root_path=root_path, region=region, config=config, protocol=protocol, bucket=bucket, token=token) 73 | -------------------------------------------------------------------------------- /scripts/submit_download_waveform.py: -------------------------------------------------------------------------------- 1 | import time 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | import sky 5 | from args import parse_args 6 | 7 | args = parse_args() 8 | ROOT_PATH = args.root_path 9 | REGION = args.region 10 | PROTOCOL = args.protocol 11 | BUCKET = args.bucket 12 | TOKEN = args.token 13 | NUM_NODES = args.num_nodes 14 | 15 | task = sky.Task( 16 | name="download_waveform", 17 | setup=""" 18 | echo "Begin setup." 19 | pip install obspy 20 | pip install pandas numpy 21 | pip install -U fsspec gcsfs s3fs 22 | echo "Setup complete." 23 | """, 24 | run=""" 25 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 26 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 27 | if [ "$SKYPILOT_NODE_RANK" == "0" ]; then 28 | ls -al /opt 29 | ls -al /data 30 | ls -al ./ 31 | fi 32 | python download_waveform_v3.py --region $REGION --bucket $BUCKET --protocol $PROTOCOL --token $TOKEN --num_nodes $NUM_NODES --node_rank $NODE_RANK 33 | """, 34 | workdir=".", 35 | num_nodes=1, 36 | envs={ 37 | "ROOT_PATH": ROOT_PATH, 38 | "REGION": REGION, 39 | "PROTOCOL": PROTOCOL, 40 | "BUCKET": BUCKET, 41 | "TOKEN": TOKEN, 42 | "NUM_NODES": NUM_NODES, 43 | "NODE_RANK": 0, 44 | }, 45 | ) 46 | 47 | task.set_file_mounts( 48 | {}, 49 | ) 50 | # task.set_storage_mounts({ 51 | # '/remote/imagenet/': sky.Storage(name='my-bucket', 52 | # source='/local/imagenet'), 53 | # }) 54 | 55 | task.set_resources( 56 | sky.Resources( 57 | cloud=sky.GCP(), 58 | region="us-west1", # GCP 59 | # region="us-west-2", # AWS 60 | accelerators=None, 61 | cpus=2, 62 | disk_tier="low", 63 | disk_size=50, # GB 64 | memory=None, 65 | use_spot=True, 66 | ), 67 | ) 68 | 69 | jobs = [] 70 | try: 71 | sky.status(refresh=True) 72 | except Exception as e: 73 | print(e) 74 | 75 | with ThreadPoolExecutor(max_workers=NUM_NODES) as executor: 76 | for NODE_RANK in range(NUM_NODES): 77 | 78 | task.update_envs({"NODE_RANK": NODE_RANK}) 79 | cluster_name = f"obspy-{NODE_RANK:02d}-{NUM_NODES:02d}-{REGION}" 80 | 81 | status = sky.status(cluster_names=[f"{cluster_name}"], refresh=True) 82 | if len(status) > 0: 83 | if status[0]["status"].value == "INIT": 84 | sky.down(f"{cluster_name}") 85 | if (not status[0]["to_down"]) and (not status[0]["status"].value == "INIT"): 86 | sky.autostop(f"{cluster_name}", idle_minutes=10, down=True) 87 | print(f"Cluster {cluster_name}/{NUM_NODES} already exists.") 88 | continue 89 | 90 | status = 
sky.status(cluster_names=[f"{cluster_name}"]) 91 | if len(status) == 0: 92 | print(f"Launching cluster {cluster_name}/{NUM_NODES}...") 93 | jobs.append( 94 | executor.submit( 95 | sky.launch, 96 | task, 97 | cluster_name=f"{cluster_name}", 98 | idle_minutes_to_autostop=10, 99 | down=True, 100 | detach_setup=False, 101 | detach_run=False, 102 | ) 103 | ) 104 | time.sleep(5) 105 | 106 | for job in jobs: 107 | print(job.result()) 108 | -------------------------------------------------------------------------------- /scripts/tests/.gitignore: -------------------------------------------------------------------------------- 1 | hypodd/ 2 | -------------------------------------------------------------------------------- /scripts/tests/prepare_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | import pandas as pd 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | 7 | # %% 8 | catalog_url = "https://www.sciencebase.gov/catalog/file/get/5dd715f3e4b0695797650d18?f=__disk__db%2F88%2Fa1%2Fdb88a1f6754843800f25bd63712ed438dfa7699f" 9 | os.system(f"curl -o catalog.txt {catalog_url}") 10 | 11 | # %% 12 | events = pd.read_csv( 13 | "catalog.txt", 14 | sep="\s+", 15 | comment="#", 16 | header=None, 17 | names=[ 18 | "year", 19 | "month", 20 | "day", 21 | "hour", 22 | "minute", 23 | "second", 24 | "latitude", 25 | "longitude", 26 | "depth_km", 27 | "magnitude", 28 | "event_index", 29 | ], 30 | ) 31 | events["time"] = pd.to_datetime(events[["year", "month", "day", "hour", "minute", "second"]]) 32 | # events["event_index"] = numpy.arange(len(events)) 33 | events.drop(columns=["year", "month", "day", "hour", "minute", "second"], inplace=True, errors="ignore") 34 | 35 | plt.figure(figsize=(10, 10)) 36 | plt.scatter(events["longitude"], events["latitude"], s=0.1, linewidths=0.0) 37 | plt.show() 38 | 39 | 40 | events = events[ 41 | (events["latitude"] >= 35.57) 42 | & (events["latitude"] <= 35.62) 43 | & (events["longitude"] >= -117.47) 44 | & (events["longitude"] <= -117.36) 45 | ] 46 | 47 | plt.figure(figsize=(10, 10)) 48 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 49 | plt.title(f"Number of events: {len(events)}") 50 | 51 | events = events[ 52 | (events["latitude"] >= 35.585) 53 | & (events["latitude"] <= 35.592) 54 | & (events["longitude"] >= -117.42) 55 | & (events["longitude"] <= -117.41) 56 | ] 57 | 58 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 59 | # plt.title(f"Number of events: {len(events)}") 60 | plt.show() 61 | 62 | # %% 63 | events.to_csv("events.csv", index=False, date_format="%Y-%m-%dT%H:%M:%S.%f") 64 | -------------------------------------------------------------------------------- /scripts/tests/prepare_data_quakeflow.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | import pandas as pd 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | 7 | # %% 8 | events = pd.read_csv("../local/Ridgecrest/adloc/ransac_events_sst.csv") 9 | events["event_id"] = events["event_index"].astype(str) 10 | 11 | plt.figure(figsize=(10, 10)) 12 | plt.scatter(events["longitude"], events["latitude"], s=0.1, linewidths=0.0) 13 | plt.show() 14 | 15 | 16 | events = events[ 17 | (events["latitude"] >= 35.57) 18 | & (events["latitude"] <= 35.62) 19 | & (events["longitude"] >= -117.47) 20 | & (events["longitude"] <= -117.36) 21 | ] 22 | 23 | plt.figure(figsize=(10, 10)) 24 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 25 | plt.title(f"Number 
of events: {len(events)}") 26 | 27 | # events = events[ 28 | # (events["latitude"] >= 35.585) 29 | # & (events["latitude"] <= 35.592) 30 | # & (events["longitude"] >= -117.42) 31 | # & (events["longitude"] <= -117.41) 32 | # ] 33 | 34 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 35 | # plt.title(f"Number of events: {len(events)}") 36 | plt.show() 37 | 38 | # %% 39 | events.to_csv("events.csv", index=False, date_format="%Y-%m-%dT%H:%M:%S.%f") 40 | 41 | # %% 42 | -------------------------------------------------------------------------------- /scripts/tests/run_hypodd_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="." 9 | region="." 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/hypodd" ]; then 13 | mkdir -p $root_path/$region/hypodd 14 | fi 15 | 16 | mv $root_path/$region/hypodd/dt.cc $root_path/$region/hypodd/dt_old.cc 17 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc 18 | cd $root_path/$region/hypodd 19 | 20 | if [ ! -d "HypoDD" ]; then 21 | git clone git@github.com:zhuwq0/HypoDD.git 22 | export PATH=$PATH:$PWD/HypoDD 23 | make -C HypoDD/src/ 24 | fi 25 | 26 | cat < cc.inp 27 | * RELOC.INP: 28 | *--- input file selection 29 | * cross correlation diff times: 30 | dt.cc 31 | * 32 | *catalog P diff times: 33 | 34 | * 35 | * event file: 36 | events.dat 37 | * 38 | * station file: 39 | stations.dat 40 | * 41 | *--- output file selection 42 | * original locations: 43 | hypodd_cc.loc 44 | * relocations: 45 | hypodd_cc.reloc 46 | * station information: 47 | hypodd.sta 48 | * residual information: 49 | hypodd.res 50 | * source paramater information: 51 | hypodd.src 52 | * 53 | *--- data type selection: 54 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 55 | * IPHA: 1= P; 2= S; 3= P&S 56 | * DIST:max dist [km] between cluster centroid and station 57 | * IDAT IPHA DIST 58 | 1 3 120 59 | * 60 | *--- event clustering: 61 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 62 | * OBSCT: min # of obs/pair for network data (0= no clustering) 63 | * OBSCC OBSCT 64 | 0 0 65 | * 66 | *--- solution control: 67 | * ISTART: 1 = from single source; 2 = from network sources 68 | * ISOLV: 1 = SVD, 2=lsqr 69 | * NSET: number of sets of iteration with specifications following 70 | * ISTART ISOLV NSET 71 | 2 2 4 72 | * 73 | *--- data weighting and re-weighting: 74 | * NITER: last iteration to used the following weights 75 | * WTCCP, WTCCS: weight cross P, S 76 | * WTCTP, WTCTS: weight catalog P, S 77 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 78 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 79 | * DAMP: damping (for lsqr only) 80 | * --- CROSS DATA ----- ----CATALOG DATA ---- 81 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 82 | 4 1 1 -9 -9 -9 -9 -9 -9 70 83 | 4 1 1 6 -9 -9 -9 -9 -9 70 84 | 4 1 0.8 3 4 -9 -9 -9 -9 70 85 | 4 1 0.8 2 2 -9 -9 -9 -9 70 86 | * 87 | *--- 1D model: 88 | * NLAY: number of model layers 89 | * RATIO: vp/vs ratio 90 | * TOP: depths of top of layer (km) 91 | * VEL: layer velocities (km/s) 92 | * NLAY RATIO 93 | 10 1.73 94 | * TOP 95 | 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 30.0 96 | * VEL 97 | 4.74 5.01 5.35 5.71 6.07 6.17 6.27 6.34 6.39 7.80 98 | * 99 | *--- event selection: 100 | * CID: cluster to be relocated (0 = all) 101 | * ID: cuspids of event to be relocated (8 per line) 102 | * CID 103 | 0 104 
| * ID 105 | EOF 106 | 107 | ./HypoDD/src/hypoDD/hypoDD cc.inp 108 | cd $WORKING_DIR -------------------------------------------------------------------------------- /scripts/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/scripts/utils/__init__.py -------------------------------------------------------------------------------- /scripts/utils/convert_cctorch_turkey.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import json 4 | from pathlib import Path 5 | from datetime import datetime 6 | import shutil 7 | import obspy 8 | from tqdm import tqdm 9 | import multiprocessing as mp 10 | 11 | # %% 12 | catalog = pd.read_csv("../EikoLoc/eikoloc_catalog.csv", parse_dates=["time"]) 13 | 14 | # %% 15 | catalog["index"] = catalog["event_index"] 16 | catalog = catalog.set_index("index") 17 | 18 | # %% 19 | picks = pd.read_csv("../EikoLoc/gamma_picks.csv", parse_dates=["phase_time"]) 20 | 21 | # %% 22 | picks["index"] = picks["event_index"] 23 | 24 | # %% 25 | picks = picks.set_index("event_index") 26 | 27 | # %% 28 | # picks[["network", "station", "location", "channel"]] = picks["station_id"].str.split(".", expand=True) 29 | # picks["phase_time"] = picks["phase_time"].dt.strftime("%Y-%m-%d %H:%M:%S.%f") 30 | 31 | # %% 32 | waveform_path = Path("../waveforms") 33 | output_path = Path("waveforms") 34 | if not output_path.exists(): 35 | output_path.mkdir() 36 | 37 | # %% 38 | def save_mseed(f, year, jday): 39 | try: 40 | meta = obspy.read(f) 41 | except: 42 | return 43 | 44 | date = datetime.strptime(f"{year}-{jday}", "%Y-%j") 45 | month, day = date.strftime("%m"), date.strftime("%d") 46 | 47 | meta = meta.merge(fill_value="latest") 48 | min_time = min([tr.stats.starttime for tr in meta]) 49 | max_time = max([tr.stats.endtime for tr in meta]) 50 | meta = meta.slice(starttime=min_time, endtime=max_time) 51 | for trace in meta: 52 | station_id = trace.get_id() 53 | network, station, location, channel = station_id.split(".") 54 | for hour in range(24): 55 | starttime = obspy.UTCDateTime(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z") 56 | endtime = obspy.UTCDateTime(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z")+3600 57 | trace_hour = trace.slice(starttime=starttime, endtime=endtime) 58 | if len(trace_hour.data) > 0: 59 | trace_hour.write(output_path / f"{year}-{jday}" / f"{hour:02d}" / f"{station_id}.mseed", format="MSEED") 60 | # except Exception as e: 61 | # print(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z") 62 | # print(obspy.UTCDateTime(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z")) 63 | # print(e) 64 | # print(min_time, max_time, f"{year}-{month}-{day}T{hour:02d}") 65 | # raise 66 | print("Finish: ", output_path / f"{year}-{jday}", f) 67 | 68 | 69 | # %% 70 | for day_dir in waveform_path.iterdir(): 71 | 72 | year = datetime.fromisoformat(day_dir.name).strftime("%Y") 73 | jday = datetime.fromisoformat(day_dir.name).strftime("%j") 74 | 75 | if not (output_path / f"{year}-{jday}").exists(): 76 | (output_path / f"{year}-{jday}").mkdir() 77 | for hour in range(24): 78 | if not (output_path / f"{year}-{jday}" / f"{hour:02d}").exists(): 79 | (output_path / f"{year}-{jday}" / f"{hour:02d}").mkdir() 80 | 81 | file_list = set() 82 | mseeds = list(day_dir.rglob("*.mseed_[ENZ].mseed")) 83 | for x in mseeds: 84 | file_name = str(x) 85 | file_name = "_".join(file_name.split("_")[:-1] + ["?.mseed"]) 
86 | file_list.add(file_name) 87 | 88 | mseeds = list(day_dir.rglob("*_tdvms_?.mseed")) 89 | for x in mseeds: 90 | file_list.add(str(x)) 91 | 92 | ncpu = mp.cpu_count()//2 93 | with mp.Pool(ncpu) as p: 94 | p.starmap(save_mseed, [(f, year, jday) for f in file_list]) 95 | 96 | 97 | -------------------------------------------------------------------------------- /scripts/utils/preprocess_focal_mechanism.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | import warnings 5 | from glob import glob 6 | 7 | import obspy 8 | import obspy.taup 9 | import pandas as pd 10 | from tqdm import tqdm 11 | 12 | warnings.filterwarnings("ignore") 13 | 14 | # %% 15 | os.system( 16 | "curl -L -O https://github.com/AI4EPS/EPS207_Observational_Seismology/releases/download/fm_data/fm_data.zip && unzip -q -o fm_data.zip" 17 | ) 18 | 19 | # %% 20 | data_path = "fm_data" 21 | os.system("mv fm_data/picks fm_data/picks_raw") 22 | 23 | # %% 24 | with open(f"{data_path}/stations.json", "r") as f: 25 | stations = json.load(f) 26 | 27 | stations = pd.DataFrame.from_dict(stations, orient="index") 28 | events = pd.read_csv(f"{data_path}/catalog.csv", parse_dates=["time"]) 29 | events["time"] = events["time"].dt.tz_localize(None) 30 | events.set_index("event_id", inplace=True) 31 | 32 | # %% 33 | model = obspy.taup.TauPyModel("iasp91") 34 | max_timediff = 2.0 35 | 36 | plotting = False 37 | if plotting: 38 | plt.figure(figsize=(10, 10)) 39 | for file in tqdm(list(glob(f"{data_path}/picks_raw/*.csv"))): 40 | picks = pd.read_csv(file, parse_dates=["phase_time"]) 41 | event_id = file.split("/")[-1].replace(".csv", "") 42 | evot, mag, evla, evlo, evdp, x, y, z = events.loc[ 43 | event_id, ["time", "magnitude", "latitude", "longitude", "depth_km", "x_km", "y_km", "z_km"] 44 | ].to_numpy() 45 | 46 | keep_idx = [] 47 | for i, pick in picks.iterrows(): 48 | stlo, stla = stations.loc[pick["station_id"], ["longitude", "latitude"]].to_numpy() 49 | epicdist = obspy.geodetics.gps2dist_azimuth(evla, evlo, stla, stlo)[0] / 1000 50 | prac_phase_time = (pick["phase_time"] - evot).total_seconds() 51 | 52 | phase_type = pick["phase_type"] 53 | if phase_type == "P": 54 | arrivals = model.get_travel_times_geo(max(0, evdp), evla, evlo, stla, stlo, phase_list=["p", "P"]) 55 | if plotting: 56 | plt.scatter(prac_phase_time, epicdist, color="b") 57 | else: 58 | arrivals = model.get_travel_times_geo(max(0, evdp), evla, evlo, stla, stlo, phase_list=["s", "S"]) 59 | if plotting: 60 | plt.scatter(prac_phase_time, epicdist, color="r") 61 | 62 | theo_phase_time = arrivals[0].time 63 | if abs(theo_phase_time - prac_phase_time) < max_timediff: 64 | keep_idx.append(i) 65 | else: 66 | if plotting: 67 | plt.scatter(prac_phase_time, epicdist, color="g") 68 | 69 | picks_ = picks.iloc[keep_idx] 70 | picks_["event_index"] = event_id 71 | 72 | try: 73 | picks_.to_csv(f"{data_path}/picks/{event_id}.csv", index=None) 74 | except: 75 | os.mkdir(f"{data_path}/picks") 76 | picks_.to_csv(f"{data_path}/picks/{event_id}.csv", index=None) 77 | 78 | if plotting: 79 | plt.xlabel("Time (s)") 80 | plt.ylabel("Epicentral distance (km)") 81 | plt.title(event_id) 82 | plt.show() 83 | plt.close() 84 | 85 | # %% 86 | picks = [] 87 | for file in tqdm(list(glob(f"{data_path}/picks/*.csv"))): 88 | picks.append(pd.read_csv(file)) 89 | picks = pd.concat(picks, ignore_index=True) 90 | picks.to_csv(f"{data_path}/picks.csv", index=None) 91 | 92 | 93 | # %% 94 | 
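The `preprocess_focal_mechanism.py` script above keeps a pick only when its observed travel time (pick time minus origin time) agrees with the IASP91 prediction from `obspy.taup` to within `max_timediff` seconds. A minimal, self-contained sketch of that screening step is shown below; the helper name `keep_pick` and the coordinates in the usage line are illustrative assumptions, not values taken from the script.

```python
# Sketch of the travel-time screening used in preprocess_focal_mechanism.py.
# The helper name and the example coordinates are illustrative assumptions.
import obspy.taup

model = obspy.taup.TauPyModel("iasp91")
max_timediff = 2.0  # seconds, same threshold as in the script above


def keep_pick(observed_tt, phase_type, evdp_km, evla, evlo, stla, stlo):
    """Return True if the observed travel time matches the IASP91 prediction."""
    phase_list = ["p", "P"] if phase_type == "P" else ["s", "S"]
    arrivals = model.get_travel_times_geo(
        max(0.0, evdp_km), evla, evlo, stla, stlo, phase_list=phase_list
    )
    if len(arrivals) == 0:
        return False
    return abs(arrivals[0].time - observed_tt) < max_timediff


# Example: a P pick observed 12.3 s after the origin time (made-up coordinates)
print(keep_pick(12.3, "P", 8.0, 35.7, -117.5, 35.9, -117.7))
```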
-------------------------------------------------------------------------------- /seedlink/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | WORKDIR /app 4 | 5 | ENV PYTHONUNBUFFERED=1 6 | 7 | # Create the environment: 8 | COPY env.yml /app 9 | RUN conda env create --name quakeflow --file=env.yml 10 | # Make RUN commands use the new environment: 11 | SHELL ["conda", "run", "-n", "quakeflow", "/bin/bash", "-c"] 12 | 13 | # Copy files 14 | COPY . /app 15 | 16 | # Start API server 17 | # ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "producer.py"] 18 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "producer_iris.py"] -------------------------------------------------------------------------------- /seedlink/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.7 7 | - numpy 8 | - scikit-learn 9 | - pandas 10 | - tensorflow 11 | - obspy 12 | - pip 13 | - pip: 14 | - kafka-python 15 | - pyspark==2.4.4 16 | -------------------------------------------------------------------------------- /seedlink/readme.md: -------------------------------------------------------------------------------- 1 | # Waveform Generator 2 | 3 | Fake streaming data generator. 4 | 5 | Build the docker image 6 | 7 | ``` 8 | docker build --tag quakeflow-waveform:1.0 . 9 | ``` 10 | 11 | Run the Waveform Generator 12 | 13 | ``` 14 | docker run -it quakeflow-waveform:1.0 15 | ``` -------------------------------------------------------------------------------- /skaffold.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: skaffold/v4beta2 2 | # kind: Config 3 | # metadata: 4 | # name: quakeflow 5 | # build: 6 | # artifacts: 7 | # - image: phasenet-api 8 | # context: PhaseNet 9 | # - image: gamma-api 10 | # context: GaMMA 11 | # - image: deepdenoiser-api 12 | # context: DeepDenoiser 13 | # manifests: 14 | # rawYaml: 15 | # - kubernetes/quakeflow-local.yaml 16 | 17 | 18 | apiVersion: skaffold/v2beta19 19 | kind: Config 20 | build: 21 | tagPolicy: 22 | sha256: {} 23 | # defines where to find the code at build time and where to push the resulting image 24 | artifacts: 25 | - context: quakeflow/demo/hub 26 | image: zhuwq0/quakeflow-hub 27 | - context: quakeflow/demo/data 28 | image: zhuwq0/quakeflow-data 29 | - context: quakeflow/demo/picking 30 | image: zhuwq0/picking-api 31 | - context: quakeflow/demo/association 32 | image: zhuwq0/association-api 33 | - context: quakeflow/demo/location 34 | image: zhuwq0/location-api 35 | # defines the Kubernetes manifests to deploy on each run 36 | deploy: 37 | kubectl: 38 | manifests: 39 | - quakeflow/deployment.yaml 40 | - quakeflow/service.yaml 41 | # use the cloudbuild profile to build images using Google Cloud Build 42 | profiles: 43 | - name: cloudbuild 44 | build: 45 | googleCloudBuild: {} -------------------------------------------------------------------------------- /spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install Java and Spark 4 | RUN apt-get update 5 | RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y openjdk-11-jdk git wget tzdata 6 | RUN ln -fs /usr/share/zoneinfo/US/Pacific-New /etc/localtime && dpkg-reconfigure -f noninteractive tzdata 7 | 8 | ENV 
PATH="/root/miniconda3/bin:${PATH}" 9 | ARG PATH="/root/miniconda3/bin:${PATH}" 10 | RUN apt-get update 11 | 12 | RUN apt-get install -y wget && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN wget \ 15 | https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 16 | && mkdir /root/.conda \ 17 | && bash Miniconda3-latest-Linux-x86_64.sh -b \ 18 | && rm -f Miniconda3-latest-Linux-x86_64.sh 19 | RUN conda --version 20 | 21 | # Setup env variables 22 | ENV PYTHONUNBUFFERED=1 23 | 24 | 25 | WORKDIR /app 26 | COPY env.yml /app 27 | RUN conda env create --name quakeflow --file=env.yml 28 | SHELL ["conda", "run", "-n", "quakeflow", "/bin/bash", "-c"] 29 | 30 | 31 | COPY . /app 32 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "/app/spark_streaming.py"] 33 | # CMD /spark-2.4.7-bin-hadoop2.7/bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.4.0 /app/spark.py 34 | -------------------------------------------------------------------------------- /spark/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - numpy 8 | - pip 9 | - pip: 10 | - kafka-python 11 | - pyspark==3.1.1 12 | - requests 13 | -------------------------------------------------------------------------------- /spark/readme.md: -------------------------------------------------------------------------------- 1 | # Spark ETL Pipeline 2 | 3 | Spark streaming ETL Pipeline 4 | 5 | Build the docker image 6 | 7 | ``` 8 | docker build --tag quakeflow-spark:1.0 . 9 | ``` 10 | 11 | Run the Spark ETL Pipeline 12 | 13 | ``` 14 | docker run -it quakeflow-spark:1.0 15 | ``` 16 | 17 | Run it locally (make sure update the spark lib to 3.1.1) 18 | ``` 19 | python spark_structured_streaming.py 20 | ``` 21 | -------------------------------------------------------------------------------- /spark/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==2.4.4 2 | numpy 3 | kafka-python 4 | requests -------------------------------------------------------------------------------- /tests/analysis/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | figures 3 | 4 | 2019.catalog 5 | Liu2020.txt 6 | Ross2019.txt 7 | Shelly2020.txt 8 | 9 | 10 | performance*.txt 11 | merged*.csv 12 | 13 | input 14 | output 15 | -------------------------------------------------------------------------------- /tests/analysis/config.json: -------------------------------------------------------------------------------- 1 | {"region": "Ridgecrest_oneweek", "center": [-117.504, 35.705], "xlim_degree": [-118.004, -117.004], "ylim_degree": [35.205, 36.205], "degree2km": 111.19492474777779, "starttime": "2019-07-04T00:00:00", "endtime": "2019-07-10T00:00:00", "networks": ["CI"], "channels": "HH*,BH*,EH*,HN*", "client": "SCEDC"} -------------------------------------------------------------------------------- /tests/check_pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: dataaccess 5 | spec: 6 | containers: 7 | - name: alpine 8 | image: alpine:latest 9 | command: ['sleep', 'infinity'] 10 | volumeMounts: 11 | - name: mypvc 12 | mountPath: /data 13 | volumes: 14 | - name: mypvc 15 | persistentVolumeClaim: 16 | # claimName: quakeflow-4ldv8-data-volume-37 17 | claimName: 
quakeflow-4ldv8-data-volume-0 -------------------------------------------------------------------------------- /tests/kafka-spark/consumer.py: -------------------------------------------------------------------------------- 1 | from kafka import KafkaConsumer 2 | from json import loads 3 | 4 | consumer = KafkaConsumer( 5 | 'testtopic', 6 | bootstrap_servers=['localhost:9092'], 7 | auto_offset_reset='earliest', 8 | enable_auto_commit=True, 9 | group_id='my-group', 10 | value_deserializer=lambda x: loads(x.decode('utf-8')) 11 | 12 | ) 13 | # client = MongoClient('localhost:27017') 14 | # collection = client.testtopic.testtopic 15 | for message in consumer: 16 | message = message.value 17 | # message['timestamp'] = message['timestamp'][0] 18 | # message['vec'] = message['vec'][0][:10] 19 | print(message) 20 | # collection.insert_one(message) 21 | # print('{} added to {}'.format(message, collection)) 22 | -------------------------------------------------------------------------------- /tests/kafka-spark/env.yml: -------------------------------------------------------------------------------- 1 | name: cs329s 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - jupyter 8 | - matplotlib 9 | - numpy 10 | - scikit-learn 11 | - pandas 12 | - tensorflow 13 | - pip 14 | - pip: 15 | - kafka-python 16 | - pyspark==2.4.4 17 | - fastapi 18 | - uvicorn 19 | - tqdm 20 | - streamlit 21 | - tweepy 22 | -------------------------------------------------------------------------------- /tests/kafka-spark/quakeflow logo design 2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/tests/kafka-spark/quakeflow logo design 2.jpg -------------------------------------------------------------------------------- /tests/kafka-spark/readme.md: -------------------------------------------------------------------------------- 1 | # Kafka & Pyspark 2 | 3 | This folder will be deprecated as we split things into individual docker containers. 4 | 5 | ## Setup 6 | 7 | 1. Install Conda Env 8 | ``` 9 | conda env create --name cs329s --file=env.yml 10 | ``` 11 | 12 | 2. Run your Zookeeper and Kafka cluster 13 | 14 | See https://kafka.apache.org/quickstart for the installation and detailed steps. 15 | 16 | ``` 17 | # Start the ZooKeeper service 18 | $ bin/zookeeper-server-start.sh config/zookeeper.properties 19 | 20 | # Start the Kafka broker service 21 | $ bin/kafka-server-start.sh config/server.properties 22 | ``` 23 | 24 | 3. Create a topic `testtopic` (just for test purpose) 25 | 26 | ``` 27 | $ bin/kafka-topics.sh --create --topic waveform_raw --bootstrap-server localhost:9092 28 | ``` 29 | 30 | 4. Setup PhaseNet and GMMA 31 | 32 | PhaseNet and GMMA are independent to this Quakeflow repo. You can clone and download 33 | both of them in a different folder. 34 | 35 | PhaseNet: https://github.com/wayneweiqiang/PhaseNet 36 | 37 | ``` 38 | $ git clone -b quakeflow https://github.com/wayneweiqiang/PhaseNet 39 | $ cd PhaseNet 40 | $ uvicorn app:app --reload --port 8000 41 | ``` 42 | 43 | Open another terminal and run 44 | 45 | GMMA: https://github.com/wayneweiqiang/GMMA 46 | 47 | ``` 48 | $ git clone -b quakeflow https://github.com/wayneweiqiang/GMMA 49 | $ cd GMMA 50 | $ uvicorn app:app --reload --port 8001 51 | ``` 52 | 53 | 5. 
Run the `producer.py` script 54 | 55 | ``` 56 | $ python producer.py 57 | ``` 58 | 59 | and you should see the script print out some timestamps every second 60 | 61 | 62 | 65 | 66 | 6. Run the `spark.py` script for testing the Spark features 67 | 68 | - `spark-submit` is pre-installed in our environment 69 | 70 | - Run the following command, and you will see the logs in `logs.txt` 71 | 72 | ``` 73 | $ spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.3.3 spark.py > logs.txt 74 | ``` 75 | 76 | 7. Check the `GMMA` API service after 30 seconds, you should see [200 OK] and some outputs about the earthquakes 77 | 78 | 79 | 80 | Go to the Spark UI portal (http://localhost:4040/) and you can see the jobs, stages and streaming statistics. 81 | 82 | 83 | 84 | Also some cool DAG Visualization about how the streaming ETL pipeline is done 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /ui/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | WORKDIR /app 4 | 5 | # Create the environment: 6 | COPY env.yml /app 7 | RUN conda env create --name quakeflow --file=env.yml 8 | # Make RUN commands use the new environment: 9 | SHELL ["conda", "run", "-n", "quakeflow", "/bin/bash", "-c"] 10 | 11 | # Copy files 12 | COPY . /app 13 | 14 | # Expose API port 15 | EXPOSE 8005 16 | 17 | ENV PYTHONUNBUFFERED=1 18 | 19 | # Start API server 20 | # ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "app_plotly.py"] 21 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "gunicorn", "app_plotly:server", "--reload", "-b", "0.0.0.0:8005"] 22 | -------------------------------------------------------------------------------- /ui/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:server --workers 4 -------------------------------------------------------------------------------- /ui/assets/demo-button.css: -------------------------------------------------------------------------------- 1 | .link-button { 2 | margin-top: 10px; 3 | margin-right: 10px; 4 | vertical-align: top; 5 | color: white; 6 | } -------------------------------------------------------------------------------- /ui/assets/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "Open Sans", sans-serif; 3 | background-color: #061E44; 4 | } 5 | 6 | h6 { 7 | margin-top: 0; 8 | } 9 | 10 | .app__container { 11 | margin: 3% 5%; 12 | } 13 | 14 | .app__header { 15 | display: flex; 16 | justify-content: space-between; 17 | color: #fff; 18 | } 19 | 20 | .app__header__title { 21 | letter-spacing: 0.23rem; 22 | } 23 | 24 | .app__header__title--grey { 25 | color: #C4CDD5; 26 | } 27 | 28 | .app__menu__img { 29 | height: 50px; 30 | width: auto; 31 | } 32 | 33 | .app__content { 34 | display: flex; 35 | margin-top: 20px; 36 | } 37 | 38 | .wind__speed__container { 39 | display: flex; 40 | flex-direction: column; 41 | background-color: #082255; 42 | border-radius: 0.55rem; 43 | box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24); 44 | } 45 | 46 | .graph__title { 47 | color: #fff !important; 48 | letter-spacing: 0.3rem !important; 49 | padding: 25px 25px 0px 25px; 50 | margin-bottom: 0px !important; 51 | font-size: 1em; 52 | } 53 | 54 | .slider { 55 | padding: 15px; 56 | } 57 | 58 | .auto__checkbox { 59 | margin-right: 10px; 60 | } 61 | 62 
| .auto__label { 63 | color: #DFE3E8; 64 | } 65 | 66 | .auto__container { 67 | display: flex; 68 | justify-content: space-between; 69 | color: #DFE3E8; 70 | padding: 0px 15px; 71 | } 72 | 73 | .auto__p { 74 | margin-bottom: 0; 75 | } 76 | 77 | .graph__container { 78 | background-color: #082255; 79 | border-radius: 0.55rem; 80 | box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24); 81 | } 82 | 83 | .first { 84 | margin-bottom: 15px; 85 | } 86 | 87 | .second { 88 | margin-top: 15px; 89 | } 90 | 91 | .histogram__direction { 92 | margin-left: 15px; 93 | } 94 | 95 | #wind-direction { 96 | display: flex; 97 | justify-content: center; 98 | } 99 | 100 | @media only screen and (max-width: 600px) { 101 | .histogram__direction { 102 | margin: 15px 0px 20px 0px; 103 | } 104 | .app__content { 105 | display: block; 106 | } 107 | .app__menu__img { 108 | height: 30px; 109 | width: auto; 110 | } 111 | .app__header { 112 | display: flex; 113 | flex-direction: column; 114 | } 115 | .app__header__desc { 116 | order: 1; 117 | text-align: center; 118 | } 119 | .app__header__logo { 120 | order: 0; 121 | } 122 | .app__header__title { 123 | font-size: 1.5em; 124 | padding-top: 15px; 125 | } 126 | } -------------------------------------------------------------------------------- /ui/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - matplotlib 8 | - numpy 9 | - scikit-learn 10 | - pandas 11 | - tensorflow 12 | - pip 13 | - pip: 14 | - kafka-python 15 | - tweepy 16 | - plotly 17 | - plotly-express 18 | - dash 19 | - Pillow 20 | - geopy 21 | - kaleido 22 | - gunicorn 23 | 24 | 25 | -------------------------------------------------------------------------------- /ui/gradio/test_api.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from gradio_client import Client 3 | import obspy 4 | import numpy as np 5 | import json 6 | import pandas as pd 7 | 8 | # %% 9 | 10 | waveform = obspy.read() 11 | array = np.array([x.data for x in waveform]).T 12 | 13 | # pipeline = PreTrainedPipeline() 14 | inputs = array.tolist() 15 | inputs = json.dumps(inputs) 16 | # picks = pipeline(inputs) 17 | # print(picks) 18 | 19 | # %% 20 | client = Client("ai4eps/phasenet") 21 | output, file = client.predict(["test_test.mseed"]) 22 | # %% 23 | with open(output, "r") as f: 24 | picks = json.load(f)["data"] 25 | 26 | # %% 27 | picks = pd.read_csv(file) 28 | 29 | 30 | # %% 31 | job = client.submit(["test_test.mseed", "test_test.mseed"], api_name="/predict") # This is not blocking 32 | 33 | print(job.status()) 34 | 35 | # %% 36 | output, file = job.result() 37 | 38 | -------------------------------------------------------------------------------- /ui/streamlit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | WORKDIR /app 4 | 5 | # Create the environment: 6 | COPY env.yml /app 7 | RUN conda env create --name cs329s --file=env.yml 8 | # Make RUN commands use the new environment: 9 | SHELL ["conda", "run", "-n", "cs329s", "/bin/bash", "-c"] 10 | 11 | # Copy files 12 | COPY . 
/app 13 | 14 | # Expose API port 15 | EXPOSE 8501 16 | 17 | ENV PYTHONUNBUFFERED=1 18 | 19 | # Start API server 20 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "cs329s", "streamlit", "run", "ui_streamlit_iris.py"] 21 | -------------------------------------------------------------------------------- /ui/streamlit/env.yml: -------------------------------------------------------------------------------- 1 | name: cs329s 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - matplotlib 8 | - numpy 9 | - scikit-learn 10 | - pandas 11 | - tensorflow 12 | - pip 13 | - pip: 14 | - kafka-python 15 | - streamlit 16 | - tweepy 17 | - plotly 18 | - Pillow 19 | - geopy 20 | - kaleido 21 | 22 | --------------------------------------------------------------------------------
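The `ui/streamlit` image above runs `ui_streamlit_iris.py`, which is not included in this listing; its environment pins `kafka-python` and `streamlit`, which suggests a consumer-driven dashboard. A minimal sketch of such a page is given below purely as an illustration: the topic name `phasenet_picks`, the broker address, and the message schema are assumptions, not values taken from the repository.

```python
# Hypothetical sketch only: topic, broker address, and message fields are assumed.
import json

import pandas as pd
import streamlit as st
from kafka import KafkaConsumer

st.title("QuakeFlow picks (demo)")

consumer = KafkaConsumer(
    "phasenet_picks",                      # assumed topic name
    bootstrap_servers=["localhost:9092"],  # assumed broker address
    auto_offset_reset="latest",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
    consumer_timeout_ms=5000,              # stop polling after 5 s of silence
)

picks = [msg.value for msg in consumer]
if picks:
    st.dataframe(pd.DataFrame(picks))
else:
    st.write("No picks received yet.")
```

If used, a page like this would be launched the same way as the existing entrypoint, i.e. `streamlit run <script>.py` inside the container built from the Dockerfile above.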