├── .devcontainer └── devcontainer.json ├── .github ├── environment.yml └── workflows │ └── docs.yml ├── .gitignore ├── .gitmodules ├── HypoDD ├── .gitignore ├── Dockerfile ├── Makefile ├── convert_stations.py ├── gamma2hypodd.py ├── gamma2hypoinverse.py ├── hyp.command ├── hypoDD.inc ├── hypodd_cc.inp ├── hypodd_ct.inp ├── hypoinverse2hypodd.py ├── ph2dt.inp ├── plot_catalog.ipynb ├── plotly_3d.ipynb ├── run.sh ├── vel_model_P.crh ├── vel_model_S.crh └── visulization.ipynb ├── LICENSE ├── datasets ├── NCEDC │ ├── .gitignore │ ├── convert_hdf5.py │ ├── convert_hdf5_v2.py │ ├── download_catalog.py │ ├── download_fm.py │ ├── download_station.py │ ├── download_waveform.py │ ├── extract_csv.py │ ├── extract_ps.py │ ├── merge_hdf5.py │ └── run.yaml └── SCEDC │ ├── .gitignore │ ├── convert_hdf5.py │ ├── convert_hdf5_v2.py │ ├── download_catalog.py │ ├── download_station.py │ ├── download_waveform.py │ ├── download_waveform_v2.py │ ├── extract_ps.py │ ├── merge_hdf5.py │ ├── run.yaml │ └── split_large_files.py ├── docs ├── .gitignore ├── README.md ├── assets │ ├── inference_pipeline_plotly.png │ ├── logo.jpg │ ├── logo.png │ ├── quakeflow.gif │ └── quakeflow_diagram.png ├── data.md ├── data_format.md ├── deepdenoiser.md ├── earthquake_location.md ├── fastapi.ipynb ├── gamma.md ├── gcp_readme.md ├── k8s_readme.md ├── kubeflow └── phasenet.md ├── environment.yml ├── examples ├── california │ ├── .gitignore │ ├── .skyignore │ ├── args.py │ ├── cut_templates_cc.py │ ├── cut_templates_merge.py │ ├── download_waveform.py │ ├── filter_gamma_ncedc.py │ ├── generate_pairs.py │ ├── load_cloud_data.py │ ├── load_cloud_picks.py │ ├── merge_ncedc.py │ ├── monitor.py │ ├── plot_catalog.py │ ├── plotting.py │ ├── refresh.py │ ├── run_adloc.py │ ├── run_adloc_ct.py │ ├── run_cctorch.py │ ├── run_gamma.py │ ├── run_gamma.yaml │ ├── run_gamma_ncedc.py │ ├── run_growclust_cc.py │ ├── run_growclust_cc.sh │ ├── run_hypodd_cc.py │ ├── run_hypodd_cc.sh │ ├── run_phasenet.py │ ├── run_phasenet.yaml │ ├── run_phasenet_ncedc.py │ ├── run_phasenet_scedc.py │ ├── set_config_ncedc.py │ ├── submit_adloc.py │ ├── submit_cctorch.py │ ├── submit_download.py │ ├── submit_gamma.py │ ├── submit_phasenet.py │ ├── submit_template.py │ └── tests │ │ └── clustering.py ├── forge │ └── load_data.py ├── hawaii │ └── workflow.ipynb ├── japan │ ├── .gitignore │ ├── convert_data_hinet.py │ ├── cut_templates_cc.py │ ├── download_data_hinet.py │ ├── filter_similar_pairs.py │ ├── merge_csv.py │ ├── merge_events.py │ ├── merge_picks.py │ ├── plot_catalog.py │ ├── plotting.py │ ├── run_adloc.py │ ├── run_adloc_cc_bak.py │ ├── run_cctorch.py │ ├── run_gamma.py │ ├── run_growclust_cc.py │ ├── run_growclust_cc.sh │ ├── run_hypodd_cc.py │ ├── run_hypodd_cc.sh │ ├── run_phasenet.py │ ├── run_qtm.py │ └── set_config.py └── seafoam │ └── load_data.py ├── kubeflow ├── .gitignore ├── Dockerfile ├── README.md ├── Stream.ipynb ├── Training.ipynb ├── cloud_dataset.ipynb ├── debug_magnitude.ipynb ├── debug_pvc.yaml ├── env.yml ├── plot_catalog.ipynb ├── prepare_test_data.ipynb ├── rsync.yaml ├── tweepy_test.ipynb ├── waveforms │ ├── Dockerfile │ └── download_waveform.ipynb ├── workflow-api.ipynb ├── workflow-kfp2.ipynb ├── workflow.ipynb └── workflow_debug.ipynb ├── kubernetes ├── deploy_gcp.sh ├── deploy_local.sh ├── metrics-server.yaml ├── quakeflow-autoscaling.yaml ├── quakeflow-gcp.yaml ├── quakeflow-ingress.yaml ├── quakeflow-local.yaml └── replay │ ├── real_data.py │ └── replay_data.py ├── mkdocs.yml ├── mongodb └── test_mongodb.ipynb ├── quakeflow ├── 
demo │ ├── association │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ ├── data │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ ├── hub │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ ├── location │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt │ └── picking │ │ ├── Dockerfile │ │ ├── app.py │ │ └── requirements.txt ├── deployment.yaml ├── helm.sh ├── index.html ├── main.py ├── replay_data.py └── service.yaml ├── requirements.txt ├── scripts ├── .gitignore ├── Dockerfile ├── README.md ├── args.py ├── config.json ├── convert_dtcc.py ├── convert_qtm.py ├── convert_velest.py ├── convert_velest_output.py ├── create_filelist.py ├── cut_templates.py ├── cut_templates_cc.py ├── cut_templates_qtm.py ├── cut_templates_v2.py ├── debug_growclust.py ├── download_catalog.py ├── download_event_hinet.py ├── download_station.py ├── download_waveform.py ├── download_waveform_event.py ├── download_waveform_v2.py ├── download_waveform_v3.py ├── generate_pairs.py ├── load_cloud_picks.py ├── load_cloud_templates.py ├── merge_adloc_picks.py ├── merge_csv.py ├── merge_gamma_picks.py ├── merge_phasenet_picks.py ├── merge_phasenet_plus_picks.py ├── plot_catalog.py ├── plot_gamma.py ├── quakeflow.py ├── quakeflow_demo.ipynb ├── quakeflow_job.yaml ├── run_adloc.py ├── run_adloc_cc.py ├── run_adloc_ct.py ├── run_adloc_v2.py ├── run_cctorch.py ├── run_eqnet.py ├── run_event_association.py ├── run_gamma.py ├── run_gamma_v2.py ├── run_growclust_cc.py ├── run_growclust_cc.sh ├── run_growclust_ct.sh ├── run_hypodd_cc.py ├── run_hypodd_cc.sh ├── run_hypodd_ct.py ├── run_hypodd_ct.sh ├── run_phasenet.py ├── run_phasenet_das.yaml ├── run_phasenet_plus.py ├── run_phasenet_v2.py ├── run_qtm.py ├── run_qtm_association.py ├── run_skhash.py ├── run_velest.sh ├── set_config.py ├── station_clustering.ipynb ├── submit_download_waveform.py ├── submit_vertex.py ├── synthetic_test.ipynb ├── tests │ ├── .gitignore │ ├── cut_template_picks.py │ ├── cut_template_picks_dummy.py │ ├── prepare_data.py │ ├── prepare_data_quakeflow.py │ ├── run_hypodd.py │ └── run_hypodd_cc.sh └── utils │ ├── __init__.py │ ├── convert_cctorch_turkey.py │ ├── plotting.py │ └── preprocess_focal_mechanism.py ├── seedlink ├── Dockerfile ├── env.yml ├── producer.py ├── producer_iris.py ├── producer_parallel.py ├── readme.md ├── realtime-iris.ipynb └── realtime-stations.txt ├── skaffold.yaml ├── spark ├── Dockerfile ├── env.yml ├── readme.md ├── requirements.txt └── spark_streaming.py ├── tests ├── analysis │ ├── .gitignore │ ├── check_waveforms_v2.ipynb │ ├── check_waveforms_v2.py │ ├── comparison.ipynb │ ├── config.json │ ├── mccc.py │ ├── mccc_plot.ipynb │ └── util.py ├── check_pvc.yaml └── kafka-spark │ ├── TEST - Structured Streaming.ipynb │ ├── consumer.py │ ├── env.yml │ ├── producer.py │ ├── quakeflow logo design 2.jpg │ ├── readme.md │ ├── spark.py │ └── ui_streamlit.py └── ui ├── Dockerfile ├── Procfile ├── app_plotly.py ├── assets ├── app.css ├── demo-button.css └── style.css ├── env.yml ├── gradio └── test_api.py └── streamlit ├── Dockerfile ├── env.yml ├── ui_streamlit.py ├── ui_streamlit_debug.py └── ui_streamlit_iris.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "hostRequirements": { 4 | "cpus": 4 5 | }, 6 | "waitFor": "onCreateCommand", 7 | "updateContentCommand": "sudo apt update && sudo apt install -y libgeos-dev && python3 -m pip install -r requirements.txt 
&& python -m pip install kfp --pre", 8 | // "updateContentCommand": "conda env update --file environment.yml", 9 | "postCreateCommand": "", 10 | "customizations": { 11 | "codespaces": { 12 | "openFiles": [] 13 | }, 14 | "vscode": { 15 | "extensions": [ 16 | "ms-toolsai.jupyter", 17 | "ms-python.python", 18 | "googlecloudtools.cloudcode" 19 | ] 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /.github/environment.yml: -------------------------------------------------------------------------------- 1 | name: mkdocs 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - attrs=22.2.0=pyh71513ae_0 9 | - beautifulsoup4=4.11.1=pyha770c72_0 10 | - bleach=5.0.1=pyhd8ed1ab_0 11 | - ca-certificates=2022.12.7=ha878542_0 12 | - certifi=2022.12.7=pyhd8ed1ab_0 13 | - click=8.1.3=unix_pyhd8ed1ab_2 14 | - defusedxml=0.7.1=pyhd8ed1ab_0 15 | - entrypoints=0.4=pyhd8ed1ab_0 16 | - ghp-import=2.1.0=pyhd8ed1ab_0 17 | - importlib-metadata=6.0.0=pyha770c72_0 18 | - importlib_resources=5.10.2=pyhd8ed1ab_0 19 | - jinja2=3.1.2=pyhd8ed1ab_1 20 | - jsonschema=4.17.3=pyhd8ed1ab_0 21 | - jupyter_client=7.3.4=pyhd8ed1ab_0 22 | - jupyter_core=5.1.3=py38h578d9bd_0 23 | - jupyterlab_pygments=0.2.2=pyhd8ed1ab_0 24 | - jupytext=1.14.4=pyhcff175f_0 25 | - ld_impl_linux-64=2.38=h1181459_1 26 | - libffi=3.4.2=h6a678d5_6 27 | - libgcc-ng=11.2.0=h1234567_1 28 | - libgomp=11.2.0=h1234567_1 29 | - libsodium=1.0.18=h36c2ea0_1 30 | - libstdcxx-ng=11.2.0=h1234567_1 31 | - markdown=3.4.1=pyhd8ed1ab_0 32 | - markdown-it-py=2.1.0=pyhd8ed1ab_0 33 | - markupsafe=2.1.1=py38h0a891b7_1 34 | - mdit-py-plugins=0.3.3=pyhd8ed1ab_0 35 | - mdurl=0.1.0=pyhd8ed1ab_0 36 | - mergedeep=1.3.4=pyhd8ed1ab_0 37 | - mistune=0.8.4=pyh1a96a4e_1006 38 | - mkdocs=1.3.0=pyhd8ed1ab_0 39 | - mkdocs-exclude=1.0.2=pyhd8ed1ab_0 40 | - mkdocs-jupyter=0.21.0=pyhd8ed1ab_0 41 | - mkdocs-material=8.4.1=pyhd8ed1ab_0 42 | - mkdocs-material-extensions=1.0.3=pyhd8ed1ab_2 43 | - nbclient=0.7.2=pyhd8ed1ab_0 44 | - nbconvert=6.5.0=pyhd8ed1ab_0 45 | - nbconvert-core=6.5.0=pyhd8ed1ab_0 46 | - nbconvert-pandoc=6.5.0=pyhd8ed1ab_0 47 | - nbformat=5.7.3=pyhd8ed1ab_0 48 | - ncurses=6.3=h5eee18b_3 49 | - nest-asyncio=1.5.6=pyhd8ed1ab_0 50 | - openssl=1.1.1s=h7f8727e_0 51 | - packaging=23.0=pyhd8ed1ab_0 52 | - pandoc=2.19.2=ha770c72_0 53 | - pandocfilters=1.5.0=pyhd8ed1ab_0 54 | - pip=22.3.1=py38h06a4308_0 55 | - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0 56 | - platformdirs=2.6.2=pyhd8ed1ab_0 57 | - pygments=2.14.0=pyhd8ed1ab_0 58 | - pymdown-extensions=9.9.1=pyhd8ed1ab_0 59 | - pyrsistent=0.18.1=py38h0a891b7_1 60 | - python=3.8.16=h7a1cb2a_2 61 | - python-dateutil=2.8.2=pyhd8ed1ab_0 62 | - python-fastjsonschema=2.16.2=pyhd8ed1ab_0 63 | - python_abi=3.8=2_cp38 64 | - pyyaml=6.0=py38h0a891b7_4 65 | - pyyaml-env-tag=0.1=pyhd8ed1ab_0 66 | - pyzmq=23.0.0=py38hfc09fa9_0 67 | - readline=8.2=h5eee18b_0 68 | - setuptools=65.6.3=py38h06a4308_0 69 | - six=1.16.0=pyh6c4a22f_0 70 | - soupsieve=2.3.2.post1=pyhd8ed1ab_0 71 | - sqlite=3.40.1=h5082296_0 72 | - tinycss2=1.2.1=pyhd8ed1ab_0 73 | - tk=8.6.12=h1ccaba5_0 74 | - toml=0.10.2=pyhd8ed1ab_0 75 | - tornado=6.1=py38h0a891b7_3 76 | - traitlets=5.8.1=pyhd8ed1ab_0 77 | - typing-extensions=4.4.0=hd8ed1ab_0 78 | - typing_extensions=4.4.0=pyha770c72_0 79 | - watchdog=2.2.1=py38h578d9bd_0 80 | - webencodings=0.5.1=py_1 81 | - wheel=0.37.1=pyhd3eb1b0_0 82 | - xz=5.2.10=h5eee18b_1 83 | - yaml=0.2.5=h7f98852_2 84 | - 
zeromq=4.3.4=h9c3ff4c_1 85 | - zipp=3.11.0=pyhd8ed1ab_0 86 | - zlib=1.2.13=h5eee18b_0 87 | prefix: /home/weiqiang/.local/miniconda3/envs/mkdocs 88 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - main 8 | 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | # runs-on: macos-latest 13 | # runs-on: windows-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | submodules: true 18 | 19 | # - uses: actions/setup-python@v4 20 | # with: 21 | # python-version: 3.8 22 | # cache: 'pip' 23 | # - run: pip install -r requirements.txt 24 | # # - run: pip install mkdocs mkdocs-material mkdocs-jupyter mkdocs-exclude 25 | # - run: mkdocs gh-deploy --force 26 | 27 | - uses: conda-incubator/setup-miniconda@v2 28 | with: 29 | python-version: 3.8 30 | miniconda-version: "latest" 31 | activate-environment: mkdocs 32 | environment-file: .github/environment.yml 33 | # - run: conda install mkdocs=1.3.0 mkdocs-material=8.4.1 mkdocs-material-extensions==1.0.3 mkdocs-jupyter=0.21.0 mkdocs-exclude -c conda-forge 34 | 35 | - name: mkdocs 36 | shell: bash -el {0} 37 | run: mkdocs gh-deploy --force 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Spark checkpoints 2 | checkpoint-* 3 | twitter_fig.* 4 | 5 | # log files 6 | log.* 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | .DS_Store 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | *pyc 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 107 | __pypackages__/ 108 | 109 | # Celery stuff 110 | celerybeat-schedule 111 | celerybeat.pid 112 | 113 | # SageMath parsed files 114 | *.sage.py 115 | 116 | # Environments 117 | .env 118 | .venv 119 | env/ 120 | venv/ 121 | ENV/ 122 | env.bak/ 123 | venv.bak/ 124 | 125 | # Spyder project settings 126 | .spyderproject 127 | .spyproject 128 | 129 | # Rope project settings 130 | .ropeproject 131 | 132 | # mkdocs documentation 133 | /site 134 | 135 | # mypy 136 | .mypy_cache/ 137 | .dmypy.json 138 | dmypy.json 139 | 140 | # Pyre type checker 141 | .pyre/ 142 | 143 | # pytype static type analyzer 144 | .pytype/ 145 | 146 | # Cython debug symbols 147 | cython_debug/ 148 | 149 | Trash 150 | 151 | # seismic data 152 | *.mseed 153 | *.pdf 154 | *.png 155 | *.csv 156 | *.pkl 157 | hypoinverse/ 158 | notebooks/*/config.json 159 | Trash 160 | slurm/stations/ 161 | slurm/figures/ 162 | slurm/waveforms/ 163 | slurm/results/ 164 | slurm/templates/ 165 | slurm/relocation/hypodd/ 166 | slurm/relocation/growclust/ 167 | 168 | slurm/*/stations/ 169 | slurm/*/waveforms/ 170 | slurm/*/waveforms/ 171 | slurm/*/results/ 172 | slurm/*/*.xml 173 | .history/ 174 | *.npy -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "DeepDenoiser"] 2 | path = DeepDenoiser 3 | url = https://github.com/wayneweiqiang/DeepDenoiser.git 4 | branch = master 5 | [submodule "PhaseNet"] 6 | path = PhaseNet 7 | url = https://github.com/wayneweiqiang/PhaseNet.git 8 | branch = master 9 | [submodule "GaMMA"] 10 | path = GaMMA 11 | url = https://github.com/wayneweiqiang/GaMMA.git 12 | branch = master 13 | [submodule "EQNet"] 14 | path = EQNet 15 | url = https://github.com/AI4EPS/EQNet.git 16 | [submodule "CCTorch"] 17 | path = CCTorch 18 | url = https://github.com/AI4EPS/CCTorch.git 19 | [submodule "ADLoc"] 20 | path = ADLoc 21 | url = https://github.com/AI4EPS/ADLoc.git 22 | -------------------------------------------------------------------------------- /HypoDD/.gitignore: -------------------------------------------------------------------------------- 1 | HYPODD 2 | HYPODD* 3 | dt*.ct 4 | event*.dat 5 | event*.sel 6 | f77 7 | g77 8 | stations_*.dat 9 | 10 | Hawaii* 11 | Ridgecrest* 12 | PuertoRico* 13 | tmp_* 14 | *.html 15 | test/ 16 | -------------------------------------------------------------------------------- /HypoDD/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN apt-get update && apt-get install -y gfortran && rm -rf /var/lib/apt/lists/* 4 | 5 | WORKDIR /opt 6 | 7 | ARG CACHEBUST=1 8 | 9 | ENV PATH="/opt/:${PATH}" 10 | RUN wget -O HYPODD_1.3.tar.gz http://www.ldeo.columbia.edu/~felixw/HYPODD/HYPODD_1.3.tar.gz 11 | RUN tar -xf HYPODD_1.3.tar.gz 12 | COPY Makefile /opt/HYPODD/src/hypodd 13 | COPY hypoDD.inc /opt/HYPODD/include 14 | RUN ln -s $(which gfortran) f77 15 | RUN ln -s $(which gfortran) g77 16 | RUN make -C HYPODD/src 17 | 18 | RUN mkdir hypodd 19 | RUN cp HYPODD/src/ph2dt/ph2dt hypodd/ph2dt 20 | RUN cp HYPODD/src/hypoDD/hypoDD hypodd/hypoDD 21 | 22 | # RUN ls -l 
/opt/HYPODD/src/hypoDD/hypoDD 23 | # COPY gamma2hypodd.py convert_stations.py /opt/ 24 | # COPY hypoDD_ct.inp hypoDD_cc.inp ph2dt.inp /opt/ 25 | RUN python -m pip install --upgrade numpy pandas tqdm minio && rm -rf /var/cache/apk/* -------------------------------------------------------------------------------- /HypoDD/Makefile: -------------------------------------------------------------------------------- 1 | CMD = hypoDD 2 | CC = gcc 3 | FC = g77 4 | #FC = gfortran 5 | #FC = f77 6 | SRCS = $(CMD).f \ 7 | aprod.f cluster1.f covar.f datum.f \ 8 | delaz.f delaz2.f direct1.f dist.f dtres.f exist.f \ 9 | freeunit.f getdata.f getinp.f ifindi.f \ 10 | indexxi.f juliam.f lsfit_lsqr.f lsfit_svd.f \ 11 | lsqr.f matmult1.f matmult2.f matmult3.f mdian1.f \ 12 | normlz.f partials.f ran.f redist.f refract.f \ 13 | resstat.f scopy.f sdc2.f setorg.f skip.f \ 14 | snrm2.f sort.f sorti.f sscal.f \ 15 | svd.f tiddid.f trialsrc.f trimlen.f \ 16 | ttime.f vmodel.f weighting.f 17 | CSRCS = atoangle_.c atoangle.c datetime_.c hypot_.c rpad_.c sscanf3_.c 18 | OBJS = $(SRCS:%.f=%.o) $(CSRCS:%.c=%.o) 19 | INCLDIR = ../../include 20 | CFLAGS = -O -I$(INCLDIR) 21 | 22 | # Flags for GNU g77 compiler 23 | #FFLAGS = -O -I$(INCLDIR) -fno-silent -Wall -implicit 24 | 25 | # Flags for SUN f77 compiler 26 | FFLAGS = -I$(INCLDIR) 27 | 28 | # Following line needed on HP-UX (hasn't been tested, though). 29 | #LDFLAGS = +U77 30 | 31 | all: $(CMD) 32 | 33 | $(CMD): $(OBJS) 34 | $(FC) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ 35 | 36 | %.o: %.f 37 | $(FC) $(FFLAGS) -c $(@F:.o=.f) -o $@ 38 | 39 | # Extensive lint-like diagnostic listing (SUN f77 only) 40 | hypoDD.lst: $(SRCS) 41 | f77 -e -Xlist -c $(SRCS) 42 | 43 | clean: 44 | -rm -f $(CMD) *.o core a.out *.fln junk 45 | 46 | # Include-file dependencies 47 | 48 | dist.o : $(INCLDIR)/geocoord.inc 49 | redist.o : $(INCLDIR)/geocoord.inc 50 | sdc2.o : $(INCLDIR)/geocoord.inc 51 | setorg.o : $(INCLDIR)/geocoord.inc 52 | 53 | cluster1.o : $(INCLDIR)/hypoDD.inc 54 | dtres.o : $(INCLDIR)/hypoDD.inc 55 | getdata.o : $(INCLDIR)/hypoDD.inc 56 | hypoDD.o : $(INCLDIR)/hypoDD.inc 57 | lsfit_lsqr.o : $(INCLDIR)/hypoDD.inc 58 | lsfit_svd.o : $(INCLDIR)/hypoDD.inc 59 | partials.o : $(INCLDIR)/hypoDD.inc 60 | refract.o : $(INCLDIR)/hypoDD.inc 61 | resstat.o : $(INCLDIR)/hypoDD.inc 62 | skip.o : $(INCLDIR)/hypoDD.inc 63 | tiddid.o : $(INCLDIR)/hypoDD.inc 64 | trialsrc.o : $(INCLDIR)/hypoDD.inc 65 | ttime.o : $(INCLDIR)/hypoDD.inc 66 | vmodel.o : $(INCLDIR)/hypoDD.inc 67 | weighting.o : $(INCLDIR)/hypoDD.inc 68 | 69 | atoangle_.o : $(INCLDIR)/compat.h 70 | atoangle_.o : $(INCLDIR)/f77types.h 71 | datetime_.o : $(INCLDIR)/f77types.h 72 | rpad_.o : $(INCLDIR)/f77types.h 73 | sscanf3_.o : $(INCLDIR)/compat.h 74 | sscanf3_.o : $(INCLDIR)/f77types.h 75 | 76 | -------------------------------------------------------------------------------- /HypoDD/convert_stations.py: -------------------------------------------------------------------------------- 1 | #%% 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | # %% 7 | stations = pd.read_csv('stations.csv', sep="\t") 8 | 9 | # %% 10 | converted_hypoinverse = [] 11 | converted_hypoDD = {} 12 | 13 | for i in tqdm(range(len(stations))): 14 | 15 | network_code, station_code, comp_code, channel_code = stations.iloc[i]['station'].split('.') 16 | station_weight = " " 17 | lat_degree = int(stations.iloc[i]['latitude']) 18 | lat_minute = (stations.iloc[i]['latitude'] - lat_degree) * 60 19 | north = "N" if lat_degree >= 0 else "S" 20 | lng_degree 
= int(stations.iloc[i]['longitude']) 21 | lng_minute = (stations.iloc[i]['longitude'] - lng_degree) * 60 22 | west = "W" if lng_degree <= 0 else "E" 23 | elevation = stations.iloc[i]['elevation(m)'] 24 | line_hypoinverse = f"{station_code:<5} {network_code:<2} {comp_code[:-1]:<1}{channel_code:<3} {station_weight}{abs(lat_degree):2.0f} {abs(lat_minute):7.4f}{north}{abs(lng_degree):3.0f} {abs(lng_minute):7.4f}{west}{elevation:4.0f}\n" 25 | # line_hypoDD = f"{network_code:<2}.{station_code:<5} {stations.iloc[i]['latitude']:.3f}, {stations.iloc[i]['longitude']:.3f}\n" 26 | #line_hypoDD = f"{station_code} {stations.iloc[i]['latitude']:.3f} {stations.iloc[i]['longitude']:.3f}\n" 27 | converted_hypoinverse.append(line_hypoinverse) 28 | #converted_hypoDD.append(line_hypoDD) 29 | converted_hypoDD[f"{station_code}"] = f"{station_code} {stations.iloc[i]['latitude']:.3f} {stations.iloc[i]['longitude']:.3f}\n" 30 | 31 | # %% 32 | out_file = 'stations_hypoinverse.dat' 33 | with open(out_file, 'w') as f: 34 | f.writelines(converted_hypoinverse) 35 | 36 | out_file = 'stations_hypoDD.dat' 37 | # converted_hypoDD = list(set(converted_hypoDD)) 38 | with open(out_file, 'w') as f: 39 | #f.writelines(converted_hypoDD) 40 | for k, v in converted_hypoDD.items(): 41 | f.write(v) 42 | 43 | # %% 44 | -------------------------------------------------------------------------------- /HypoDD/gamma2hypoinverse.py: -------------------------------------------------------------------------------- 1 | #%% 2 | from datetime import datetime 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | # %% 9 | picks = pd.read_csv('gamma_picks.csv', sep="\t") 10 | events = pd.read_csv('gamma_catalog.csv', sep="\t") 11 | 12 | # %% 13 | events["match_id"] = events.apply(lambda x: f'{x["event_idx"]}_{x["file_index"]}', axis=1) 14 | picks["match_id"] = picks.apply(lambda x: f'{x["event_idx"]}_{x["file_index"]}', axis=1) 15 | 16 | # %% 17 | out_file = open("hypoInput.arc", "w") 18 | 19 | picks_by_event = picks.groupby("match_id").groups 20 | 21 | for i in tqdm(range(len(events))): 22 | 23 | event = events.iloc[i] 24 | event_time = datetime.strptime(event["time"], "%Y-%m-%dT%H:%M:%S.%f").strftime("%Y%m%d%H%M%S%f")[:-4] 25 | lat_degree = int(event["latitude"]) 26 | lat_minute = (event["latitude"] - lat_degree) * 60 * 100 27 | south = "S" if lat_degree <= 0 else " " 28 | lng_degree = int(event["longitude"]) 29 | lng_minute = (event["longitude"] - lng_degree) * 60 * 100 30 | east = "E" if lng_degree >= 0 else " " 31 | depth = event["depth(m)"] / 1e3 * 100 32 | event_line = f"{event_time}{abs(lat_degree):2d}{south}{abs(lat_minute):4.0f}{abs(lng_degree):3d}{east}{abs(lng_minute):4.0f}{depth:5.0f}" 33 | out_file.write(event_line + "\n") 34 | 35 | picks_idx = picks_by_event[event["match_id"]] 36 | for j in picks_idx: 37 | pick = picks.iloc[j] 38 | network_code, station_code, comp_code, channel_code = pick['id'].split('.') 39 | phase_type = pick['type'] 40 | phase_weight = min(max(int((1 - pick['prob']) / (1 - 0.3) * 4) - 1, 0), 3) 41 | pick_time = datetime.strptime(pick["timestamp"], "%Y-%m-%dT%H:%M:%S.%f") 42 | phase_time_minute = pick_time.strftime("%Y%m%d%H%M") 43 | phase_time_second = pick_time.strftime("%S%f")[:-4] 44 | tmp_line = f"{station_code:<5}{network_code:<2} {comp_code:<1}{channel_code:<3}" 45 | if phase_type.upper() == 'P': 46 | pick_line = f"{tmp_line:<13} P {phase_weight:<1d}{phase_time_minute} {phase_time_second}" 47 | elif phase_type.upper() == 'S': 48 | pick_line = f"{tmp_line:<13} 
4{phase_time_minute} {'':<12}{phase_time_second} S {phase_weight:<1d}" 49 | else: 50 | raise ValueError(f"Phase type error {phase_type}") 51 | out_file.write(pick_line + "\n") 52 | 53 | out_file.write("\n") 54 | if i > 1e3: 55 | break 56 | 57 | out_file.close() 58 | -------------------------------------------------------------------------------- /HypoDD/hyp.command: -------------------------------------------------------------------------------- 1 | * This is a very simple hypoinverse test command file. 2 | * It uses only a simple station and crust model file, 3 | * with no station delay file or other options. 4 | * Run hypoinverse, then type @test2000.hyp at the command prompt. 5 | 6 | 200 t 2000 0 /enable y2000 formats 7 | H71 3 1 3 /use new hypoinverse station format 8 | DIS 4 50 1 3 /Main Distance weighting 9 | RMS 4 0.16 1.5 3 /Residual weighting 10 | ERR .10 11 | *POS 1.8 12 | MIN 5 /min number of stations 13 | ZTR 8 /trial depth 14 | *WET 1. .5 .2 .1 /weighting by pick quality 15 | *PRE 3, 3 0 0 9 /magnitude 16 | * OUTPUT 17 | ERF T 18 | TOP F 19 | 20 | STA 'stations_hypoinverse.dat' 21 | LET 5 2 0 /Net Sta Chn 22 | TYP Read in crustal model(s): 23 | CRH 1 'vel_model_P.crh' /read crust model for Vp, here depth 0 is relative to the average elevation of stations 24 | CRH 2 'vel_model_S.crh' /read crust model for Vs 25 | SAL 1 2 26 | PHS 'hypoInput.arc' /input phase file 27 | 28 | FIL /automatically set phase format from file 29 | ARC 'hypoOut.arc' /output archive file 30 | PRT 'prtOut.prt' /output print file 31 | SUM 'catOut.sum' /output location summary 32 | *RDM T 33 | CAR 1 34 | *LST 2 35 | LOC /locate the earthquake 36 | STO 37 | -------------------------------------------------------------------------------- /HypoDD/hypoDD.inc: -------------------------------------------------------------------------------- 1 | c hypoDD.inc: Stores parameters that define array dimensions in hypoDD. 2 | c Modify to fit size of problem and available computer memory. 3 | c Parameter Description: 4 | c MAXEVE: Max number of events (must be at least the size of the number 5 | c of events listed in the event file) 6 | c MAXDATA: Max number of observations (must be at least the size of the 7 | c number of observations). 8 | c MAXEVE0: Max number of events used for SVD. If only LSQR is used, 9 | c MAXEVE0 can be set to 2 to free up memory. 10 | c MAXDATA0: Max number of observations used for SVD. If only LSQR is used, 11 | c MAXDATA0 can be set to 1 to free up memory. 12 | c MAXLAY: Max number of model layers. 13 | c MAXSTA: Max number of stations. 14 | c MAXCL: Max number of clusters allowed. 15 | integer*4 MAXEVE, MAXLAY, MAXDATA, MAXSTA, MAXEVE0, MAXDATA0, 16 | & MAXCL 17 | 18 | cc parameters for small size problems (e.g. SUN ULTRA-5, 256 MB RAM) 19 | c parameter(MAXEVE= 7000, 20 | c & MAXDATA= 5000000, 21 | c & MAXEVE0= 30, 22 | c & MAXDATA0= 5000, 23 | c & MAXLAY= 12, 24 | c & MAXSTA= 2000, 25 | c & MAXCL= 20) 26 | 27 | c parameters for medium size problems (e.g. : SUN ULTRA-2, 768 MB RAM) 28 | c parameter(MAXEVE= 8000, 29 | c & MAXDATA= 5000000, 30 | c parameter(MAXEVE= 10800, 31 | c & MAXDATA= 3500000, 32 | c & MAXEVE0= 2, 33 | c & MAXDATA0= 1, 34 | c & MAXLAY= 20, 35 | c & MAXSTA= 2400, 36 | c & MAXCL= 50) 37 | 38 | cc parameters for large problems (e.g. 
SUN BLADE 100, 2 GB RAM): 39 | parameter(MAXEVE= 10800, 40 | & MAXDATA= 9100000, 41 | & MAXEVE0= 50, 42 | & MAXDATA0= 10000, 43 | & MAXLAY= 15, 44 | & MAXSTA= 1300, 45 | & MAXCL= 100) 46 | 47 | cc parameters for very large problems, with cluster1 enabled (e.g. SUN BLADE 100, 2 GB RAM): 48 | c parameter(MAXEVE= 100000, 49 | c & MAXDATA= 10000000, 50 | c & MAXEVE0= 2, 51 | c & MAXDATA0= 1, 52 | c & MAXLAY= 12, 53 | c & MAXSTA= 600, 54 | c & MAXCL= 1) 55 | 56 | cc parameters for customized problems (e.g. SUN BLADE 100, 2 GB RAM): 57 | c parameter(MAXEVE= 27000, 58 | c & MAXDATA= 2900000, 59 | c & MAXEVE0= 2, 60 | c & MAXDATA0= 1, 61 | c & MAXLAY= 12, 62 | c & MAXSTA= 155, 63 | c & MAXCL= 10) 64 | 65 | -------------------------------------------------------------------------------- /HypoDD/hypodd_cc.inp: -------------------------------------------------------------------------------- 1 | * RELOC.INP: 2 | *--- input file selection 3 | * cross correlation diff times: 4 | 5 | * 6 | *catalog P diff times: 7 | dt.ct 8 | * 9 | * event file: 10 | event.sel 11 | * 12 | * station file: 13 | stations.dat 14 | * 15 | *--- output file selection 16 | * original locations: 17 | hypoDD.loc 18 | * relocations: 19 | hypoDD.reloc 20 | * station information: 21 | hypoDD.sta 22 | * residual information: 23 | hypoDD.res 24 | * source paramater information: 25 | hypoDD.src 26 | * 27 | *--- data type selection: 28 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 29 | * IPHA: 1= P; 2= S; 3= P&S 30 | * DIST:max dist [km] between cluster centroid and station 31 | * IDAT IPHA DIST 32 | 2 3 120 33 | * 34 | *--- event clustering: 35 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 36 | * OBSCT: min # of obs/pair for network data (0= no clustering) 37 | * OBSCC OBSCT 38 | 0 8 39 | * 40 | *--- solution control: 41 | * ISTART: 1 = from single source; 2 = from network sources 42 | * ISOLV: 1 = SVD, 2=lsqr 43 | * NSET: number of sets of iteration with specifications following 44 | * ISTART ISOLV NSET 45 | 2 2 4 46 | * 47 | *--- data weighting and re-weighting: 48 | * NITER: last iteration to used the following weights 49 | * WTCCP, WTCCS: weight cross P, S 50 | * WTCTP, WTCTS: weight catalog P, S 51 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 52 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 53 | * DAMP: damping (for lsqr only) 54 | * --- CROSS DATA ----- ----CATALOG DATA ---- 55 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 56 | 4 -9 -9 -9 -9 1 1 8 -9 70 57 | 4 -9 -9 -9 -9 1 1 6 4 70 58 | 4 -9 -9 -9 -9 1 0.8 4 2 70 59 | 4 -9 -9 -9 -9 1 0.8 3 2 70 60 | * 61 | *--- 1D model: 62 | * NLAY: number of model layers 63 | * RATIO: vp/vs ratio 64 | * TOP: depths of top of layer (km) 65 | * VEL: layer velocities (km/s) 66 | * NLAY RATIO 67 | 12 1.82 68 | * TOP 69 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 70 | * VEL 71 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 72 | * 73 | *--- event selection: 74 | * CID: cluster to be relocated (0 = all) 75 | * ID: cuspids of event to be relocated (8 per line) 76 | * CID 77 | 0 78 | * ID 79 | -------------------------------------------------------------------------------- /HypoDD/hypodd_ct.inp: -------------------------------------------------------------------------------- 1 | * RELOC.INP: 2 | *--- input file selection 3 | * cross correlation diff times: 4 | 5 | * 6 | *catalog P diff times: 7 | dt.ct 8 | * 9 | * event file: 10 | event.sel 11 | * 12 | * station file: 13 | 
stations.dat 14 | * 15 | *--- output file selection 16 | * original locations: 17 | hypoDD.loc 18 | * relocations: 19 | hypoDD.reloc 20 | * station information: 21 | hypoDD.sta 22 | * residual information: 23 | hypoDD.res 24 | * source paramater information: 25 | hypoDD.src 26 | * 27 | *--- data type selection: 28 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 29 | * IPHA: 1= P; 2= S; 3= P&S 30 | * DIST:max dist [km] between cluster centroid and station 31 | * IDAT IPHA DIST 32 | 2 3 120 33 | * 34 | *--- event clustering: 35 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 36 | * OBSCT: min # of obs/pair for network data (0= no clustering) 37 | * OBSCC OBSCT 38 | 0 8 39 | * 40 | *--- solution control: 41 | * ISTART: 1 = from single source; 2 = from network sources 42 | * ISOLV: 1 = SVD, 2=lsqr 43 | * NSET: number of sets of iteration with specifications following 44 | * ISTART ISOLV NSET 45 | 2 2 4 46 | * 47 | *--- data weighting and re-weighting: 48 | * NITER: last iteration to used the following weights 49 | * WTCCP, WTCCS: weight cross P, S 50 | * WTCTP, WTCTS: weight catalog P, S 51 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 52 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 53 | * DAMP: damping (for lsqr only) 54 | * --- CROSS DATA ----- ----CATALOG DATA ---- 55 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 56 | 4 -9 -9 -9 -9 1 1 8 -9 70 57 | 4 -9 -9 -9 -9 1 1 6 4 70 58 | 4 -9 -9 -9 -9 1 0.8 4 2 70 59 | 4 -9 -9 -9 -9 1 0.8 3 2 70 60 | * 61 | *--- 1D model: 62 | * NLAY: number of model layers 63 | * RATIO: vp/vs ratio 64 | * TOP: depths of top of layer (km) 65 | * VEL: layer velocities (km/s) 66 | * NLAY RATIO 67 | 12 1.82 68 | * TOP 69 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 70 | * VEL 71 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 72 | * 73 | *--- event selection: 74 | * CID: cluster to be relocated (0 = all) 75 | * ID: cuspids of event to be relocated (8 per line) 76 | * CID 77 | 0 78 | * ID 79 | -------------------------------------------------------------------------------- /HypoDD/ph2dt.inp: -------------------------------------------------------------------------------- 1 | * ph2dt.inp - input control file for program ph2dt 2 | * Input station file: 3 | stations_hypoDD.dat 4 | * Input phase file: 5 | hypoDD.pha 6 | *MINWGHT: min. pick weight allowed [0] 7 | *MAXDIST: max. distance in km between event pair and stations [200] 8 | *MAXSEP: max. hypocentral separation in km [10] 9 | *MAXNGH: max. number of neighbors per event [10] 10 | *MINLNK: min. number of links required to define a neighbor [8] 11 | *MINOBS: min. number of links per pair saved [8] 12 | *MAXOBS: max. 
number of links per pair saved [20] 13 | *MINWGHT MAXDIST MAXSEP MAXNGH MINLNK MINOBS MAXOBS 14 | 0 120 10 50 8 8 100 15 | -------------------------------------------------------------------------------- /HypoDD/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | hypo=../hyp1.40/source/hyp1.40 3 | ph2dt=../HYPODD/src/ph2dt/ph2dt 4 | hypoDD=../HYPODD/src/hypoDD/hypoDD 5 | 6 | ## gamma to hypoDD 7 | python gamma2hypodd.py 8 | # $ph2dt ph2dt.inp 9 | # $hypoDD hypoDD.inp 10 | 11 | # ## gamma to hypoinverse 12 | # python convert_stations.py 13 | # python convert_picks.py 14 | # $hypo < hyp.command 15 | 16 | # ## hypoinverse to hypoDD 17 | # python hypoinverse2hypodd.py 18 | # $ph2dt ph2dt.inp 19 | # $hypoDD hypoDD.inp 20 | -------------------------------------------------------------------------------- /HypoDD/vel_model_P.crh: -------------------------------------------------------------------------------- 1 | MODEL Vp from REAL 2 | 5.30 0.00 3 | 5.65 1.00 4 | 5.93 3.00 5 | 6.20 7.00 6 | 7.50 31.00 7 | 8.11 31.10 8 | -------------------------------------------------------------------------------- /HypoDD/vel_model_S.crh: -------------------------------------------------------------------------------- 1 | MODEL Vs from REAL 2 | 2.75 0.00 3 | 2.80 1.00 4 | 3.10 3.00 5 | 3.40 7.00 6 | 4.00 31.00 7 | 4.49 31.10 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Weiqiang Zhu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/NCEDC/.gitignore: -------------------------------------------------------------------------------- 1 | gs 2 | data 3 | dataset 4 | waveform_h5 5 | waveform_ps_h5 6 | *.h5 7 | FDSNstationXML 8 | tmp_* 9 | mseed_list_NC 10 | mseed_list_SC 11 | 12 | -------------------------------------------------------------------------------- /datasets/NCEDC/extract_csv.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | 4 | import h5py 5 | import numpy as np 6 | import pandas as pd 7 | from tqdm import tqdm 8 | 9 | 10 | # %% 11 | # for folder in ["/nfs/quakeflow_dataset/NC/quakeflow_nc", "/nfs/quakeflow_dataset/NC"]: 12 | # for mode in ["", "_train", "_test"]: 13 | def process(i, folder, mode): 14 | h5_file = f"{folder}/waveform{mode}.h5" 15 | print(f"Processing {h5_file}") 16 | 17 | events_df = [] 18 | picks_df = [] 19 | with h5py.File(h5_file, "r") as f: 20 | event_ids = list(f.keys()) 21 | for event_id in tqdm(event_ids, desc=f"{h5_file}", position=i): 22 | event_attrs = dict(f[event_id].attrs) 23 | events_df.append(event_attrs) 24 | 25 | station_ids = list(f[event_id].keys()) 26 | for station_id in station_ids: 27 | station_attrs = dict(f[event_id][station_id].attrs) 28 | station_attrs["event_id"] = event_id 29 | station_attrs["station_id"] = station_id 30 | picks_df.append(station_attrs) 31 | 32 | events_df = pd.DataFrame(events_df) 33 | picks_df = pd.DataFrame(picks_df) 34 | events_df.to_csv(f"{folder}/events{mode}.csv", index=False) 35 | picks_df.to_csv(f"{folder}/picks{mode}.csv", index=False) 36 | 37 | 38 | # %% 39 | folders = [ 40 | "/nfs/quakeflow_dataset/NC/quakeflow_nc", 41 | "/nfs/quakeflow_dataset/SC/quakeflow_sc", 42 | "/nfs/quakeflow_dataset/NC", 43 | "/nfs/quakeflow_dataset/SC", 44 | ] 45 | mode = ["", "_train", "_test"] 46 | inputs = [(folder, m) for folder in folders for m in mode] 47 | 48 | pbar = tqdm(total=len(inputs)) 49 | callback = lambda *args: pbar.update() 50 | 51 | with mp.Pool(len(inputs)) as pool: 52 | jobs = [] 53 | for i, (folder, mode) in enumerate(inputs): 54 | job = pool.apply_async( 55 | process, 56 | args=( 57 | i, 58 | folder, 59 | mode, 60 | ), 61 | callback=callback, 62 | ) 63 | jobs.append(job) 64 | pool.close() 65 | pool.join() 66 | 67 | results = [job.get() for job in jobs] 68 | -------------------------------------------------------------------------------- /datasets/NCEDC/extract_ps.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | import os 4 | 5 | import h5py 6 | from tqdm import tqdm 7 | 8 | # %% 9 | h5_dir = "waveform_h5" 10 | h5_files = os.listdir(h5_dir) 11 | 12 | # %% 13 | result_path = "waveform_ps_h5" 14 | if not os.path.exists(result_path): 15 | os.makedirs(result_path) 16 | 17 | 18 | # %% 19 | def run(h5_file): 20 | h5_input = os.path.join(h5_dir, h5_file) 21 | h5_output = os.path.join(result_path, h5_file) 22 | pos = 2022 - int(h5_file.split("/")[-1].split(".")[0]) 23 | with h5py.File(h5_input, "r") as fin: 24 | with h5py.File(h5_output, "w") as fout: 25 | for event in tqdm(fin.keys(), desc=h5_file, total=len(fin.keys()), position=pos, leave=True): 26 | # copy event and attributes 27 | gp = fout.create_group(event) 28 | for key in fin[event].attrs.keys(): 29 | gp.attrs[key] = fin[event].attrs[key] 30 | num_station = 0 31 | for station in fin[event].keys(): 32 | if "S" in 
fin[event][station].attrs["phase_type"]: 33 | ds = gp.create_dataset(station, data=fin[event][station]) 34 | for key in fin[event][station].attrs.keys(): 35 | ds.attrs[key] = fin[event][station].attrs[key] 36 | num_station += 1 37 | else: 38 | continue 39 | gp.attrs["nx"] = num_station 40 | 41 | 42 | # %% 43 | if __name__ == "__main__": 44 | # run(0, h5_files[0]) 45 | 46 | ncpu = len(h5_files) 47 | print(f"Using {ncpu} CPUs") 48 | with mp.Pool(ncpu) as p: 49 | p.map(run, h5_files) 50 | -------------------------------------------------------------------------------- /datasets/NCEDC/merge_hdf5.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | 4 | import h5py 5 | import matplotlib.pyplot as plt 6 | from tqdm import tqdm 7 | 8 | # %% 9 | h5_dir = "waveform_ps_h5" 10 | h5_out = "waveform_ps.h5" 11 | h5_train = "waveform_ps_train.h5" 12 | h5_test = "waveform_ps_test.h5" 13 | 14 | # # %% 15 | # h5_dir = "waveform_h5" 16 | # h5_out = "waveform.h5" 17 | # h5_train = "waveform_train.h5" 18 | # h5_test = "waveform_test.h5" 19 | 20 | h5_files = sorted(os.listdir(h5_dir)) 21 | train_files = h5_files[:-1] 22 | test_files = h5_files[-1:] 23 | # train_files = h5_files 24 | # train_files = [x for x in train_files if (x != "2014.h5") and (x not in [])] 25 | # test_files = [] 26 | print(f"train files: {train_files}") 27 | print(f"test files: {test_files}") 28 | 29 | # %% 30 | with h5py.File(h5_out, "w") as fp: 31 | # external linked file 32 | for h5_file in h5_files: 33 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 34 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 35 | if event not in fp: 36 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 37 | else: 38 | print(f"{event} already exists") 39 | continue 40 | 41 | # %% 42 | with h5py.File(h5_train, "w") as fp: 43 | # external linked file 44 | for h5_file in train_files: 45 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 46 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 47 | if event not in fp: 48 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 49 | else: 50 | print(f"{event} already exists") 51 | continue 52 | 53 | # %% 54 | with h5py.File(h5_test, "w") as fp: 55 | # external linked file 56 | for h5_file in test_files: 57 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 58 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 59 | if event not in fp: 60 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 61 | else: 62 | print(f"{event} already exists") 63 | continue 64 | 65 | # %% 66 | -------------------------------------------------------------------------------- /datasets/NCEDC/run.yaml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | 3 | workdir: . 
4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: n2-highmem-16 16 | 17 | # accelerators: P100:4 18 | 19 | cpus: 16+ 20 | # cpus: 64+ 21 | 22 | # disk_size: 300 23 | 24 | # disk_tier: high 25 | 26 | use_spot: True 27 | 28 | # spot_recovery: FAILOVER 29 | 30 | # image_id: docker:zhuwq0/quakeflow:latest 31 | 32 | envs: 33 | JOB: quakeflow_dataset 34 | 35 | file_mounts: 36 | 37 | /scedc-pds: 38 | source: s3://scedc-pds/ 39 | mode: MOUNT 40 | 41 | /ncedc-pds: 42 | source: s3://ncedc-pds/ 43 | mode: MOUNT 44 | 45 | /quakeflow_dataset: 46 | source: gs://quakeflow_dataset/ 47 | mode: MOUNT 48 | 49 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 50 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 51 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 52 | ~/.config/gcloud/application_default_credentials.json: ~/.config/gcloud/application_default_credentials.json 53 | 54 | setup: | 55 | echo "Begin setup." 56 | sudo apt install rclone 57 | pip install fsspec gcsfs 58 | pip install obspy matplotlib 59 | pip install h5py tqdm 60 | 61 | run: | 62 | echo "Begin run." 63 | # python download_catalog.py 64 | python download_waveform.py -------------------------------------------------------------------------------- /datasets/SCEDC/.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | FDSNstationXML 3 | -------------------------------------------------------------------------------- /datasets/SCEDC/download_station.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from datetime import timezone 4 | 5 | import fsspec 6 | import obspy 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | # %% 11 | input_protocol = "s3" 12 | input_bucket = "scedc-pds" 13 | input_fs = fsspec.filesystem(input_protocol, anon=True) 14 | 15 | output_protocol = "gs" 16 | output_token = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 17 | output_bucket = "quakeflow_dataset/SC" 18 | output_fs = fsspec.filesystem(output_protocol, token=output_token) 19 | 20 | # %% 21 | station_path = f"{input_bucket}/FDSNstationXML" 22 | 23 | 24 | # %% 25 | def parse_inventory_csv(inventory): 26 | channel_list = [] 27 | for network in inventory: 28 | for station in network: 29 | for channel in station: 30 | if channel.sensor is None: 31 | sensor_description = "" 32 | else: 33 | sensor_description = channel.sensor.description 34 | channel_list.append( 35 | { 36 | "network": network.code, 37 | "station": station.code, 38 | "location": channel.location_code, 39 | "instrument": channel.code[:-1], 40 | "component": channel.code[-1], 41 | "channel": channel.code, 42 | "longitude": channel.longitude, 43 | "latitude": channel.latitude, 44 | "elevation_m": channel.elevation, 45 | "local_depth_m": channel.depth, 46 | "depth_km": round(-channel.elevation / 1000, 4), 47 | # "depth_km": channel.depth, 48 | "begin_time": ( 49 | channel.start_date.datetime.replace(tzinfo=timezone.utc).isoformat() 50 | if channel.start_date is not None 51 | else None 52 | ), 53 | "end_time": ( 54 | channel.end_date.datetime.replace(tzinfo=timezone.utc).isoformat() 55 | if channel.end_date is not None 56 | else None 57 | ), 58 | "azimuth": channel.azimuth, 59 | "dip": channel.dip, 60 | "sensitivity": ( 61 | channel.response.instrument_sensitivity.value 62 | if channel.response.instrument_sensitivity 63 | else None 64 | ), 65 | "site": station.site.name, 66 | "sensor": 
sensor_description, 67 | } 68 | ) 69 | channel_list = pd.DataFrame(channel_list) 70 | 71 | print(f"Parse {len(channel_list)} channels into csv") 72 | 73 | return channel_list 74 | 75 | 76 | # %% 77 | inv = obspy.Inventory() 78 | for network in input_fs.glob(f"{station_path}/*"): 79 | print(f"Parse {network}") 80 | for xml in tqdm(input_fs.glob(f"{network}/*.xml")): 81 | with input_fs.open(xml) as f: 82 | inv += obspy.read_inventory(f) 83 | 84 | # %% 85 | stations = parse_inventory_csv(inv) 86 | 87 | # %% 88 | for network, sta in stations.groupby("network"): 89 | with output_fs.open(f"{output_bucket}/station/{network}.csv", "wb") as f: 90 | sta.to_csv(f, index=False) 91 | 92 | # %% 93 | -------------------------------------------------------------------------------- /datasets/SCEDC/extract_ps.py: -------------------------------------------------------------------------------- 1 | ../NCEDC/extract_ps.py -------------------------------------------------------------------------------- /datasets/SCEDC/merge_hdf5.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | 4 | import h5py 5 | import matplotlib.pyplot as plt 6 | from tqdm import tqdm 7 | 8 | # %% 9 | h5_dir = "waveform_ps_h5" 10 | h5_out = "waveform_ps.h5" 11 | h5_train = "waveform_ps_train.h5" 12 | h5_test = "waveform_ps_test.h5" 13 | 14 | # # %% 15 | # h5_dir = "waveform_h5" 16 | # h5_out = "waveform.h5" 17 | # h5_train = "waveform_train.h5" 18 | # h5_test = "waveform_test.h5" 19 | 20 | h5_files = sorted(os.listdir(h5_dir)) 21 | h5_files = [x for x in h5_files if (x not in ["2019.h5", "2020.h5"])] 22 | train_files = h5_files[:-1] 23 | test_files = h5_files[-1:] 24 | # train_files = h5_files 25 | # train_files = [x for x in train_files if (x != "2014.h5") and (x not in [])] 26 | # test_files = [] 27 | print(f"train files: {train_files}") 28 | print(f"test files: {test_files}") 29 | 30 | # %% 31 | with h5py.File(h5_out, "w") as fp: 32 | # external linked file 33 | for h5_file in h5_files: 34 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 35 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 36 | if event not in fp: 37 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 38 | else: 39 | print(f"{event} already exists") 40 | continue 41 | 42 | # %% 43 | with h5py.File(h5_train, "w") as fp: 44 | # external linked file 45 | for h5_file in train_files: 46 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 47 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 48 | if event not in fp: 49 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 50 | else: 51 | print(f"{event} already exists") 52 | continue 53 | 54 | # %% 55 | with h5py.File(h5_test, "w") as fp: 56 | # external linked file 57 | for h5_file in test_files: 58 | with h5py.File(os.path.join(h5_dir, h5_file), "r") as f: 59 | for event in tqdm(f.keys(), desc=h5_file, total=len(f.keys())): 60 | if event not in fp: 61 | fp[event] = h5py.ExternalLink(os.path.join(h5_dir, h5_file), event) 62 | else: 63 | print(f"{event} already exists") 64 | continue 65 | 66 | # %% 67 | -------------------------------------------------------------------------------- /datasets/SCEDC/run.yaml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | 3 | workdir: . 
4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: n2-highmem-16 16 | 17 | # accelerators: P100:4 18 | 19 | cpus: 16+ 20 | # cpus: 64+ 21 | 22 | # disk_size: 300 23 | 24 | # disk_tier: high 25 | 26 | # use_spot: True 27 | 28 | # spot_recovery: FAILOVER 29 | 30 | # image_id: docker:zhuwq0/quakeflow:latest 31 | 32 | envs: 33 | JOB: quakeflow_dataset 34 | 35 | file_mounts: 36 | 37 | /scedc-pds: 38 | source: s3://scedc-pds/ 39 | mode: MOUNT 40 | 41 | /ncedc-pds: 42 | source: s3://ncedc-pds/ 43 | mode: MOUNT 44 | 45 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 46 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 47 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 48 | ~/.config/gcloud/application_default_credentials.json: ~/.config/gcloud/application_default_credentials.json 49 | 50 | setup: | 51 | echo "Begin setup." 52 | sudo apt install rclone 53 | pip install fsspec gcsfs s3fs 54 | pip install obspy matplotlib 55 | pip install h5py tqdm 56 | 57 | run: | 58 | echo "Begin run." 59 | # python download_catalog.py 60 | python download_waveform.py 61 | -------------------------------------------------------------------------------- /datasets/SCEDC/split_large_files.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | import os 4 | from glob import glob 5 | 6 | import h5py 7 | import numpy as np 8 | import pandas as pd 9 | from tqdm import tqdm 10 | 11 | # %% 12 | data_path = "waveform_ps_h5" 13 | result_path = "data" 14 | file_list = sorted(glob(f"{data_path}/*.h5")) 15 | # %% 16 | file_size = {file: os.path.getsize(file)/1e9 for file in file_list} 17 | 18 | # %% 19 | MAX_SIZE = 45 # GB 20 | for file, size in file_size.items(): 21 | if size > MAX_SIZE: 22 | # split into smaller files 23 | NUM_FILES = int(np.ceil(size / MAX_SIZE)) 24 | with h5py.File(file, "r") as f: 25 | event_ids = list(f.keys()) 26 | for event_id in tqdm(event_ids, desc=f"Processing {file}"): 27 | index = int(event_id[-1]) % NUM_FILES 28 | # with h5py.File(f"{result_path}/{file.split('/')[-1].replace('.h5', '')}_{index}.h5", "a") as g: 29 | with h5py.File(f"{data_path}/{file.split('/')[-1].replace('.h5', '')}_{index}.h5", "a") as g: 30 | if event_id in g: 31 | print(f"Event {event_id} already exists in {file.split('/')[-1].replace('.h5', '')}_{index}.h5") 32 | continue 33 | # copy 34 | f.copy(event_id, g) 35 | # else: 36 | # print(f"Copying {file} to {result_path}") 37 | # os.system(f"cp {file} {result_path}") -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | Ridgecrest_demo 2 | Ridgecrest_oneweek 3 | Tahoe 4 | SmithValley 5 | Antilles 6 | 7 | condaenv.*.txt 8 | 9 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # [QuakeFlow: A Scalable Machine-learning-based Earthquake Monitoring Workflow with Cloud Computing](https://ai4eps.github.io/QuakeFlow/) 4 | [![documentation](https://github.com/ai4eps/QuakeFlow/actions/workflows/docs.yml/badge.svg)](https://ai4eps.github.io/QuakeFlow/) 5 | 6 | ## Overview 7 | 8 | ![](https://raw.githubusercontent.com/ai4eps/QuakeFlow/master/docs/assets/quakeflow_diagram.png) 9 | 10 | QuakeFlow is a scalable deep-learning-based earthquake monitoring system with cloud computing. 
11 | It applies the state-of-the-art deep learning/machine learning models for earthquake detection. 12 | With auto-scaling enabled on Kubernetes, our system can balance the computational load against the available computing resources. 13 | 14 | 15 | 16 | ## Current Modules 17 | 18 | ### Models 19 | - [DeepDenoiser](https://ai4eps.github.io/DeepDenoiser/): [(paper)](https://arxiv.org/abs/1811.02695) [(example)](https://ai4eps.github.io/DeepDenoiser/example_interactive/) 20 | - [PhaseNet](https://ai4eps.github.io/PhaseNet/): [(paper)](https://arxiv.org/abs/1803.03211) [(example)](https://ai4eps.github.io/PhaseNet/example_interactive/) 21 | - [GaMMA](https://ai4eps.github.io/GaMMA/): [(paper)](https://arxiv.org/abs/2109.09008) [(example)](https://ai4eps.github.io/GaMMA/example_interactive/) 22 | - [HypoDD](https://www.ldeo.columbia.edu/~felixw/hypoDD.html) [(paper)](https://pubs.geoscienceworld.org/ssa/bssa/article-abstract/90/6/1353/120565/A-Double-Difference-Earthquake-Location-Algorithm?redirectedFrom=fulltext) [(example)](https://github.com/ai4eps/QuakeFlow/blob/master/HypoDD/gamma2hypodd.py) 23 | - More models to be added. Contributions are highly welcome! 24 | 25 | ### Data stream 26 | - [Plotly](https://dash.gallery/Portal/): [ui.quakeflow.com](http://ui.quakeflow.com) 27 | - [Kafka](https://www.confluent.io/what-is-apache-kafka/) 28 | - [Spark Streaming](https://spark.apache.org/docs/latest/streaming-programming-guide.html) 29 | 30 | ### Data process 31 | - [Colab example](https://colab.research.google.com/drive/19dC8-Vq0mv1Q9K-OS8VJf3xNEweKv4SN) 32 | - [Kubeflow](https://www.kubeflow.org/): [(example)](https://ai4eps.github.io/QuakeFlow/kubeflow/workflow/) 33 | 34 | ![](https://raw.githubusercontent.com/wayneweiqiang/QuakeFlow/master/docs/assets/quakeflow.gif) 35 | 36 | ## Deployment 37 | 38 | QuakeFlow can be deployed on any cloud platform with a Kubernetes service. 39 | 40 | - For Google Cloud Platform (GCP), check out the [GCP README](gcp_readme.md). 41 | - For on-premise servers, check out the [Kubernetes README](k8s_readme.md). 
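For a quick start on GCP, the sketch below condenses the first deployment steps from [gcp_readme.md](gcp_readme.md); the cluster name, zone, and machine type are the example values used there, so adjust them to your own project, and follow that document for pod autoscaling, service exposure, and Kafka setup:

```
# Create an autoscaling GKE cluster (example name/zone/machine type from gcp_readme.md)
gcloud container clusters create quakeflow-cluster --zone="us-west1-a" --scopes="cloud-platform" --image-type="ubuntu" --machine-type="n1-standard-2" --num-nodes=2 --enable-autoscaling --min-nodes 1 --max-nodes 4

# Point kubectl at the new cluster and deploy the QuakeFlow services
gcloud container clusters get-credentials quakeflow-cluster
kubectl apply -f quakeflow-gcp.yaml   # manifest is under kubernetes/ in this repository
```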
42 | 43 | 53 | 54 | -------------------------------------------------------------------------------- /docs/assets/inference_pipeline_plotly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/inference_pipeline_plotly.png -------------------------------------------------------------------------------- /docs/assets/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/logo.jpg -------------------------------------------------------------------------------- /docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/logo.png -------------------------------------------------------------------------------- /docs/assets/quakeflow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/quakeflow.gif -------------------------------------------------------------------------------- /docs/assets/quakeflow_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/assets/quakeflow_diagram.png -------------------------------------------------------------------------------- /docs/data.md: -------------------------------------------------------------------------------- 1 | # Downloading Data using Obspy -------------------------------------------------------------------------------- /docs/data_format.md: -------------------------------------------------------------------------------- 1 | # Standard Data Formats of QuakeFlow 2 | 3 | - Raw data: 4 | - Waveform (MSEED): 5 | - Year/Jday/Hour/Network.Station.Location.Channel.mseed 6 | - Station (xml): 7 | - Network.Station.xml 8 | - Events (CSV): 9 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 10 | - Picks (CSV) 11 | - columns: station_id (network.station.location.channel) phase_time, phase_type, phase_score, event_id 12 | - Phase picking: 13 | - Picks (CSV): 14 | - columns: station_id (network.station.location.channel) phase_time, phase_type, phase_score, phase_polarity 15 | - Phase association: 16 | - Events (CSV): 17 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 18 | - Picks (CSV): 19 | - columns: station_id (network.station.location.channel), phase_time, phase_type, phase_score, phase_polarity, event_id 20 | - Earthquake location: 21 | - Events (CSV): 22 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 23 | - Earthquake relocation: 24 | - Events (CSV): 25 | - colums: time, latitude, longitude, depth_km, magnitude, event_id 26 | - Focal mechanism: 27 | - Focal mechanism (CSV): 28 | - columns: strike1, dip1, rake1, strike2, dip2, rake2, event_id -------------------------------------------------------------------------------- /docs/deepdenoiser.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/deepdenoiser.md 
-------------------------------------------------------------------------------- /docs/earthquake_location.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/earthquake_location.md -------------------------------------------------------------------------------- /docs/gamma.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/gamma.md -------------------------------------------------------------------------------- /docs/gcp_readme.md: -------------------------------------------------------------------------------- 1 | # Quick README (not a detailed guide) 2 | 3 | 4 | 1. Create a cluster on GCP with node autoscaling 5 | 6 | ``` 7 | gcloud container clusters create quakeflow-cluster --zone="us-west1-a" --scopes="cloud-platform" --image-type="ubuntu" --machine-type="n1-standard-2" --num-nodes=2 --enable-autoscaling --min-nodes 1 --max-nodes 4 8 | ``` 9 | 10 | 2. Switch to the correct context 11 | 12 | ``` 13 | gcloud container clusters get-credentials quakeflow-cluster 14 | ``` 15 | 16 | 3. Deploy the services on the cluster 17 | 18 | ``` 19 | kubectl apply -f quakeflow-gcp.yaml 20 | ``` 21 | 22 | 4. Set up the APIs 23 | 24 | 4.1 Add pod autoscaling 25 | ``` 26 | kubectl autoscale deployment phasenet-api --cpu-percent=80 --min=1 --max=10 27 | kubectl autoscale deployment gmma-api --cpu-percent=80 --min=1 --max=10 28 | ``` 29 | 30 | 4.2 Expose the APIs 31 | ``` 32 | kubectl expose deployment phasenet-api --type=LoadBalancer --name=phasenet-service 33 | kubectl expose deployment gmma-api --type=LoadBalancer --name=gmma-service 34 | kubectl expose deployment quakeflow-ui --type=LoadBalancer --name=quakeflow-ui 35 | ``` 36 | 37 | 5. Install Kafka 38 | 39 | 5.1 Install 40 | ``` 41 | helm install quakeflow-kafka bitnami/kafka 42 | ``` 43 | 44 | 5.2 Create topics (the bootstrap server matches the `quakeflow-kafka` Helm release installed above) 45 | ``` 46 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never --command -- bash -c "kafka-topics.sh --create --topic phasenet_picks --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic gmma_events --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic waveform_raw --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 47 | ``` 48 | 49 | 5.3 Check status 50 | ``` 51 | helm status quakeflow-kafka 52 | ``` 53 | 54 | 55 | 6. Rolling-restart the deployments 56 | ``` 57 | kubectl rollout restart deployments 58 | ``` 59 | 60 | 7. 
Install Dashboard 61 | ``` 62 | kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml 63 | ``` 64 | 65 | Run the following command and then visit http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/ 66 | ``` 67 | kubectl proxy 68 | ``` 69 | 70 | If you are asked to provide a token, get one with the following command 71 | ``` 72 | gcloud config config-helper --format=json | jq -r '.credential.access_token' 73 | ``` 74 | -------------------------------------------------------------------------------- /docs/k8s_readme.md: -------------------------------------------------------------------------------- 1 | # Quick README (not a detailed guide) 2 | 3 | ## All-in-one script 4 | You need to preinstall [helm](https://helm.sh/), [kubectl](https://kubernetes.io/docs/tasks/tools/), [docker](https://docs.docker.com/engine/install/) and [minikube](https://minikube.sigs.k8s.io/docs/start/) (or any other local Kubernetes framework). 5 | 6 | Then deploy everything with the following script! 7 | 8 | ``` 9 | $ git clone --recurse-submodules -j8 git@github.com:wayneweiqiang/QuakeFlow.git 10 | $ sh deploy_local.sh 11 | ``` 12 | 13 | ## Prebuilt Kafka 14 | 15 | 1. Install 16 | ``` 17 | helm repo add bitnami https://charts.bitnami.com/bitnami 18 | helm install quakeflow-kafka bitnami/kafka 19 | ``` 20 | 21 | 2. Create topics 22 | ``` 23 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never --command -- bash -c "kafka-topics.sh --create --topic phasenet_picks --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic gmma_events --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic waveform_raw --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 24 | ``` 25 | 26 | 3. Check status 27 | ``` 28 | helm status quakeflow-kafka 29 | ``` 30 | 31 | ## Our own containers 32 | 33 | 1. Switch to the minikube environment 34 | ``` 35 | eval $(minikube docker-env) 36 | ``` 37 | 38 | 1.1. Fix metrics-server for auto-scaling (only for Docker) 39 | https://stackoverflow.com/questions/54106725/docker-kubernetes-mac-autoscaler-unable-to-find-metrics 40 | 41 | ``` 42 | kubectl apply -f metrics-server.yaml 43 | ``` 44 | 45 | 2. Build the Docker images; see the docs for each container 46 | 47 | ``` 48 | docker build --tag quakeflow-spark:1.0 . 49 | ... 50 | ``` 51 | 52 | 3. Create everything 53 | ``` 54 | kubectl apply -f quakeflow-delpoyment.yaml 55 | ``` 56 | 57 | 3.1 Add autoscaling 58 | ``` 59 | kubectl autoscale deployment phasenet-api --cpu-percent=80 --min=1 --max=10 60 | kubectl autoscale deployment gmma-api --cpu-percent=80 --min=1 --max=10 61 | ``` 62 | 63 | 3.2 Expose API 64 | ``` 65 | kubectl expose deployment phasenet-api --type=LoadBalancer --name=phasenet-service 66 | ``` 67 | 68 | 4. Check the pods 69 | ``` 70 | kubectl get pods 71 | ``` 72 | 73 | 5. Check the logs (an example) 74 | ``` 75 | kubectl logs quakeflow-spark-7699cd45d8-mvv6r 76 | ``` 77 | 78 | 6. Delete a single deployment 79 | ``` 80 | kubectl delete deploy quakeflow-spark 81 | ``` 82 | 83 | 7. 
Delete everything 84 | ``` 85 | kubectl delete -f quakeflow-delpoyment.yaml 86 | ``` 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /docs/kubeflow: -------------------------------------------------------------------------------- 1 | ../kubeflow -------------------------------------------------------------------------------- /docs/phasenet.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/docs/phasenet.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - cartopy 7 | - obspy 8 | prefix: /opt/conda 9 | -------------------------------------------------------------------------------- /examples/california/.gitignore: -------------------------------------------------------------------------------- 1 | eikonal/ 2 | local/ 3 | tests/ 4 | mseed_list_NC/ 5 | mseed_list_SC/ 6 | tmp_NC/ 7 | tmp_SC/ 8 | NC/ 9 | SC/ 10 | .history/ 11 | local/ 12 | tests/ 13 | figures/ 14 | benchmark/ 15 | Cal/ 16 | cctorch_figures/ 17 | cctorch_figures2/ 18 | cctorch2_figures/ 19 | __pycache__/ 20 | -------------------------------------------------------------------------------- /examples/california/.skyignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | local/ 3 | tests/ 4 | figures/ 5 | benchmark/ 6 | Cal/ 7 | cctorch_figures/ 8 | cctorch_figures2/ 9 | cctorch2_figures/ 10 | 11 | -------------------------------------------------------------------------------- /examples/california/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_args(): 5 | parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data") 6 | parser.add_argument("--num_nodes", type=int, default=1) 7 | parser.add_argument("--node_rank", type=int, default=0) 8 | parser.add_argument("--year", type=int, default=2023) 9 | parser.add_argument("--root_path", type=str, default="local") 10 | parser.add_argument("--region", type=str, default="Cal") 11 | parser.add_argument("--bucket", type=str, default="quakeflow_catalog") 12 | parser.add_argument("--config", type=str, default="local/Mendocino/config.json") 13 | return parser.parse_args() 14 | -------------------------------------------------------------------------------- /examples/california/monitor.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import argparse 3 | import json 4 | import logging 5 | import os 6 | import time 7 | from tqdm import tqdm 8 | import fsspec 9 | 10 | logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s") 11 | 12 | # %% 13 | NUM_NODES = 32 # < 2004 14 | # NUM_NODES = 128 # >= 2004 15 | 16 | ###### Hardcoded ####### 17 | token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 18 | with open(token_json, "r") as fp: 19 | token = json.load(fp) 20 | fs = fsspec.filesystem("gs", token=token) 21 | 22 | # for year in tqdm(range(1986, 1999)[::-1]): 23 | # cmd = f"python run_phasenet.py --region NC --year {year} --num_nodes 1" 24 | # os.system(cmd) 25 | 26 | # raise 27 | 28 | for year in range(1999, 2005)[::-1]: 29 | 30 | cmds = [ 31 | f"python 
submit_phasenet.py --region NC --branch ncedc --year {year} --num_nodes {NUM_NODES}", 32 | f"python submit_phasenet.py --region SC --branch scedc --year {year} --num_nodes {NUM_NODES}", 33 | ] 34 | 35 | # while True: 36 | if True: 37 | for cmd in cmds: 38 | logging.info(f"Running: {cmd}") 39 | os.system(cmd) 40 | logging.info("Sleeping for 1 minutes...") 41 | time.sleep(60) 42 | 43 | finish = True 44 | for REGION in ["NC", "SC"]: 45 | for NODE_RANK in range(NUM_NODES): 46 | mseed_file = ( 47 | f"gs://quakeflow_catalog/{REGION}/phasenet/mseed_list/{year}_{NODE_RANK:03d}_{NUM_NODES:03d}.txt" 48 | ) 49 | if fs.exists(mseed_file): 50 | with fs.open(mseed_file, "r") as fp: 51 | mseed_list = fp.readlines() 52 | if len(mseed_list) > 0: 53 | print(f"{mseed_file}, {len(mseed_list) = }") 54 | finish = False 55 | break 56 | if finish: 57 | break 58 | -------------------------------------------------------------------------------- /examples/california/refresh.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import sky 3 | from tqdm import tqdm 4 | 5 | # %% 6 | status = sky.status() 7 | 8 | # %% 9 | for cluster in tqdm(sky.status()[::-1]): 10 | try: 11 | print(f"Refreshing {cluster['name']}...") 12 | sky.status(cluster_names=[cluster["name"]], refresh=True) 13 | if not cluster["to_down"]: 14 | sky.autostop(cluster["name"], idle_minutes=10, down=True) 15 | except Exception as e: 16 | print(e) 17 | 18 | # %% 19 | -------------------------------------------------------------------------------- /examples/california/run_gamma.yaml: -------------------------------------------------------------------------------- 1 | name: dev 2 | 3 | workdir: . 4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp # aws 10 | region: us-west1 # gcp 11 | # region: us-west-2 # aws 12 | # instance_type: n2-highmem-16 13 | # accelerators: P100:1 14 | # cpus: 16+ 15 | cpus: 16 16 | # disk_size: 300 17 | # disk_tier: high 18 | # use_spot: True 19 | # spot_recovery: FAILOVER 20 | # image_id: docker:zhuwq0/quakeflow:latest 21 | 22 | # # envs: 23 | # JOB: quakeflow 24 | # NCPU: 1 25 | # ROOT_PATH: /data 26 | # MODEL_NAME: phasenet_plus 27 | # WANDB_API_KEY: cb014c63ac451036ca406582b41d32ae83154289 28 | 29 | file_mounts: 30 | 31 | # /data/waveforms: 32 | # name: waveforms 33 | # source: waveforms_combined 34 | # mode: MOUNT 35 | 36 | # /dataset/stations: 37 | # name: stations 38 | # source: stations 39 | # mode: COPY 40 | 41 | # /data/waveforms: waveforms_combined 42 | # /dataset/stations: stations 43 | 44 | # ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | # ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | # ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | 48 | /opt/GaMMA: ../../GaMMA 49 | 50 | setup: | 51 | echo "Begin setup." 
52 | echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc 53 | pip install h5py tqdm wandb pandas numpy scipy 54 | pip install fsspec gcsfs 55 | pip install obspy pyproj 56 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 57 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 58 | pip install -e /opt/GaMMA 59 | 60 | run: | 61 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 62 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 63 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 64 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 65 | ls -al /opt 66 | ls -al /data 67 | fi 68 | python run_gamma.py --num_node $num_nodes --node_rank $SKYPILOT_NODE_RANK 69 | # torchrun \ 70 | # --nproc_per_node=${nproc_per_node} \ 71 | # --node_rank=${SKYPILOT_NODE_RANK} \ 72 | # --nnodes=$num_nodes \ 73 | # --master_addr=$master_addr \ 74 | # --master_port=8008 \ 75 | # train.py --model $MODEL_NAME --batch-size=256 --hdf5-file /dataset/train.h5 --test-hdf5-file /dataset/test.h5 \ 76 | # --workers 12 --stack-event --flip-polarity --drop-channel --output /checkpoint/$MODEL_NAME --wandb --wandb-project $MODEL_NAME --resume True -------------------------------------------------------------------------------- /examples/california/run_growclust_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import argparse 3 | import os 4 | from datetime import datetime 5 | 6 | import pandas as pd 7 | from tqdm import tqdm 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data") 12 | parser.add_argument("--num_nodes", type=int, default=1) 13 | parser.add_argument("--node_rank", type=int, default=0) 14 | parser.add_argument("--year", type=int, default=2023) 15 | parser.add_argument("--root_path", type=str, default="local") 16 | parser.add_argument("--region", type=str, default="Cal") 17 | parser.add_argument("--bucket", type=str, default="quakeflow_catalog") 18 | return parser.parse_args() 19 | 20 | 21 | # %% 22 | args = parse_args() 23 | root_path = args.root_path 24 | region = args.region 25 | 26 | # %% 27 | result_path = f"{region}/growclust" 28 | if not os.path.exists(f"{root_path}/{result_path}"): 29 | os.makedirs(f"{root_path}/{result_path}") 30 | 31 | # %% 32 | # stations_json = f"{region}/results/data/stations.json" 33 | # stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") 34 | station_csv = f"{region}/cctorch/cctorch_stations.csv" 35 | stations = pd.read_csv(f"{root_path}/{station_csv}") 36 | stations.set_index("station_id", inplace=True) 37 | 38 | 39 | lines = [] 40 | for i, row in stations.iterrows(): 41 | # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 42 | line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 43 | lines.append(line) 44 | 45 | with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: 46 | fp.writelines(lines) 47 | 48 | 49 | # %% 50 | # events_csv = f"{region}/results/phase_association/events.csv" 51 | # events_csv = f"{region}/adloc/ransac_events.csv" 52 | events_csv = f"{region}/cctorch/cctorch_events.csv" 53 | # event_file = f"{region}/cctorch/events.csv" 54 | events = pd.read_csv(f"{root_path}/{events_csv}") 55 | # event_df = event_df[event_df["gamma_score"] > 10] 56 | # event_index = [f"{x:06d}" for x in event_df["event_index"]] 57 | # 
events["time"] = pd.to_datetime(events["time"]) 58 | events["time"] = pd.to_datetime(events["event_time"]) 59 | if "magnitude" not in events.columns: 60 | events["magnitude"] = 0.0 61 | 62 | events[["year", "month", "day", "hour", "minute", "second"]] = ( 63 | events["time"] 64 | # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) 65 | .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) 66 | .apply(pd.Series) 67 | .apply(pd.to_numeric) 68 | ) 69 | 70 | lines = [] 71 | for i, row in events.iterrows(): 72 | # yr mon day hr min sec lat lon dep mag eh ez rms evid 73 | line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" 74 | lines.append(line) 75 | 76 | with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: 77 | fp.writelines(lines) 78 | 79 | # %% 80 | os.system(f"bash run_growclust_cc.sh {root_path} {region}") 81 | -------------------------------------------------------------------------------- /examples/california/run_growclust_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/growclust" ]; then 13 | mkdir -p $root_path/$region/growclust 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc 17 | cd $root_path/$region/growclust 18 | mkdir -p TT OUT 19 | 20 | if [ ! -d "GrowClust" ]; then 21 | git clone https://github.com/zhuwq0/GrowClust.git 22 | make -C GrowClust/SRC/ 23 | fi 24 | 25 | cat < growclust.inp 26 | **** Example GrowClust Control File ***** 27 | ******** Daniel Trugman, 2016 ********** 28 | ******************************************* 29 | * 30 | ******************************************* 31 | ************* Event list **************** 32 | ******************************************* 33 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 34 | 1 35 | * fin_evlist (event list file name) 36 | evlist.txt 37 | * 38 | ******************************************* 39 | ************ Station list ************* 40 | ******************************************* 41 | * stlist_fmt (0 = SEED channel, 1 = station name) 42 | 1 43 | * fin_stlist (station list file name) 44 | stlist.txt 45 | * 46 | ******************************************* 47 | ************* XCOR data *************** 48 | ******************************************* 49 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 50 | 1 12 51 | * fin_xcordat 52 | dt.cc 53 | * 54 | ******************************************* 55 | *** Velocity Model / Travel Time Tables *** 56 | ******************************************* 57 | * fin_vzmdl (input vz model file) 58 | vzmodel.txt 59 | * fout_vzfine (output, interpolated vz model file) 60 | TT/vzfine.txt 61 | * fout_pTT (output travel time table, P phase) 62 | TT/tt.pg 63 | * fout_sTT (output travel time table, S phase) 64 | TT/tt.sg 65 | * 66 | ****************************************** 67 | ***** Travel Time Table Parameters ****** 68 | ****************************************** 69 | * vpvs_factor rayparam_min (-1 = default) 70 | 1.732 0.0 71 | * tt_dep0 tt_dep1 tt_ddep 72 | 0. 71. 1. 73 | * tt_del0 tt_del1 tt_ddel 74 | 0. 500. 2. 
75 | * 76 | ****************************************** 77 | ***** GrowClust Algorithm Parameters ***** 78 | ****************************************** 79 | * rmin delmax rmsmax 80 | 0.1 120 1.0 81 | * rpsavgmin, rmincut ngoodmin iponly 82 | 0 0.1 8 0 83 | * 84 | ****************************************** 85 | ************ Output files **************** 86 | ****************************************** 87 | * nboot nbranch_min 88 | 0 1 89 | * fout_cat (relocated catalog) 90 | OUT/out.growclust_cc_cat 91 | * fout_clust (relocated cluster file) 92 | OUT/out.growclust_cc_clust 93 | * fout_log (program log) 94 | OUT/out.growclust_cc_log 95 | * fout_boot (bootstrap distribution) 96 | OUT/out.growclust_cc_boot 97 | ****************************************** 98 | ****************************************** 99 | EOF 100 | 101 | cat < vzmodel.txt 102 | 0.0 5.30 0.00 103 | 1.0 5.65 0.00 104 | 3.0 5.93 0.00 105 | 5.0 6.20 0.00 106 | 7.0 6.20 0.00 107 | 9.0 6.20 0.00 108 | 11.0 6.20 0.00 109 | 13.0 6.20 0.00 110 | 17.0 6.20 0.00 111 | 21.0 6.20 0.00 112 | 31.00 7.50 0.00 113 | 31.10 8.11 0.00 114 | 100.0 8.11 0.00 115 | EOF 116 | 117 | ./GrowClust/SRC/growclust growclust.inp 118 | cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt 119 | cd $WORKING_DIR 120 | -------------------------------------------------------------------------------- /examples/california/run_hypodd_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # from args import parse_args 3 | ## 4 | import argparse 5 | import json 6 | import os 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data") 14 | parser.add_argument("--num_nodes", type=int, default=1) 15 | parser.add_argument("--node_rank", type=int, default=0) 16 | parser.add_argument("--year", type=int, default=2023) 17 | parser.add_argument("--root_path", type=str, default="local") 18 | parser.add_argument("--region", type=str, default="Cal") 19 | parser.add_argument("--bucket", type=str, default="quakeflow_catalog") 20 | return parser.parse_args() 21 | 22 | 23 | # %% 24 | args = parse_args() 25 | root_path = args.root_path 26 | region = args.region 27 | 28 | # with open(f"{root_path}/{region}/config.json", "r") as fp: 29 | # config = json.load(fp) 30 | config = json.load(open("config.json", "r")) 31 | 32 | # %% 33 | data_path = f"{region}/cctorch" 34 | result_path = f"{region}/hypodd" 35 | if not os.path.exists(f"{root_path}/{result_path}"): 36 | os.makedirs(f"{root_path}/{result_path}") 37 | 38 | # %% 39 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") 40 | 41 | station_lines = {} 42 | for i, row in stations.iterrows(): 43 | station_id = row["station_id"] 44 | network_code, station_code, comp_code, channel_code = station_id.split(".") 45 | # tmp_code = f"{station_code}{channel_code}" 46 | tmp_code = f"{station_code}" 47 | station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n" 48 | 49 | 50 | with open(f"{root_path}/{result_path}/stations.dat", "w") as f: 51 | for line in sorted(station_lines.values()): 52 | f.write(line) 53 | 54 | # %% 55 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv") 56 | events["time"] = pd.to_datetime(events["event_time"], format="mixed") 57 | 58 | event_lines = [] 59 | 60 | for i, row in events.iterrows(): 61 | event_index = row["event_index"] 62 | origin = row["time"] 63 | magnitude = row["magnitude"] 64 | x_err = 0.0 65 
| z_err = 0.0 66 | time_err = 0.0 67 | dx, dy, dz = 0.0, 0.0, 0.0 68 | # dx = np.random.uniform(-0.01, 0.01) 69 | # dy = np.random.uniform(-0.01, 0.01) 70 | # dz = np.random.uniform(0, 10) 71 | # dz = 0 72 | event_lines.append( 73 | f"{origin.year:4d}{origin.month:02d}{origin.day:02d} " 74 | f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} " 75 | # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} " 76 | f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} " 77 | f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n" 78 | ) 79 | 80 | with open(f"{root_path}/{result_path}/events.dat", "w") as f: 81 | f.writelines(event_lines) 82 | 83 | # %% 84 | os.system(f"bash run_hypodd_cc.sh {root_path} {region}") 85 | -------------------------------------------------------------------------------- /examples/california/run_hypodd_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/hypodd" ]; then 13 | mkdir -p $root_path/$region/hypodd 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc 17 | cd $root_path/$region/hypodd 18 | 19 | if [ ! -d "HypoDD" ]; then 20 | git clone https://github.com/zhuwq0/HypoDD.git 21 | export PATH=$PATH:$PWD/HypoDD 22 | make -C HypoDD/src/ 23 | fi 24 | 25 | cat < cc.inp 26 | * RELOC.INP: 27 | *--- input file selection 28 | * cross correlation diff times: 29 | dt.cc 30 | * 31 | *catalog P diff times: 32 | 33 | * 34 | * event file: 35 | events.dat 36 | * 37 | * station file: 38 | stations.dat 39 | * 40 | *--- output file selection 41 | * original locations: 42 | hypodd_cc.loc 43 | * relocations: 44 | hypodd_cc.reloc 45 | * station information: 46 | hypodd.sta 47 | * residual information: 48 | hypodd.res 49 | * source paramater information: 50 | hypodd.src 51 | * 52 | *--- data type selection: 53 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 54 | * IPHA: 1= P; 2= S; 3= P&S 55 | * DIST:max dist [km] between cluster centroid and station 56 | * IDAT IPHA DIST 57 | 1 3 120 58 | * 59 | *--- event clustering: 60 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 61 | * OBSCT: min # of obs/pair for network data (0= no clustering) 62 | * OBSCC OBSCT 63 | 0 0 64 | * 65 | *--- solution control: 66 | * ISTART: 1 = from single source; 2 = from network sources 67 | * ISOLV: 1 = SVD, 2=lsqr 68 | * NSET: number of sets of iteration with specifications following 69 | * ISTART ISOLV NSET 70 | 2 2 4 71 | * 72 | *--- data weighting and re-weighting: 73 | * NITER: last iteration to used the following weights 74 | * WTCCP, WTCCS: weight cross P, S 75 | * WTCTP, WTCTS: weight catalog P, S 76 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 77 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 78 | * DAMP: damping (for lsqr only) 79 | * --- CROSS DATA ----- ----CATALOG DATA ---- 80 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 81 | 4 1 1 -9 -9 -9 -9 -9 -9 70 82 | 4 1 1 6 -9 -9 -9 -9 -9 70 83 | 4 1 0.8 3 4 -9 -9 -9 -9 70 84 | 4 1 0.8 2 2 -9 -9 -9 -9 70 85 | * 86 | *--- 1D model: 87 | * NLAY: number of model layers 88 | * RATIO: vp/vs ratio 89 | * TOP: depths of top of layer (km) 90 | * VEL: layer velocities (km/s) 91 | * NLAY RATIO 
92 | 12 1.73 93 | * TOP 94 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 95 | * VEL 96 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 97 | * 98 | *--- event selection: 99 | * CID: cluster to be relocated (0 = all) 100 | * ID: cuspids of event to be relocated (8 per line) 101 | * CID 102 | 0 103 | * ID 104 | EOF 105 | 106 | ./HypoDD/src/hypoDD/hypoDD cc.inp 107 | cd $WORKING_DIR -------------------------------------------------------------------------------- /examples/california/run_phasenet.yaml: -------------------------------------------------------------------------------- 1 | name: dev 2 | 3 | workdir: . 4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp # aws 10 | region: us-west1 # gcp 11 | # region: us-west-2 # aws 12 | # instance_type: n2-highmem-16 13 | # accelerators: P100:1 14 | # cpus: 16+ 15 | cpus: 16 16 | # disk_size: 300 17 | # disk_tier: high 18 | # use_spot: True 19 | # spot_recovery: FAILOVER 20 | # image_id: docker:zhuwq0/quakeflow:latest 21 | 22 | envs: 23 | # REGION: SC 24 | # BRANCH: scedc 25 | REGION: NC 26 | BRANCH: ncedc 27 | YEAR: 2022 28 | 29 | file_mounts: 30 | 31 | # /data/waveforms: 32 | # name: waveforms 33 | # source: waveforms_combined 34 | # mode: MOUNT 35 | 36 | # /dataset/stations: 37 | # name: stations 38 | # source: stations 39 | # mode: COPY 40 | 41 | # /data/waveforms: waveforms_combined 42 | # /dataset/stations: stations 43 | 44 | # ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | # ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | # ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | 48 | # /opt/GaMMA: ../../GaMMA 49 | # /opt/PhaseNet: ../../PhaseNet 50 | 51 | setup: | 52 | echo "Begin setup." 53 | echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc 54 | pip install h5py tqdm wandb pandas numpy scipy 55 | pip install fsspec gcsfs s3fs 56 | pip install obspy pyproj 57 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 58 | # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 59 | ### PhaseNet 60 | pip install tensorflow==2.14.0 61 | [ ! -d "PhaseNet" ] && git clone https://github.com/AI4EPS/PhaseNet.git 62 | cd PhaseNet && git checkout $BRANCH && git pull origin $BRANCH && cd .. 
63 | ### GaMMA 64 | # pip install -e /opt/GaMMA 65 | 66 | run: | 67 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 68 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 69 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 70 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 71 | ls -al /opt 72 | ls -al /data 73 | fi 74 | python run_phasenet.py --model_path PhaseNet --num_node $num_nodes --node_rank $SKYPILOT_NODE_RANK --region $REGION --year $YEAR -------------------------------------------------------------------------------- /examples/california/set_config_ncedc.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import json 3 | import os 4 | import sys 5 | 6 | root_path = "local" 7 | region = "ncedc" 8 | if len(sys.argv) > 1: 9 | root_path = sys.argv[1] 10 | region = sys.argv[2] 11 | 12 | config = { 13 | "minlatitude": 32, 14 | "maxlatitude": 43, 15 | "minlongitude": -126.0, 16 | "maxlongitude": -114.0, 17 | "num_nodes": 1, 18 | "sampling_rate": 100, 19 | "degree2km": 111.1949, 20 | "channel": "HH*,BH*,EH*,HN*", 21 | } 22 | 23 | with open(f"{root_path}/{region}/config.json", "w") as fp: 24 | json.dump(config, fp, indent=2) -------------------------------------------------------------------------------- /examples/california/submit_template.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | import sky 6 | 7 | 8 | # NUM_NODES = 8 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--num_nodes", type=int, default=16) 12 | parser.add_argument("--year", type=int, default=2023) 13 | parser.add_argument("--region", type=str, default="CA") 14 | return parser.parse_args() 15 | 16 | 17 | args = parse_args() 18 | NUM_NODES = args.num_nodes 19 | YEAR = args.year 20 | REGION = args.region 21 | 22 | task = sky.Task( 23 | name="cut_template", 24 | setup=""" 25 | echo "Begin setup." 
26 | echo export WANDB_API_KEY=$WANDB_API_KEY >> ~/.bashrc 27 | pip install -U h5py tqdm wandb pandas scipy scikit-learn numpy==1.26.4 28 | pip install -U fsspec gcsfs s3fs 29 | pip install -U obspy pyproj 30 | pip install -e /opt/ADLoc 31 | """, 32 | run=""" 33 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 34 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 35 | if [ "$SKYPILOT_NODE_RANK" == "0" ]; then 36 | ls -al /opt 37 | ls -al /data 38 | ls -al ./ 39 | cat config.json 40 | fi 41 | python cut_templates_cc.py --num_node $NUM_NODES --node_rank $NODE_RANK --year $YEAR --config config.json 42 | """, 43 | workdir=".", 44 | num_nodes=1, 45 | envs={"NUM_NODES": NUM_NODES, "NODE_RANK": 0, "YEAR": YEAR}, 46 | ) 47 | 48 | task.set_file_mounts( 49 | { 50 | "/opt/ADLoc": "../../ADLoc", 51 | "config.json": "local/Mendocino/config.json", 52 | # "config.json": "local/Cal/config.json", 53 | }, 54 | ) 55 | # task.set_storage_mounts({ 56 | # '/remote/imagenet/': sky.Storage(name='my-bucket', 57 | # source='/local/imagenet'), 58 | # }) 59 | task.set_resources( 60 | sky.Resources( 61 | cloud=sky.GCP(), 62 | region="us-west1", # GCP 63 | # region="us-west-2", # AWS 64 | accelerators=None, 65 | cpus=16, 66 | disk_tier="low", 67 | disk_size=50, # GB 68 | memory="64+", 69 | use_spot=True, 70 | ), 71 | ) 72 | 73 | # for NODE_RANK in range(NUM_NODES): 74 | # task.update_envs({"NODE_RANK": NODE_RANK}) 75 | # cluster_name = f"cctorch-{NODE_RANK:02d}" 76 | # print(f"Launching cluster {cluster_name}-{NUM_NODES}...") 77 | # sky.jobs.launch( 78 | # task, 79 | # name=f"{cluster_name}", 80 | # ) 81 | 82 | jobs = [] 83 | try: 84 | sky.status(refresh="AUTO") 85 | except Exception as e: 86 | print(e) 87 | 88 | # task.update_envs({"NODE_RANK": 0}) 89 | # job_id = sky.launch(task, cluster_name="template", fast=True) 90 | # # job_id = sky.exec(task, cluster_name="template") 91 | # status = sky.stream_and_get(job_id) 92 | # # sky.tail_logs(cluster_name="cctorch8", job_id=job_id, follow=True) 93 | # print(f"Job ID: {job_id}, status: {status}") 94 | 95 | # raise 96 | 97 | job_idx = 1 98 | requests_ids = [] 99 | for NODE_RANK in range(NUM_NODES): 100 | # for NODE_RANK in range(30): 101 | 102 | task.update_envs({"NODE_RANK": NODE_RANK}) 103 | cluster_name = f"template-{NODE_RANK:03d}" 104 | 105 | requests_ids.append(sky.jobs.launch(task, name=f"{cluster_name}")) 106 | 107 | print(f"Running cut_template on (rank={NODE_RANK}, num_node={NUM_NODES}) of {cluster_name}") 108 | 109 | job_idx += 1 110 | 111 | for request_id in requests_ids: 112 | print(sky.get(request_id)) 113 | -------------------------------------------------------------------------------- /examples/california/tests/clustering.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | from sklearn.cluster import DBSCAN 6 | 7 | # %% 8 | v = 6.0 9 | 10 | # scale_x = 4 11 | # eps = 8 12 | # min_samples = 8 13 | 14 | scale_x = 1 15 | eps = 10 16 | min_samples = 3 17 | 18 | # %% 19 | picks = pd.read_csv("phasenet_picks_20230919_1h.csv", parse_dates=["phase_time"]) 20 | # picks = pd.read_csv("phasenet_picks_20230820_1h.csv", parse_dates=["phase_time"]) 21 | 22 | # %% 23 | stations = pd.read_json("stations.json", orient="index") 24 | stations["station_id"] = stations.index 25 | 26 | # %% 27 | picks = picks.merge(stations, on="station_id") 28 | picks["t_s"] = (picks["phase_time"] - picks["phase_time"].min()).dt.total_seconds() 29 | 30 | # %% 31 | 
dbscan = DBSCAN(eps=eps, min_samples=min_samples) 32 | dbscan.fit(picks[["t_s", "x_km", "y_km"]].values / np.array([1, scale_x * v, v])) 33 | 34 | # %% 35 | picks["cluster"] = dbscan.labels_ 36 | # %% 37 | mapping_color = lambda x: f"C{x}" if x >= 0 else "black" 38 | plt.figure(figsize=(20, 5)) 39 | plt.scatter(picks["t_s"], picks["x_km"], c=picks["cluster"].apply(mapping_color), s=0.3) 40 | plt.title(f"Number of picks: {len(picks)}") 41 | plt.show() 42 | 43 | # %% 44 | picks_selected = picks.copy() 45 | dbscan = DBSCAN(eps=1, min_samples=1) 46 | dbscan.fit(picks_selected[["t_s", "x_km", "y_km"]].values / np.array([1, scale_x * v, v])) 47 | picks_selected["cluster"] = dbscan.labels_ 48 | picks_selected = ( 49 | picks_selected.groupby("cluster").agg({"t_s": "median", "x_km": "median", "y_km": "median"}).reset_index() 50 | ) 51 | 52 | # %% 53 | print(f"{len(picks) = }, {len(picks_selected) = }") 54 | 55 | # %% 56 | # mapping_color = lambda x: f"C{x}" if x >= 0 else "black" 57 | # plt.figure(figsize=(20, 5)) 58 | # plt.scatter(picks_selected["t_s"], picks_selected["x_km"], c=picks_selected["cluster"].apply(mapping_color), s=0.3) 59 | # plt.title(f"Number of picks: {len(picks_selected)}") 60 | # plt.show() 61 | 62 | # %% 63 | dbscan = DBSCAN(eps=eps, min_samples=min_samples) 64 | dbscan.fit(picks_selected[["t_s", "x_km", "y_km"]].values / np.array([1, scale_x * v, v])) 65 | 66 | # %% 67 | picks_selected["cluster"] = dbscan.labels_ 68 | # %% 69 | mapping_color = lambda x: f"C{x}" if x >= 0 else "black" 70 | plt.figure(figsize=(20, 5)) 71 | plt.scatter(picks_selected["t_s"], picks_selected["x_km"], c=picks_selected["cluster"].apply(mapping_color), s=0.3) 72 | plt.title(f"Number of picks: {len(picks_selected)}") 73 | plt.show() 74 | -------------------------------------------------------------------------------- /examples/forge/load_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import io 3 | import multiprocessing as mp 4 | import os 5 | 6 | import fsspec 7 | from obspy import read 8 | from tqdm import tqdm 9 | 10 | 11 | # %% 12 | def process_url(url): 13 | with fsspec.open(url, mode="rb") as fp: 14 | content = fp.read() 15 | 16 | file_like_object = io.BytesIO(content) 17 | 18 | st = read(file_like_object) 19 | 20 | 21 | # %% 22 | if __name__ == "__main__": 23 | # %% 24 | # https://constantine.seis.utah.edu/datasets.html 25 | os.system("curl -o urls.txt https://constantine.seis.utah.edu/files/get_all_slb.sh") 26 | 27 | # %% 28 | urls = [] 29 | with open("urls.txt") as f: 30 | for line in f: 31 | if line.startswith("wget"): 32 | urls.append(line.split()[-1]) 33 | 34 | # # %% 35 | # for url in tqdm(urls): 36 | # with fsspec.open(url, mode="rb") as fp: 37 | # content = fp.read() 38 | 39 | # file_like_object = io.BytesIO(content) 40 | 41 | # st = read(file_like_object) 42 | # # print(st) 43 | # # raise 44 | 45 | # %% 46 | ncpu = mp.cpu_count() * 2 47 | print(f"Number of CPUs: {ncpu}") 48 | pbar = tqdm(total=len(urls)) 49 | with mp.Pool(ncpu) as pool: 50 | for url in urls: 51 | pool.apply_async( 52 | func=process_url, 53 | args=(url,), 54 | callback=lambda _: pbar.update(1), 55 | error_callback=lambda e: print(e), 56 | ) 57 | pool.close() 58 | pool.join() 59 | 60 | # %% 61 | -------------------------------------------------------------------------------- /examples/japan/.gitignore: -------------------------------------------------------------------------------- 1 | local/ 2 | 
-------------------------------------------------------------------------------- /examples/japan/merge_picks.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | import os 4 | from glob import glob 5 | 6 | import pandas as pd 7 | from tqdm import tqdm 8 | 9 | 10 | def merge_csv(csv_files, pick_file): 11 | keep_header = True 12 | for csv_file in csv_files: 13 | if os.stat(csv_file).st_size == 0: 14 | continue 15 | if keep_header: 16 | cmd = f"cat {csv_file} > {pick_file}" 17 | keep_header = False 18 | else: 19 | cmd = f"tail -n +2 {csv_file} >> {pick_file}" 20 | os.system(cmd) 21 | 22 | 23 | # %% 24 | if __name__ == "__main__": 25 | # %% 26 | csv_path = "local/hinet/phasenet/csvs" 27 | pick_path = "local/hinet/phasenet/picks" 28 | if not os.path.exists(pick_path): 29 | os.makedirs(pick_path) 30 | 31 | # %% 32 | jdays = sorted(os.listdir(csv_path)) 33 | 34 | # %% 35 | ncpu = min(32, mp.cpu_count()) 36 | ctx = mp.get_context("spawn") 37 | pbar = tqdm(total=len(jdays)) 38 | 39 | # %% 40 | jobs = [] 41 | with ctx.Pool(ncpu) as pool: 42 | 43 | # %% 44 | for jday in jdays: 45 | csv_files = [] 46 | for hour in sorted(os.listdir(f"{csv_path}/{jday}")): 47 | tmp = glob(f"{csv_path}/{jday}/{hour}/*.csv") 48 | csv_files.extend(tmp) 49 | 50 | year, jday = jday.split("-") 51 | if not os.path.exists(f"local/hinet/phasenet/picks/{year}"): 52 | os.makedirs(f"local/hinet/phasenet/picks/{year}") 53 | pick_file = f"local/hinet/phasenet/picks/{year}/{jday}.csv" 54 | 55 | # merge_csv(csv_files, pick_file) 56 | job = pool.apply_async(merge_csv, (csv_files, pick_file), callback=lambda _: pbar.update(1)) 57 | jobs.append(job) 58 | 59 | pool.close() 60 | pool.join() 61 | 62 | for job in jobs: 63 | out = job.get() 64 | if out is not None: 65 | print(out) 66 | -------------------------------------------------------------------------------- /examples/japan/run_cctorch.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | 4 | import torch 5 | 6 | # %% 7 | # root_path = args.root_path 8 | # region = args.region 9 | root_path = "local" 10 | region = "hinet" 11 | 12 | data_path = f"{region}/cctorch" 13 | result_path = f"{region}/cctorch/ccpairs" 14 | if not os.path.exists(f"{root_path}/{result_path}"): 15 | os.makedirs(f"{root_path}/{result_path}") 16 | 17 | 18 | ## based on GPU memory 19 | 20 | batch = 1_024 21 | block_size1 = 1000_000 22 | block_size2 = 1000_000 23 | 24 | 25 | base_cmd = ( 26 | f"../../CCTorch/run.py --pair_list={root_path}/{region}/cctorch/pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap " 27 | f"--data_list1={root_path}/{region}/cctorch/cctorch_picks.csv " 28 | f"--events_csv={root_path}/{region}/cctorch/cctorch_events.csv --picks_csv={root_path}/{region}/cctorch/cctorch_picks.csv --stations_csv={root_path}/{region}/cctorch/cctorch_stations.csv " 29 | f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" 30 | ) 31 | 32 | num_gpu = torch.cuda.device_count() 33 | if num_gpu == 0: 34 | if os.uname().sysname == "Darwin": 35 | cmd = f"python {base_cmd} --device=cpu" 36 | else: 37 | cmd = f"python {base_cmd} --device=cpu" 38 | else: 39 | cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}" 40 | print(cmd) 41 | os.system(cmd) 42 | 43 | # %% 44 | os.chdir(f"{root_path}/{region}/cctorch") 45 | source_file = 
f"ccpairs/CC_{num_gpu:03d}_dt.cc" 46 | target_file = f"dt.cc" 47 | print(f"{source_file} -> {target_file}") 48 | if os.path.lexists(target_file): 49 | os.remove(target_file) 50 | os.symlink(source_file, target_file) 51 | 52 | source_file = f"ccpairs/CC_{num_gpu:03d}.csv" 53 | target_file = f"dtcc.csv" 54 | print(f"{source_file} -> {target_file}") 55 | if os.path.lexists(target_file): 56 | os.remove(target_file) 57 | os.symlink(source_file, target_file) 58 | -------------------------------------------------------------------------------- /examples/japan/run_growclust_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from datetime import datetime 4 | 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | # %% 9 | root_path = "local" 10 | region = "hinet" 11 | result_path = f"{region}/growclust" 12 | if not os.path.exists(f"{root_path}/{result_path}"): 13 | os.makedirs(f"{root_path}/{result_path}") 14 | 15 | # %% 16 | # stations_json = f"{region}/results/data/stations.json" 17 | # stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") 18 | # station_csv = f"{region}/adloc/ransac_stations.csv" 19 | station_csv = f"{region}/cctorch/cctorch_stations.csv" 20 | stations = pd.read_csv(f"{root_path}/{station_csv}") 21 | stations["station"] = stations["station_id"].apply(lambda x: x.split(".")[2]) 22 | stations.set_index("station_id", inplace=True) 23 | 24 | 25 | lines = [] 26 | for i, row in stations.iterrows(): 27 | # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 28 | line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 29 | lines.append(line) 30 | 31 | with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: 32 | fp.writelines(lines) 33 | 34 | 35 | # %% 36 | # events_csv = f"{region}/results/phase_association/events.csv" 37 | # events_csv = f"{region}/adloc/ransac_events.csv" 38 | events_csv = f"{region}/cctorch/cctorch_events.csv" 39 | # event_file = f"{region}/cctorch/events.csv" 40 | events = pd.read_csv(f"{root_path}/{events_csv}") 41 | # event_df = event_df[event_df["gamma_score"] > 10] 42 | # event_index = [f"{x:06d}" for x in event_df["event_index"]] 43 | events["time"] = pd.to_datetime(events["event_time"]) 44 | if "magnitude" not in events.columns: 45 | events["magnitude"] = 0.0 46 | 47 | events[["year", "month", "day", "hour", "minute", "second"]] = ( 48 | events["time"] 49 | # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) 50 | .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) 51 | .apply(pd.Series) 52 | .apply(pd.to_numeric) 53 | ) 54 | 55 | lines = [] 56 | for i, row in events.iterrows(): 57 | # yr mon day hr min sec lat lon dep mag eh ez rms evid 58 | line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" 59 | lines.append(line) 60 | 61 | with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: 62 | fp.writelines(lines) 63 | 64 | # %% 65 | os.system(f"bash run_growclust_cc.sh {root_path} {region}") 66 | -------------------------------------------------------------------------------- /examples/japan/run_growclust_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 
5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/growclust" ]; then 13 | mkdir -p $root_path/$region/growclust 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc 17 | cd $root_path/$region/growclust 18 | mkdir -p TT OUT 19 | 20 | if [ ! -d "GrowClust" ]; then 21 | git clone git@github.com:zhuwq0/GrowClust.git 22 | make -C GrowClust/SRC/ 23 | fi 24 | 25 | cat < growclust.inp 26 | **** Example GrowClust Control File ***** 27 | ******** Daniel Trugman, 2016 ********** 28 | ******************************************* 29 | * 30 | ******************************************* 31 | ************* Event list **************** 32 | ******************************************* 33 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 34 | 1 35 | * fin_evlist (event list file name) 36 | evlist.txt 37 | * 38 | ******************************************* 39 | ************ Station list ************* 40 | ******************************************* 41 | * stlist_fmt (0 = SEED channel, 1 = station name) 42 | 1 43 | * fin_stlist (station list file name) 44 | stlist.txt 45 | * 46 | ******************************************* 47 | ************* XCOR data *************** 48 | ******************************************* 49 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 50 | 1 12 51 | * fin_xcordat 52 | dt.cc 53 | * 54 | ******************************************* 55 | *** Velocity Model / Travel Time Tables *** 56 | ******************************************* 57 | * fin_vzmdl (input vz model file) 58 | vzmodel.txt 59 | * fout_vzfine (output, interpolated vz model file) 60 | TT/vzfine.txt 61 | * fout_pTT (output travel time table, P phase) 62 | TT/tt.pg 63 | * fout_sTT (output travel time table, S phase) 64 | TT/tt.sg 65 | * 66 | ****************************************** 67 | ***** Travel Time Table Parameters ****** 68 | ****************************************** 69 | * vpvs_factor rayparam_min (-1 = default) 70 | 1.732 0.0 71 | * tt_dep0 tt_dep1 tt_ddep 72 | 0. 71. 1. 73 | * tt_del0 tt_del1 tt_ddel 74 | 0. 500. 2. 
75 | * 76 | ****************************************** 77 | ***** GrowClust Algorithm Parameters ***** 78 | ****************************************** 79 | * rmin delmax rmsmax 80 | 0.1 120 1.0 81 | * rpsavgmin, rmincut ngoodmin iponly 82 | 0 0 8 0 83 | * 84 | ****************************************** 85 | ************ Output files **************** 86 | ****************************************** 87 | * nboot nbranch_min 88 | 0 1 89 | * fout_cat (relocated catalog) 90 | OUT/out.growclust_cc_cat 91 | * fout_clust (relocated cluster file) 92 | OUT/out.growclust_cc_clust 93 | * fout_log (program log) 94 | OUT/out.growclust_cc_log 95 | * fout_boot (bootstrap distribution) 96 | OUT/out.growclust_cc_boot 97 | ****************************************** 98 | ****************************************** 99 | EOF 100 | 101 | cat < vzmodel.txt 102 | 0.0 5.30 0.00 103 | 1.0 5.65 0.00 104 | 3.0 5.93 0.00 105 | 5.0 6.20 0.00 106 | 7.0 6.20 0.00 107 | 9.0 6.20 0.00 108 | 11.0 6.20 0.00 109 | 13.0 6.20 0.00 110 | 17.0 6.20 0.00 111 | 21.0 6.20 0.00 112 | 31.00 7.50 0.00 113 | 31.10 8.11 0.00 114 | 100.0 8.11 0.00 115 | EOF 116 | 117 | ./GrowClust/SRC/growclust growclust.inp 118 | cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt 119 | cd $WORKING_DIR 120 | -------------------------------------------------------------------------------- /examples/japan/run_hypodd_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | # %% 9 | root_path = "local" 10 | region = "hinet" 11 | 12 | with open(f"{root_path}/{region}/config.json", "r") as fp: 13 | config = json.load(fp) 14 | 15 | # %% 16 | data_path = f"{region}/cctorch" 17 | result_path = f"{region}/hypodd" 18 | if not os.path.exists(f"{root_path}/{result_path}"): 19 | os.makedirs(f"{root_path}/{result_path}") 20 | 21 | # %% 22 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") 23 | 24 | station_lines = {} 25 | for i, row in stations.iterrows(): 26 | station_id = row["station_id"] 27 | # network_code, station_code, comp_code, channel_code = station_id.split(".") 28 | station_code = station_id.split(".")[2] 29 | # tmp_code = f"{station_code}{channel_code}" 30 | tmp_code = f"{station_code}" 31 | station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n" 32 | 33 | 34 | with open(f"{root_path}/{result_path}/stations.dat", "w") as f: 35 | for line in sorted(station_lines.values()): 36 | f.write(line) 37 | 38 | # %% 39 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv") 40 | events["time"] = pd.to_datetime(events["event_time"], format="mixed") 41 | 42 | event_lines = [] 43 | 44 | mean_latitude = events["latitude"].mean() 45 | mean_longitude = events["longitude"].mean() 46 | for i, row in events.iterrows(): 47 | event_index = row["event_index"] 48 | origin = row["time"] 49 | magnitude = row["magnitude"] 50 | x_err = 0.0 51 | z_err = 0.0 52 | time_err = 0.0 53 | dx, dy, dz = 0.0, 0.0, 0.0 54 | dx = np.random.uniform(-0.01, 0.01) 55 | dy = np.random.uniform(-0.01, 0.01) 56 | # dz = np.random.uniform(0, 10) 57 | dz = 0 58 | event_lines.append( 59 | f"{origin.year:4d}{origin.month:02d}{origin.day:02d} " 60 | f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} " 61 | # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} " 62 | f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} " 
63 | f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n" 64 | ) 65 | 66 | with open(f"{root_path}/{result_path}/events.dat", "w") as f: 67 | f.writelines(event_lines) 68 | 69 | # %% 70 | os.system(f"bash run_hypodd_cc.sh {root_path} {region}") 71 | -------------------------------------------------------------------------------- /examples/japan/run_hypodd_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/hypodd" ]; then 13 | mkdir -p $root_path/$region/hypodd 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc 17 | cd $root_path/$region/hypodd 18 | 19 | if [ ! -d "HypoDD" ]; then 20 | git clone git@github.com:zhuwq0/HypoDD.git 21 | export PATH=$PATH:$PWD/HypoDD 22 | make -C HypoDD/src/ 23 | fi 24 | 25 | cat < cc.inp 26 | * RELOC.INP: 27 | *--- input file selection 28 | * cross correlation diff times: 29 | dt.cc 30 | * 31 | *catalog P diff times: 32 | 33 | * 34 | * event file: 35 | events.dat 36 | * 37 | * station file: 38 | stations.dat 39 | * 40 | *--- output file selection 41 | * original locations: 42 | hypodd_cc.loc 43 | * relocations: 44 | hypodd_cc.reloc 45 | * station information: 46 | hypodd.sta 47 | * residual information: 48 | hypodd.res 49 | * source paramater information: 50 | hypodd.src 51 | * 52 | *--- data type selection: 53 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 54 | * IPHA: 1= P; 2= S; 3= P&S 55 | * DIST:max dist [km] between cluster centroid and station 56 | * IDAT IPHA DIST 57 | 1 3 120 58 | * 59 | *--- event clustering: 60 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 61 | * OBSCT: min # of obs/pair for network data (0= no clustering) 62 | * OBSCC OBSCT 63 | 0 0 64 | * 65 | *--- solution control: 66 | * ISTART: 1 = from single source; 2 = from network sources 67 | * ISOLV: 1 = SVD, 2=lsqr 68 | * NSET: number of sets of iteration with specifications following 69 | * ISTART ISOLV NSET 70 | 2 2 4 71 | * 72 | *--- data weighting and re-weighting: 73 | * NITER: last iteration to used the following weights 74 | * WTCCP, WTCCS: weight cross P, S 75 | * WTCTP, WTCTS: weight catalog P, S 76 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 77 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 78 | * DAMP: damping (for lsqr only) 79 | * --- CROSS DATA ----- ----CATALOG DATA ---- 80 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 81 | 4 1 1 -9 -9 -9 -9 -9 -9 170 82 | 4 1 1 6 -9 -9 -9 -9 -9 170 83 | 4 1 0.8 3 4 -9 -9 -9 -9 170 84 | 4 1 0.8 2 2 -9 -9 -9 -9 170 85 | * 86 | *--- 1D model: 87 | * NLAY: number of model layers 88 | * RATIO: vp/vs ratio 89 | * TOP: depths of top of layer (km) 90 | * VEL: layer velocities (km/s) 91 | * NLAY RATIO 92 | 12 1.73 93 | * TOP 94 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10 95 | * VEL 96 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11 97 | * 98 | *--- event selection: 99 | * CID: cluster to be relocated (0 = all) 100 | * ID: cuspids of event to be relocated (8 per line) 101 | * CID 102 | 0 103 | * ID 104 | EOF 105 | 106 | ./HypoDD/src/hypoDD/hypoDD cc.inp 107 | cd $WORKING_DIR -------------------------------------------------------------------------------- /examples/japan/set_config.py: 
-------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | 4 | config = { 5 | "minlatitude": 36.8, 6 | "maxlatitude": 38.2, 7 | "minlongitude": 136.2, 8 | "maxlongitude": 138.3, 9 | "starttime": "2024-01-01T00:00:00", 10 | "endtime": "2024-02-29T23:00:00", 11 | } 12 | 13 | # %% 14 | with open("local/hinet/config.json", "w") as f: 15 | json.dump(config, f, indent=2) 16 | 17 | # %% 18 | -------------------------------------------------------------------------------- /examples/seafoam/load_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import multiprocessing as mp 3 | 4 | import fsspec 5 | import h5py 6 | from tqdm import tqdm 7 | 8 | 9 | # %% 10 | def read_hdf5(file, key_path): 11 | fs = fsspec.filesystem("gcs", token=key_path) 12 | with fs.open(file, "rb") as f: 13 | with h5py.File(f, "r") as hf: 14 | print(file.split("/")[-1], hf["Acquisition/Raw[0]/RawData"]) 15 | 16 | 17 | # %% 18 | if __name__ == "__main__": 19 | 20 | # %% 21 | token_file = ""  # path to a GCS service-account key file (left blank here) 22 | 23 | fs = fsspec.filesystem("gcs", token=token_file) 24 | 25 | folders = fs.ls("berkeley-mbari-das/") 26 | 27 | hdf5_files = [] 28 | for folder in folders: 29 | if folder.split("/")[-1] in ["ContextData", "MBARI_cable_geom_dx10m.csv"]: 30 | continue 31 | years = fs.ls(folder) 32 | for year in tqdm(years, desc=folder): 33 | jdays = fs.ls(year) 34 | for jday in jdays: 35 | files = fs.ls(jday) 36 | for file in files: 37 | if file.endswith(".h5"): 38 | hdf5_files.append(file) 39 | 40 | # %% 41 | # for file in hdf5_files: 42 | # read_hdf5(file, token_file) 43 | 44 | ctx = mp.get_context("spawn") 45 | pbar = tqdm(total=len(hdf5_files)) 46 | ncpu = 8 47 | with ctx.Pool(ncpu) as pool: 48 | jobs = [] 49 | for file in hdf5_files: 50 | jobs.append(pool.apply_async(read_hdf5, (file, token_file), callback=lambda _: pbar.update()))  # pass the token file and keep the job handle 51 | pool.close() 52 | pool.join() 53 | 54 | for job in jobs: 55 | result = job.get() 56 | if result: 57 | print(result) 58 | 59 | pbar.close() 60 | 61 | # %% 62 | -------------------------------------------------------------------------------- /kubeflow/.gitignore: -------------------------------------------------------------------------------- 1 | *zip 2 | share 3 | Hawaii 4 | Ridgecrest* 5 | PuertoRico 6 | Demo 7 | HYPODD 8 | LongValley* 9 | SierraNegra 10 | -------------------------------------------------------------------------------- /kubeflow/Dockerfile: -------------------------------------------------------------------------------- 1 | # FROM continuumio/miniconda3 2 | 3 | FROM ubuntu:18.04 4 | 5 | ENV PATH="/root/miniconda3/bin:${PATH}" 6 | ARG PATH="/root/miniconda3/bin:${PATH}" 7 | RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/* 8 | 9 | RUN wget \ 10 | https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 11 | && mkdir /root/.conda \ 12 | && bash Miniconda3-latest-Linux-x86_64.sh -b \ 13 | && rm -f Miniconda3-latest-Linux-x86_64.sh 14 | RUN conda --version 15 | 16 | # Setup env variables 17 | ENV PYTHONUNBUFFERED=1 18 | 19 | WORKDIR /app 20 | COPY env.yml /app 21 | RUN conda env update -f=env.yml -n base 22 | RUN pip install --no-cache-dir --upgrade -i https://pypi.anaconda.org/zhuwq0/simple gmma 23 | RUN conda clean --all 24 | #SHELL ["conda", "run", "-n", "base", "/bin/bash", "-c"] 25 | -------------------------------------------------------------------------------- /kubeflow/README.md: -------------------------------------------------------------------------------- 1 | ## 
QuakeFlow Demo Install 2 | 3 | 4 | ``` 5 | git clone -b factorize https://github.com/wayneweiqiang/PhaseNet.git 6 | git clone https://github.com/wayneweiqiang/GMMA.git 7 | conda env create quakeflow --file=env.yml --force 8 | conda activate quakeflow 9 | ``` -------------------------------------------------------------------------------- /kubeflow/debug_pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: dataaccess 5 | spec: 6 | containers: 7 | - name: alpine 8 | image: alpine:latest 9 | command: ['sleep', 'infinity'] 10 | volumeMounts: 11 | - name: mypvc 12 | mountPath: /data 13 | volumes: 14 | - name: mypvc 15 | persistentVolumeClaim: 16 | claimName: mypvc -------------------------------------------------------------------------------- /kubeflow/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.8 7 | - numpy 8 | - scipy 9 | - matplotlib 10 | - pandas 11 | - scikit-learn 12 | - tqdm 13 | - obspy 14 | - pymongo 15 | - tensorflow 16 | - pip 17 | - pip: 18 | - minio 19 | - kfp 20 | - gmma 21 | - --extra-index-url https://pypi.anaconda.org/zhuwq0/simple 22 | 23 | -------------------------------------------------------------------------------- /kubeflow/rsync.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: dataaccess52 5 | spec: 6 | containers: 7 | - name: alpine 8 | image: zhuwq0/waveform-env:1.1 9 | command: ['sleep', 'infinity'] 10 | volumeMounts: 11 | - name: mypvc 12 | mountPath: /data 13 | volumes: 14 | - name: mypvc 15 | persistentVolumeClaim: 16 | claimName: quakeflow-w8gfg-data-volume-52 17 | -------------------------------------------------------------------------------- /kubeflow/waveforms/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN apt-get update && apt-get install -y vim ssh rsync && rm -rf /var/lib/apt/lists/* 4 | 5 | RUN python -m pip install --upgrade obspy && rm -rf /var/cache/apk/* 6 | 7 | WORKDIR /opt 8 | 9 | # RUN ssh-keygen -q -t rsa -N "" <<< $"\ny" >/dev/null 2>&1 10 | # RUN ssh-copy-id zhuwq@wintermute.gps.caltech.edu 11 | 12 | # ARG CACHEBUST=1 -------------------------------------------------------------------------------- /kubernetes/deploy_gcp.sh: -------------------------------------------------------------------------------- 1 | # Deploy Kafka with Helm, create client and add topics 2 | helm repo add bitnami https://charts.bitnami.com/bitnami 3 | helm install quakeflow-kafka bitnami/kafka 4 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never \ 5 | --command -- bash -c "kafka-topics.sh --create --topic phasenet_picks --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic gmma_events --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic waveform_raw --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-topics.sh --create --topic phasenet_waveform --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 6 | kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never 
\ 7 | --command -- bash -c "kafka-configs.sh --alter --entity-type topics --entity-name phasenet_picks --add-config 'retention.ms=-1' --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092 && kafka-configs.sh --alter --entity-type topics --entity-name gmma_events --add-config 'retention.ms=-1' --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 8 | ## For external access: 9 | # helm upgrade quakeflow-kafka bitnami/kafka --set externalAccess.enabled=true,externalAccess.autoDiscovery.enabled=true,rbac.create=true 10 | ## Check topic configs: 11 | # kubectl run --quiet=true -it --rm quakeflow-kafka-client --restart='Never' --image docker.io/bitnami/kafka:2.7.0-debian-10-r68 --restart=Never \ 12 | # --command -- bash -c "kafka-topics.sh --describe --topics-with-overrides --bootstrap-server quakeflow-kafka.default.svc.cluster.local:9092" 13 | 14 | # Deploy MongoDB 15 | helm install quakeflow-mongodb --set auth.rootPassword=quakeflow123,auth.username=quakeflow,auth.password=quakeflow123,auth.database=quakeflow,architecture=replicaset,persistence.size=100Gi 16 | bitnami/mongodb 17 | 18 | # Deploy to Kubernetes 19 | kubectl apply -f quakeflow-gcp.yaml 20 | 21 | # Add autoscaling 22 | kubectl autoscale deployment phasenet-api --cpu-percent=50 --min=1 --max=365 23 | kubectl autoscale deployment gamma-api --cpu-percent=200 --min=1 --max=365 24 | kubectl autoscale deployment deepdenoiser-api --cpu-percent=50 --min=1 --max=10 25 | 26 | # Expose APIs 27 | # kubectl expose deployment phasenet-api --type=LoadBalancer --name=phasenet-service 28 | # kubectl expose deployment gamma-api --type=LoadBalancer --name=gmma-service 29 | # kubectl expose deployment quakeflow-streamlit --type=LoadBalancer --name=streamlit-ui 30 | # kubectl expose deployment quakeflow-ui --type=LoadBalancer --name=quakeflow-ui 31 | 32 | # Add MINIO storage 33 | # helm install quakeflow-minio --set accessKey.password=minio --set secretKey.password=minio123 --set persistence.size=1T bitnami/minio 34 | -------------------------------------------------------------------------------- /kubernetes/quakeflow-autoscaling.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v2beta2 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: phasenet-api 5 | spec: 6 | scaleTargetRef: 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | name: phasenet-api 10 | minReplicas: 1 11 | maxReplicas: 365 12 | metrics: 13 | - type: Resource 14 | resource: 15 | name: cpu 16 | target: 17 | type: Utilization 18 | averageUtilization: 80 19 | - type: Resource 20 | resource: 21 | name: memory 22 | target: 23 | type: AverageValue 24 | averageValue: 1500Mi 25 | # - type: Object 26 | # object: 27 | # metric: 28 | # name: requests-per-second 29 | # describedObject: 30 | # # apiVersion: v1 31 | # # kind: Service 32 | # # name: phasenet-api 33 | # apiVersion: networking.k8s.io/v1beta1 34 | # kind: Ingress 35 | # name: quakeflow-ingress 36 | # target: 37 | # type: Value 38 | # value: 10 39 | # Uncomment these lines if you create the custom packets_per_second metric and 40 | # configure your app to export the metric. 
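Note (illustrative aside, not part of the repository): deploy_gcp.sh above creates the phasenet_picks, gmma_events, waveform_raw, and phasenet_waveform topics, but no Kafka client example is included in this snapshot. A minimal sketch of publishing and reading picks on one of those topics, assuming the kafka-python package (not pinned in the repo) and the same in-cluster bootstrap address used by the helm deployment; the pick payload values are placeholders:

```
import json

from kafka import KafkaConsumer, KafkaProducer

BOOTSTRAP = "quakeflow-kafka.default.svc.cluster.local:9092"  # same address as in deploy_gcp.sh

# Publish one example pick; field names mirror the Pick model used by the demo FastAPI services.
producer = KafkaProducer(
    bootstrap_servers=BOOTSTRAP,
    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
)
producer.send(
    "phasenet_picks",
    {"station_id": "CI.LRL..HNZ", "phase_time": "2024-01-01T00:00:01.000", "phase_type": "P", "phase_score": 0.9},
)
producer.flush()

# Read picks back from the beginning of the topic.
consumer = KafkaConsumer(
    "phasenet_picks",
    bootstrap_servers=BOOTSTRAP,
    auto_offset_reset="earliest",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
)
for message in consumer:
    print(message.value)
    break
```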
41 | # - type: Pods 42 | # pods: 43 | # metric: 44 | # name: packets_per_second 45 | # target: 46 | # type: AverageValue 47 | # averageValue: 100 48 | --- 49 | apiVersion: autoscaling/v2beta2 50 | kind: HorizontalPodAutoscaler 51 | metadata: 52 | name: gamma-api 53 | spec: 54 | scaleTargetRef: 55 | apiVersion: apps/v1 56 | kind: Deployment 57 | name: gamma-api 58 | minReplicas: 1 59 | maxReplicas: 365 60 | metrics: 61 | - type: Resource 62 | resource: 63 | name: cpu 64 | target: 65 | type: Utilization 66 | averageUtilization: 300 67 | - type: Resource 68 | resource: 69 | name: memory 70 | target: 71 | type: AverageValue 72 | averageValue: 500Mi 73 | # Uncomment these lines if you create the custom packets_per_second metric and 74 | # configure your app to export the metric. 75 | # - type: Pods 76 | # pods: 77 | # metric: 78 | # name: packets_per_second 79 | # target: 80 | # type: AverageValue 81 | # averageValue: 100 82 | --- 83 | apiVersion: autoscaling/v2beta2 84 | kind: HorizontalPodAutoscaler 85 | metadata: 86 | name: deepdenoiser-api 87 | spec: 88 | scaleTargetRef: 89 | apiVersion: apps/v1 90 | kind: Deployment 91 | name: deepdenoiser-api 92 | minReplicas: 1 93 | maxReplicas: 365 94 | metrics: 95 | - type: Resource 96 | resource: 97 | name: cpu 98 | target: 99 | type: Utilization 100 | averageUtilization: 50 101 | - type: Resource 102 | resource: 103 | name: memory 104 | target: 105 | type: AverageValue 106 | averageValue: 1000Mi 107 | # Uncomment these lines if you create the custom packets_per_second metric and 108 | # configure your app to export the metric. 109 | # - type: Pods 110 | # pods: 111 | # metric: 112 | # name: packets_per_second 113 | # target: 114 | # type: AverageValue 115 | # averageValue: 100 -------------------------------------------------------------------------------- /kubernetes/quakeflow-ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: quakeflow-ingress 5 | annotations: 6 | kubernetes.io/ingress.global-static-ip-name: "quakeflow-static-ip" 7 | spec: 8 | # defaultBackend: 9 | # service: 10 | # name: phasenet-api 11 | # port: 12 | # number: 8000 13 | rules: 14 | - host: phasenet.quakeflow.com 15 | http: 16 | paths: 17 | - path: /* 18 | pathType: ImplementationSpecific 19 | backend: 20 | service: 21 | name: phasenet-api 22 | port: 23 | number: 8000 24 | - host: gamma.quakeflow.com 25 | http: 26 | paths: 27 | - path: /* 28 | pathType: ImplementationSpecific 29 | backend: 30 | service: 31 | name: gamma-api 32 | port: 33 | number: 8001 34 | - host: deepdenoiser.quakeflow.com 35 | http: 36 | paths: 37 | - path: /* 38 | pathType: ImplementationSpecific 39 | backend: 40 | service: 41 | name: deepdenoiser-api 42 | port: 43 | number: 8002 44 | - host: ui.quakeflow.com 45 | http: 46 | paths: 47 | - path: /* 48 | pathType: ImplementationSpecific 49 | backend: 50 | service: 51 | name: quakeflow-ui 52 | port: 53 | number: 8005 54 | --- 55 | apiVersion: cloud.google.com/v1 56 | kind: BackendConfig 57 | metadata: 58 | name: backendconfig 59 | spec: 60 | timeoutSec: 600 61 | # connectionDraining: 62 | # drainingTimeoutSec: 600 63 | --- -------------------------------------------------------------------------------- /kubernetes/replay/real_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # 
https://docs.obspy.org/packages/autogen/obspy.clients.seedlink.easyseedlink.create_client.html#obspy.clients.seedlink.easyseedlink.create_client 3 | from obspy.clients.seedlink.easyseedlink import create_client 4 | 5 | 6 | # %% 7 | def handle_data(trace): 8 | print("Received new data:") 9 | print(trace) 10 | print() 11 | 12 | 13 | # %% 14 | client = create_client("rtserve.iris.washington.edu:18000", handle_data) 15 | client.select_stream("CI", "LRL", "HNZ") 16 | client.run() 17 | 18 | # %% 19 | -------------------------------------------------------------------------------- /kubernetes/replay/replay_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import fsspec 3 | import pandas as pd 4 | import obspy 5 | import matplotlib.pyplot as plt 6 | 7 | # %% 8 | protocal = "gs" 9 | bucket = "quakeflow_share" 10 | folder = "demo/obspy" 11 | 12 | # %% Seismic stations used in the demo 13 | stations = pd.read_csv(f"{protocal}://{bucket}/{folder}/stations.csv") 14 | plt.figure() 15 | plt.scatter(stations["longitude"], stations["latitude"], marker="^", label="stations") 16 | plt.show() 17 | 18 | # %% Read replay waveforms 19 | fs = fsspec.filesystem(protocal) 20 | mseeds = fs.glob(f"{bucket}/{folder}/waveforms/*/*.mseed") 21 | 22 | # %% 23 | for mseed in mseeds: 24 | print(mseed) 25 | with fs.open(mseed, "rb") as f: 26 | st = obspy.read(f) 27 | 28 | # plot a few seconds 29 | tmp = st.slice(starttime=st[0].stats.starttime, endtime=st[0].stats.starttime + 20) 30 | tmp.plot() 31 | break 32 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "QuakeFlow" 2 | site_description: 'QuakeFlow: a Scalable Deep-learning-based Earthquake Monitoring Workflow with Cloud Computing' 3 | site_author: 'Weiqiang Zhu' 4 | docs_dir: docs/ 5 | repo_name: 'AI4EPS/QuakeFlow' 6 | repo_url: 'https://github.com/ai4eps/QuakeFlow' 7 | nav: 8 | - Overview: README.md 9 | - Pipeline: kubeflow/workflow.ipynb 10 | - Standard Formats: data_format.md 11 | # - Visualization: plot_catalog.ipynb 12 | # - FastAPI: fastapi.ipynb 13 | # - Deploy on cluster: k8s_readme.md 14 | # - Deploy on cloud: gcp_readme.md 15 | theme: 16 | name: 'material' 17 | plugins: 18 | - mkdocs-jupyter: 19 | ignore_h1_titles: True 20 | - exclude: 21 | glob: 22 | - PhaseNet/* 23 | - GaMMA/* 24 | - "*_demo" 25 | extra: 26 | analytics: 27 | provider: google 28 | property: G-69DX3B35RK 29 | -------------------------------------------------------------------------------- /quakeflow/demo/association/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # copy the requirements file used for dependencies 11 | COPY requirements.txt . 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 
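Note (illustrative aside, not part of the repository): the handle_data callback in real_data.py above only prints each incoming SeedLink trace. A hedged sketch of how that handler could serialize live traces into the same {"data", "timestamp"} JSON structure the replay services write to Redis, so live and replayed waveforms share one format:

```
import json

# Hypothetical variant of handle_data() from real_data.py; trace is an obspy Trace.
def handle_data(trace):
    payload = {
        "data": trace.data.tolist(),
        "timestamp": trace.times("timestamp").tolist(),
    }
    print(trace.id, len(payload["data"]), "new samples")
    # e.g. forward to Redis, mirroring data/app.py:
    # redis_client.xadd(trace.id, {k: json.dumps(v) for k, v in payload.items()})
```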
18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /quakeflow/demo/association/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from pydantic import BaseModel 3 | 4 | app = FastAPI() 5 | 6 | 7 | # Define a request body model 8 | class Pick(BaseModel): 9 | station_id: list 10 | phase_time: list 11 | phase_type: list 12 | phase_score: list 13 | phase_amplitude: list 14 | phase_polarity: list 15 | 16 | 17 | # Define an endpoint to make predictions 18 | @app.post("/predict") 19 | def predict(request: Pick): 20 | print(f"Associating on {len(request.station_id)} picks.", flush=True) 21 | return { 22 | "events": { 23 | "time": [], 24 | "latitude": [], 25 | "longitude": [], 26 | "depth_km": [], 27 | }, 28 | "picks": { 29 | "station_id": [], 30 | "phase_time": [], 31 | "phase_type": [], 32 | "phase_score": [], 33 | "phase_amplitude": [], 34 | "phase_polarity": [], 35 | }, 36 | } 37 | -------------------------------------------------------------------------------- /quakeflow/demo/association/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | obspy 3 | uvicorn 4 | pandas -------------------------------------------------------------------------------- /quakeflow/demo/data/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # Copy the requirements file used for dependencies 11 | COPY requirements.txt . 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 
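Note (illustrative aside, not part of the repository): the association app above accepts a Pick request body and currently returns empty event/pick lists (it is a stub). A minimal client sketch, assuming the service is reachable either through the in-cluster association-api Service on port 80 (see service.yaml further below) or locally via uvicorn on port 8080; the pick values are placeholders:

```
import requests

picks = {
    "station_id": ["NC.ABC..HHZ"],  # placeholder values
    "phase_time": ["2024-01-01T00:00:01.000"],
    "phase_type": ["P"],
    "phase_score": [0.9],
    "phase_amplitude": [1.0e-6],
    "phase_polarity": ["U"],
}

# In-cluster: http://association-api/predict; local test: http://localhost:8080/predict
resp = requests.post("http://localhost:8080/predict", json=picks, timeout=60)
resp.raise_for_status()
print(resp.json()["events"])
```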
18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] 21 | -------------------------------------------------------------------------------- /quakeflow/demo/data/app.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import logging 4 | import threading 5 | import time 6 | from datetime import datetime 7 | from random import randint 8 | 9 | import fsspec 10 | import obspy 11 | import pandas as pd 12 | import redis 13 | from fastapi import FastAPI 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | 17 | 18 | app = FastAPI() 19 | 20 | # %% 21 | PROTOCAL = "gs" 22 | BUCKET = "quakeflow_share" 23 | FOLDER = "demo" 24 | REDIS_HOST = "quakeflow-redis-master.default.svc.cluster.local" 25 | try: 26 | redis_client = redis.Redis(host=REDIS_HOST, port=6379, decode_responses=True) 27 | redis_client.ping() 28 | except: 29 | redis_client = redis.Redis(host="localhost", port=6379, decode_responses=True) 30 | 31 | 32 | def replay(): 33 | fs = fsspec.filesystem(PROTOCAL) 34 | mseeds = fs.glob(f"{BUCKET}/{FOLDER}/waveforms/????-???/??/*.mseed") 35 | 36 | waveforms = {} 37 | station_ids = [] 38 | min_timestamp = None 39 | print("Reading waveforms: ", end="", flush=True) 40 | for i, mseed in enumerate(mseeds): 41 | print(mseed.split("/")[-1], end=" ", flush=True) 42 | with fs.open(mseed, "rb") as f: 43 | st = obspy.read(f) 44 | st = st.merge(fill_value="latest") 45 | st = st.resample(100) 46 | tr = st[0] 47 | if min_timestamp is None: 48 | min_timestamp = tr.times("timestamp")[0] 49 | waveforms[tr.id] = { 50 | "data": tr.data.tolist(), 51 | # "timestamp": (tr.times("timestamp") - min_timestamp).tolist(), 52 | "timestamp": tr.times("timestamp").tolist(), 53 | } 54 | station_ids.append(tr.id) 55 | if i > 40: 56 | break 57 | print("\nFinished reading waveforms.", flush=True) 58 | 59 | index = {x: 0 for x in station_ids} 60 | while True: 61 | for i, sid in enumerate(station_ids): 62 | # print(sid, end=" ", flush=True) 63 | window_size = randint(80, 120) 64 | data = waveforms[sid]["data"][index[sid] : index[sid] + window_size] 65 | timestamp = waveforms[sid]["timestamp"][index[sid] : index[sid] + window_size] 66 | if len(data) < window_size: # wrap around 67 | index[sid] = 0 68 | data = waveforms[sid]["data"][:window_size] 69 | timestamp = waveforms[sid]["timestamp"][:window_size] 70 | redis_client.xadd( 71 | sid, 72 | { 73 | "data": json.dumps(data), 74 | "timestamp": json.dumps(timestamp), 75 | }, 76 | ) 77 | redis_client.xtrim(sid, maxlen=60000) 78 | index[sid] += window_size 79 | # print(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")) 80 | time.sleep(1) 81 | 82 | 83 | threading.Thread(target=replay, daemon=True).start() 84 | 85 | 86 | @app.get("/") 87 | def read_root(): 88 | return {"message": "Replaying waveforms."} 89 | -------------------------------------------------------------------------------- /quakeflow/demo/data/requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | fastapi 3 | fsspec 4 | gcsfs 5 | obspy 6 | uvicorn 7 | pandas -------------------------------------------------------------------------------- /quakeflow/demo/hub/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 
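Note (illustrative aside, not part of the repository): data/app.py above pushes {"data", "timestamp"} entries onto one Redis stream per station with XADD and trims each stream to 60,000 entries, but no downstream reader is included in this snapshot. A sketch of consuming those entries with XREAD, assuming redis-py and a known stream (station) id:

```
import json

import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)
stream = "NC.ABC..HHZ"  # hypothetical stream name; the replay service uses the obspy trace id
last_id = "$"           # "$" = only entries added after we start reading

while True:
    for _, entries in r.xread({stream: last_id}, block=2000, count=10):
        for entry_id, fields in entries:
            data = json.loads(fields["data"])
            timestamp = json.loads(fields["timestamp"])
            print(entry_id, len(data), "samples ending at", timestamp[-1])
            last_id = entry_id
```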
2 | FROM python:3.9-alpine 3 | 4 | # Set the working directory to /app 5 | WORKDIR /app 6 | 7 | # copy the requirements file used for dependencies 8 | COPY requirements.txt . 9 | 10 | # Install any needed packages specified in requirements.txt 11 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 12 | 13 | # Copy the rest of the working directory contents into the container at /app 14 | COPY . . 15 | 16 | # Run app.py when the container launches 17 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] 18 | -------------------------------------------------------------------------------- /quakeflow/demo/hub/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | redis 3 | fsspec 4 | gcsfs 5 | fastapi 6 | requests 7 | uvicorn 8 | pandas 9 | debugpy # Required for debugging 10 | -------------------------------------------------------------------------------- /quakeflow/demo/location/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # copy the requirements file used for dependencies 11 | COPY requirements.txt . 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /quakeflow/demo/location/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from pydantic import BaseModel 3 | 4 | app = FastAPI() 5 | 6 | 7 | # Define a request body model 8 | class Pick(BaseModel): 9 | station_id: list 10 | phase_time: list 11 | phase_type: list 12 | phase_score: list 13 | phase_amplitude: list 14 | phase_polarity: list 15 | 16 | 17 | # Define an endpoint to make predictions 18 | @app.post("/predict") 19 | def predict(request: Pick): 20 | print(f"Locating on {len(request.station_id)} picks.", flush=True) 21 | return { 22 | "time": [], 23 | "latitude": [], 24 | "longitude": [], 25 | "depth_km": [], 26 | "num_p_picks": [], 27 | "num_s_picks": [], 28 | } 29 | -------------------------------------------------------------------------------- /quakeflow/demo/location/requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | fastapi 3 | fsspec 4 | gcsfs 5 | obspy 6 | uvicorn 7 | pandas -------------------------------------------------------------------------------- /quakeflow/demo/picking/Dockerfile: -------------------------------------------------------------------------------- 1 | # Python image to use. 2 | FROM python:3.9-alpine 3 | 4 | # Install gcc 5 | RUN apk add --no-cache gcc musl-dev linux-headers 6 | 7 | # Set the working directory to /app 8 | WORKDIR /app 9 | 10 | # copy the requirements file used for dependencies 11 | COPY requirements.txt . 
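Note (illustrative aside, not part of the repository): the hub image here only installs dependencies; its app.py is not part of this snapshot. A rough, assumed sketch of how the hub could chain the three stub services together, using the request/response models defined in the picking, association, and location apps and the Service names from deployment.yaml/service.yaml (the URLs and the helper function are illustrative, not the actual hub logic):

```
import requests

PICKING_URL = "http://picking-api/predict"          # in-cluster Service names; adjust for local runs
ASSOCIATION_URL = "http://association-api/predict"
LOCATION_URL = "http://location-api/predict"


def run_pipeline(waveform_batch: dict):
    """waveform_batch follows the picking Data model: {"id": [...], "vec": [...], "timestamp": [...]}."""
    picks = requests.post(PICKING_URL, json=waveform_batch, timeout=60).json()
    associated = requests.post(ASSOCIATION_URL, json=picks, timeout=60).json()
    located = requests.post(LOCATION_URL, json=associated["picks"], timeout=60).json()
    return associated["events"], located
```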
12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --trusted-host pypi.python.org -r requirements.txt 15 | 16 | # Copy the rest of the working directory contents into the container at /app 17 | COPY . . 18 | 19 | # Run app.py when the container launches 20 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /quakeflow/demo/picking/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from pydantic import BaseModel 3 | 4 | app = FastAPI() 5 | 6 | 7 | # Define a request body model 8 | class Data(BaseModel): 9 | id: list 10 | vec: list 11 | timestamp: list 12 | 13 | 14 | # Define an endpoint to make predictions 15 | @app.post("/predict") 16 | def predict(request: Data): 17 | print(f"Picking on {len(request.id)} stations.", flush=True) 18 | return { 19 | "station_id": [], 20 | "phase_time": [], 21 | "phase_type": [], 22 | "phase_score": [], 23 | "phase_amplitude": [], 24 | "phase_polarity": [], 25 | } 26 | -------------------------------------------------------------------------------- /quakeflow/demo/picking/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | obspy 3 | uvicorn 4 | pandas -------------------------------------------------------------------------------- /quakeflow/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: quakeflow-hub 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: quakeflow-hub 10 | template: 11 | metadata: 12 | labels: 13 | app: quakeflow-hub 14 | spec: 15 | containers: 16 | - name: server 17 | image: zhuwq0/quakeflow-hub 18 | ports: 19 | - containerPort: 8080 20 | env: 21 | - name: PORT 22 | value: "8080" 23 | readinessProbe: 24 | tcpSocket: 25 | port: 8080 26 | initialDelaySeconds: 5 27 | --- 28 | apiVersion: apps/v1 29 | kind: Deployment 30 | metadata: 31 | name: quakeflow-data 32 | spec: 33 | replicas: 1 34 | selector: 35 | matchLabels: 36 | app: quakeflow-data 37 | template: 38 | metadata: 39 | labels: 40 | app: quakeflow-data 41 | spec: 42 | containers: 43 | - name: server 44 | image: zhuwq0/quakeflow-data 45 | ports: 46 | - containerPort: 8080 47 | env: 48 | - name: PORT 49 | value: "8080" 50 | readinessProbe: 51 | tcpSocket: 52 | port: 8080 53 | initialDelaySeconds: 5 54 | --- 55 | apiVersion: apps/v1 56 | kind: Deployment 57 | metadata: 58 | name: picking-api 59 | spec: 60 | replicas: 1 61 | selector: 62 | matchLabels: 63 | app: picking-api 64 | template: 65 | metadata: 66 | labels: 67 | app: picking-api 68 | spec: 69 | containers: 70 | - name: server 71 | image: zhuwq0/picking-api 72 | ports: 73 | - containerPort: 8080 74 | env: 75 | - name: PORT 76 | value: "8080" 77 | readinessProbe: 78 | tcpSocket: 79 | port: 8080 80 | initialDelaySeconds: 5 81 | --- 82 | apiVersion: apps/v1 83 | kind: Deployment 84 | metadata: 85 | name: association-api 86 | spec: 87 | replicas: 1 88 | selector: 89 | matchLabels: 90 | app: association-api 91 | template: 92 | metadata: 93 | labels: 94 | app: association-api 95 | spec: 96 | containers: 97 | - name: server 98 | image: zhuwq0/association-api 99 | ports: 100 | - containerPort: 8080 101 | env: 102 | - name: PORT 103 | value: "8080" 104 | readinessProbe: 105 | tcpSocket: 106 | port: 8080 107 | initialDelaySeconds: 5 108 | --- 109 
| apiVersion: apps/v1 110 | kind: Deployment 111 | metadata: 112 | name: location-api 113 | spec: 114 | replicas: 1 115 | selector: 116 | matchLabels: 117 | app: location-api 118 | template: 119 | metadata: 120 | labels: 121 | app: location-api 122 | spec: 123 | containers: 124 | - name: server 125 | image: zhuwq0/location-api 126 | ports: 127 | - containerPort: 8080 128 | env: 129 | - name: PORT 130 | value: "8080" 131 | readinessProbe: 132 | tcpSocket: 133 | port: 8080 134 | initialDelaySeconds: 5 -------------------------------------------------------------------------------- /quakeflow/helm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | helm install quakeflow-redis --set auth.enabled=false oci://registry-1.docker.io/bitnamicharts/redis -------------------------------------------------------------------------------- /quakeflow/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Sensor Data Visualization 7 | 8 | 9 | 10 |
11 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /quakeflow/replay_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import redis 3 | import json 4 | import time 5 | from random import randint 6 | import fsspec 7 | import pandas as pd 8 | import obspy 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | 12 | # %% 13 | PROTOCAL = "gs" 14 | BUCKET = "quakeflow_share" 15 | FOLDER = "demo/obspy" 16 | 17 | stations = pd.read_csv(f"{PROTOCAL}://{BUCKET}/{FOLDER}/stations.csv") 18 | fs = fsspec.filesystem(PROTOCAL) 19 | mseeds = fs.glob(f"{BUCKET}/{FOLDER}/waveforms/*/*.mseed") 20 | 21 | # %% 22 | waveforms = {} 23 | station_ids = [] 24 | min_timestamp = None 25 | print("Reading waveforms: ", end="", flush=True) 26 | for i, mseed in enumerate(mseeds): 27 | print(mseed.split("/")[-1], end=" ", flush=True) 28 | with fs.open(mseed, "rb") as f: 29 | st = obspy.read(f) 30 | st = st.merge(fill_value="latest") 31 | st = st.resample(100) 32 | tr = st[0] 33 | if min_timestamp is None: 34 | min_timestamp = tr.times("timestamp")[0] 35 | waveforms[tr.id] = { 36 | "data": tr.data.tolist(), 37 | # "timestamp": (tr.times("timestamp") - min_timestamp).tolist(), 38 | "timestamp": tr.times("timestamp").tolist(), 39 | } 40 | station_ids.append(tr.id) 41 | if i > 40: 42 | break 43 | print("\nFinished reading waveforms.", flush=True) 44 | with open("station_ids.json", "w") as f: 45 | json.dump(station_ids, f) 46 | 47 | 48 | # %% 49 | r = redis.Redis(host="localhost", port=6379, db=0) 50 | 51 | index = {x: 0 for x in station_ids} 52 | while True: 53 | for i, sid in enumerate(station_ids): 54 | print(sid, end=" ", flush=True) 55 | window_size = randint(80, 120) 56 | r.xadd( 57 | sid, 58 | { 59 | "data": json.dumps(waveforms[sid]["data"][index[sid] : index[sid] + window_size]), 60 | "timestamp": json.dumps(waveforms[sid]["timestamp"][index[sid] : index[sid] + window_size]), 61 | }, 62 | ) 63 | r.xtrim(sid, maxlen=60000) 64 | index[sid] += window_size 65 | print() 66 | time.sleep(1) 67 | 68 | # %% 69 | -------------------------------------------------------------------------------- /quakeflow/service.yaml: -------------------------------------------------------------------------------- 1 | # This Service manifest defines: 2 | # - a load balancer for pods matching label "app: python-hello-world" 3 | # - exposing the application to the public Internet (type:LoadBalancer) 4 | # - routes port 80 of the load balancer to the port 8080 of the Pods. 
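Note (illustrative aside, not part of the repository): replay_data.py above writes the list of replayed streams to station_ids.json and trims each stream to 60,000 entries. A small, assumed monitoring sketch (redis-py) for confirming that the replay is running by checking stream lengths and the newest entry id:

```
import json

import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)

with open("station_ids.json") as f:  # written by replay_data.py
    station_ids = json.load(f)

for sid in station_ids:
    length = r.xlen(sid)                # capped at 60000 by xtrim in the replay loop
    newest = r.xrevrange(sid, count=1)
    newest_id = newest[0][0] if newest else "empty"
    print(f"{sid}: {length} entries, newest id {newest_id}")
```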
5 | # Syntax reference https://kubernetes.io/docs/concepts/configuration/overview/ 6 | # apiVersion: v1 7 | # kind: Service 8 | # metadata: 9 | # name: quakeflow-external 10 | # spec: 11 | # type: LoadBalancer 12 | # selector: 13 | # app: quakeflow-hub 14 | # ports: 15 | # - name: http 16 | # port: 80 17 | # targetPort: 8080 18 | --- 19 | apiVersion: v1 20 | kind: Service 21 | metadata: 22 | name: picking-api 23 | spec: 24 | type: LoadBalancer 25 | selector: 26 | app: picking-api 27 | ports: 28 | - name: http 29 | port: 80 30 | targetPort: 8080 31 | --- 32 | apiVersion: v1 33 | kind: Service 34 | metadata: 35 | name: association-api 36 | spec: 37 | type: LoadBalancer 38 | selector: 39 | app: association-api 40 | ports: 41 | - name: http 42 | port: 80 43 | targetPort: 8080 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: location-api 49 | spec: 50 | type: LoadBalancer 51 | selector: 52 | app: location-api 53 | ports: 54 | - name: http 55 | port: 80 56 | targetPort: 8080 57 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | obspy 2 | cartopy -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | yaml/ 2 | debug/ 3 | debug_* 4 | demo 5 | local 6 | debug 7 | figures 8 | templates 9 | win32tools 10 | __pycache__ 11 | 2019-185 12 | BayArea 13 | Shelly2020.txt 14 | *.png 15 | *.pth 16 | *.csv 17 | *.txt 18 | *.tar.gz 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /scripts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y git wget 5 | RUN apt-get install -y libgeos++-dev 6 | 7 | ENV PATH="/root/miniconda3/bin:${PATH}" 8 | ARG PATH="/root/miniconda3/bin:${PATH}" 9 | ENV MINICONDA_VERSION=Miniconda3-py310_23.11.0-2-Linux-x86_64.sh 10 | RUN wget "https://repo.anaconda.com/miniconda/${MINICONDA_VERSION}" && \ 11 | mkdir /root/.conda && \ 12 | bash $MINICONDA_VERSION -b && \ 13 | rm -f $MINICONDA_VERSION && \ 14 | conda --version 15 | 16 | ENV PYTHONUNBUFFERED=1 17 | ## plotting 18 | RUN conda install -c conda-forge pygmt 19 | RUN pip install --no-cache-dir cartopy plotly 20 | ## machine learning 21 | RUN pip install --no-cache-dir numpy scipy matplotlib pandas scikit-learn 22 | RUN pip install --no-cache-dir obspy pyproj 23 | RUN pip install --no-cache-dir fsspec gcsfs s3fs 24 | RUN pip install --no-cache-dir kfp 25 | RUN pip install --no-cache-dir git+https://github.com/AI4EPS/GaMMA.git 26 | 27 | RUN apt-get clean && \ 28 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 29 | RUN conda clean --all --yes 30 | RUN rm -rf /root/.cache/pip -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | mkir relocation 3 | cd relocation 4 | git clone git@github.com:zhuwq0/GrowClust.git 5 | git clone git@github.com:zhuwq0/HypoDD.git 6 | cd .. 
7 | ``` 8 | ``` 9 | python download_waveform.py 10 | python run_phasenet.py 11 | python run_gamma.py 12 | python convert_hypodd.py && bash run_hypodd_ct.sh 13 | python convert_growclust.py && bash run_growclust_ct.sh 14 | python cut_templates.py && python run_cctorch.py 15 | python convert_hypodd.py --dtcc && bash run_hypodd_cc.sh 16 | python convert_growclust.py --dtcc && bash run_growclust_cc.sh 17 | python run_template_macthing.py 18 | ``` -------------------------------------------------------------------------------- /scripts/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--region", type=str, default="demo", help="region") 7 | parser.add_argument("--root_path", type=str, default="local", help="root path") 8 | 9 | ## Cloud 10 | parser.add_argument("--protocol", type=str, default="file", help="protocol (file, gs, s3)") 11 | parser.add_argument("--bucket", type=str, default=None, help="bucket name") 12 | parser.add_argument("--token", type=str, default=None, help="token") 13 | 14 | # parser.add_argument("--bucket", type=str, default="quakeflow_catalog", help="bucket name") 15 | # parser.add_argument("--protocol", type=str, default="gs", help="protocol (file, gs, s3)") 16 | # parser.add_argument("--token", type=str, default="application_default_credentials.json", help="token") 17 | 18 | ## Parallel 19 | parser.add_argument("--num_nodes", type=int, default=1, help="number of nodes") 20 | parser.add_argument("--node_rank", type=int, default=0, help="node rank") 21 | 22 | ## Model 23 | parser.add_argument("--model", type=str, default="phasenet", help="model") 24 | 25 | ## PhaseNet 26 | parser.add_argument("--overwrite", action="store_true", help="overwrite existing results") 27 | 28 | ## ADLOC 29 | parser.add_argument("--iter", type=int, default=0, help="iteration") 30 | 31 | ## CCTorch 32 | parser.add_argument("--dtct_pair", action="store_true", help="run convert_dtcc.py") 33 | 34 | return parser.parse_args() 35 | -------------------------------------------------------------------------------- /scripts/convert_dtcc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | import pickle 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from args import parse_args 9 | from tqdm import tqdm 10 | 11 | # %% 12 | args = parse_args() 13 | root_path = args.root_path 14 | region = args.region 15 | 16 | with open(f"{root_path}/{region}/config.json", "r") as fp: 17 | config = json.load(fp) 18 | 19 | # %% 20 | data_path = f"{region}/cctorch" 21 | result_path = f"{region}/adloc_dd" 22 | if not os.path.exists(f"{result_path}"): 23 | os.makedirs(f"{result_path}") 24 | 25 | # %% 26 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") 27 | stations["station_id"] = stations["station"] 28 | stations = stations.groupby("station_id").first().reset_index() 29 | 30 | # %% 31 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv", dtype={"event_index": str}) 32 | events["time"] = pd.to_datetime(events["event_time"], format="mixed") 33 | 34 | # %% 35 | stations["idx_sta"] = np.arange(len(stations)) # reindex in case the index does not start from 0 or is not continuous 36 | events["idx_eve"] = np.arange(len(events)) # reindex in case the index does not start from 0 or is not continuous 37 | mapping_phase_type_int = {"P": 0, "S": 1} 38 | 39 | # %% 40 | 
with open(f"{root_path}/{data_path}/dt.cc", "r") as f: 41 | lines = f.readlines() 42 | 43 | # %% 44 | event_index1 = [] 45 | event_index2 = [] 46 | station_index = [] 47 | phase_type = [] 48 | phase_score = [] 49 | phase_dtime = [] 50 | 51 | stations.set_index("station_id", inplace=True) 52 | events.set_index("event_index", inplace=True) 53 | 54 | for line in tqdm(lines): 55 | if line[0] == "#": 56 | evid1, evid2, _ = line[1:].split() 57 | else: 58 | stid, dt, weight, phase = line.split() 59 | event_index1.append(events.loc[evid1, "idx_eve"]) 60 | event_index2.append(events.loc[evid2, "idx_eve"]) 61 | station_index.append(stations.loc[stid, "idx_sta"]) 62 | phase_type.append(mapping_phase_type_int[phase]) 63 | phase_score.append(weight) 64 | phase_dtime.append(dt) 65 | 66 | 67 | dtypes = np.dtype( 68 | [ 69 | ("idx_eve1", np.int32), 70 | ("idx_eve2", np.int32), 71 | ("idx_sta", np.int32), 72 | ("phase_type", np.int32), 73 | ("phase_score", np.float32), 74 | ("phase_dtime", np.float32), 75 | ] 76 | ) 77 | pairs_array = np.memmap( 78 | f"{root_path}/{result_path}/pair_dt.dat", 79 | mode="w+", 80 | shape=(len(phase_dtime),), 81 | dtype=dtypes, 82 | ) 83 | pairs_array["idx_eve1"] = event_index1 84 | pairs_array["idx_eve2"] = event_index2 85 | pairs_array["idx_sta"] = station_index 86 | pairs_array["phase_type"] = phase_type 87 | pairs_array["phase_score"] = phase_score 88 | pairs_array["phase_dtime"] = phase_dtime 89 | with open(f"{root_path}/{result_path}/pair_dtypes.pkl", "wb") as f: 90 | pickle.dump(dtypes, f) 91 | 92 | 93 | # %% 94 | events.to_csv(f"{root_path}/{result_path}/pair_events.csv", index=True, index_label="event_index") 95 | stations.to_csv(f"{root_path}/{result_path}/pair_stations.csv", index=True, index_label="station_id") 96 | -------------------------------------------------------------------------------- /scripts/create_filelist.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from glob import glob 4 | 5 | # %% 6 | protocol = "file" 7 | token = None 8 | 9 | ## get from command line 10 | root_path = "local" 11 | region = "demo" 12 | if len(os.sys.argv) > 1: 13 | root_path = os.sys.argv[1] 14 | region = os.sys.argv[2] 15 | print(f"root_path: {root_path}") 16 | print(f"region: {region}") 17 | 18 | # %% 19 | result_path = f"{region}/phasenet_das" 20 | if not os.path.exists(f"{root_path}/{result_path}"): 21 | os.makedirs(f"{root_path}/{result_path}", exist_ok=True) 22 | 23 | # %% 24 | folder_depth = 2 25 | csv_list = sorted(glob(f"{root_path}/{result_path}/picks_phasenet_das/????-??-??/*.csv")) 26 | csv_list = ["/".join(x.split("/")[-folder_depth:]) for x in csv_list] 27 | 28 | # %% 29 | hdf5_list = sorted(glob(f"{root_path}/{region}/????-??-??/*.h5")) 30 | num_to_process = 0 31 | with open(f"{root_path}/{result_path}/filelist.csv", "w") as fp: 32 | # fp.write("\n".join(hdf5_list)) 33 | for line in hdf5_list: 34 | csv_name = "/".join(line.split("/")[-folder_depth:]).replace(".h5", ".csv") 35 | if csv_name not in csv_list: 36 | fp.write(f"{line}\n") 37 | num_to_process += 1 38 | 39 | print(f"filelist.csv created in {root_path}/{result_path}: {num_to_process} / {len(hdf5_list)} to process") 40 | -------------------------------------------------------------------------------- /scripts/debug_growclust.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from pathlib import Path 3 | import h5py 4 | import scipy 5 | from tqdm import tqdm 6 | import numpy as np 7 | import json 8 
| import pandas as pd 9 | from datetime import datetime 10 | 11 | # %% 12 | output_path = Path("relocation/growclust/") 13 | if not output_path.exists(): 14 | output_path.mkdir(parents=True) 15 | 16 | # %% 17 | dt_ct = Path("relocation/hypodd/dt.ct") 18 | 19 | lines = [] 20 | with open(dt_ct, "r") as fp: 21 | for line in tqdm(fp): 22 | if line.startswith("#"): 23 | ev1, ev2 = line.split()[1:3] 24 | lines.append(f"# {ev1} {ev2} 0.000\n") 25 | else: 26 | station, t1, t2, score, phase = line.split() 27 | #station = station[:-2] 28 | # if station in ["WAS2", "FUR", "RRX"]: 29 | # continue 30 | lines.append(f"{station} {float(t1)-float(t2):.5f} {score} {phase}\n") 31 | 32 | # %% 33 | with open(output_path / "dt.ct", "w") as fp: 34 | fp.writelines(lines) 35 | 36 | -------------------------------------------------------------------------------- /scripts/download_event_hinet.py: -------------------------------------------------------------------------------- 1 | # %% 2 | !pip install HinetPy 3 | # !wget https://github.com/AI4EPS/software/releases/download/win32tools/win32tools.tar.gz 4 | ! [ -e win32tools.tar.gz ] || wget https://github.com/AI4EPS/software/releases/download/win32tools/win32tools.tar.gz 5 | !tar -xvf win32tools.tar.gz 6 | !cd win32tools && make 7 | 8 | 9 | # %% 10 | from HinetPy import Client, win32 11 | import os 12 | 13 | os.environ["PATH"] += os.pathsep + os.path.abspath("win32tools/catwin32.src") + os.pathsep + os.path.abspath("win32tools/win2sac.src") 14 | 15 | # %% 16 | waveform_path = "local/wavefroms/" 17 | 18 | # %% 19 | client = Client("", "") 20 | 21 | data, ctable = client.get_continuous_waveform("0101", "201001010000", 20, outdir=f"{waveform_path}/cnt") 22 | 23 | 24 | # %% 25 | # data = "2010010100000101VM.cnt" 26 | # ctable = "01_01_20100101.euc.ch" 27 | 28 | win32.extract_sac(data, ctable, outdir="local/wavefroms") 29 | win32.extract_sacpz(ctable) -------------------------------------------------------------------------------- /scripts/load_cloud_picks.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | import fsspec 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | # %% 11 | if __name__ == "__main__": 12 | 13 | # %% 14 | protocol = "gs" 15 | token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 16 | with open(token_json, "r") as fp: 17 | token = json.load(fp) 18 | 19 | bucket = "quakeflow_catalog" 20 | folder = "NC/phasenet_merged" ## NCEDC 21 | # folder = "SC/phasenet_merged" ## SCEDC 22 | 23 | fs = fsspec.filesystem(protocol, token=token) 24 | 25 | def load_csv(jday): 26 | with fs.open(jday, "r") as fp: 27 | return pd.read_csv(fp, dtype=str) 28 | 29 | # %% 30 | years = range(2023, 2024) 31 | 32 | for year in years: 33 | jdays = fs.glob(f"{bucket}/{folder}/{year}/????.???.csv") 34 | 35 | with ThreadPoolExecutor(max_workers=32) as executor: 36 | picks = list( 37 | tqdm(executor.map(load_csv, jdays), total=len(jdays), desc=f"Loading {bucket}/{folder}/{year}") 38 | ) 39 | 40 | # %% 41 | picks = pd.concat(picks) 42 | picks.to_csv("phasenet_picks.csv", index=False) 43 | 44 | # %% 45 | picks = pd.read_csv("phasenet_picks.csv") 46 | print(f"Loaded {len(picks):,} picks") 47 | -------------------------------------------------------------------------------- /scripts/load_cloud_templates.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | 
import os 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | import fsspec 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import pandas as pd 10 | from tqdm import tqdm 11 | 12 | # %% 13 | if __name__ == "__main__": 14 | 15 | # %% 16 | result_path = "results/" 17 | if not os.path.exists(result_path): 18 | os.makedirs(result_path) 19 | 20 | # %% 21 | protocol = "gs" 22 | token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 23 | with open(token_json, "r") as fp: 24 | token = json.load(fp) 25 | 26 | bucket = "quakeflow_catalog" 27 | folder = "Cal/cctorch" 28 | 29 | fs = fsspec.filesystem(protocol, token=token) 30 | 31 | # %% 32 | def plot_templates(templates, events, picks): 33 | templates = templates - np.nanmean(templates, axis=(-1), keepdims=True) 34 | std = np.std(templates, axis=(-1), keepdims=True) 35 | std[std == 0] = 1.0 36 | templates = templates / std 37 | 38 | plt.figure(figsize=(10, 10)) 39 | plt.imshow(templates[:, -1, 0, :], origin="lower", aspect="auto", vmin=-0.3, vmax=0.3, cmap="RdBu_r") 40 | plt.colorbar() 41 | plt.show() 42 | 43 | # %% 44 | years = [2023] 45 | 46 | for year in years: 47 | num_jday = 366 if (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 else 365 48 | 49 | for jday in range(1, num_jday + 1): 50 | 51 | if not fs.exists(f"{bucket}/{folder}/{year}/template_{jday:03d}.dat"): 52 | continue 53 | 54 | with fs.open(f"{bucket}/{folder}/{year}/cctorch_picks_{jday:03d}.csv", "r") as fp: 55 | picks = pd.read_csv(fp, dtype=str) 56 | with fs.open(f"{bucket}/{folder}/{year}/cctorch_events_{jday:03d}.csv", "r") as fp: 57 | events = pd.read_csv(fp, dtype=str) 58 | with fs.open(f"{bucket}/{folder}/{year}/config_{jday:03d}.json", "r") as fp: 59 | config = json.load(fp) 60 | template_file = fs.open(f"{bucket}/{folder}/{year}/template_{jday:03d}.dat", "rb") 61 | templates = np.frombuffer(template_file.read(), dtype=np.float32).reshape(tuple(config["template_shape"])) 62 | template_file.close() 63 | 64 | print(f"events: {len(events):,} ") 65 | print(f"picks: {len(picks):,} ") 66 | print(f"templates: {templates.shape}") 67 | 68 | picks.to_csv(f"{result_path}/picks_{year:04d}_{jday:03d}.csv", index=False) 69 | events.to_csv(f"{result_path}/events_{year:04d}_{jday:03d}.csv", index=False) 70 | np.save(f"{result_path}/templates_{year:04d}_{jday:03d}.npy", templates) 71 | 72 | plot_templates(templates, events, picks) 73 | 74 | # break 75 | 76 | # %% 77 | -------------------------------------------------------------------------------- /scripts/merge_gamma_picks.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import multiprocessing as mp 4 | import os 5 | from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed 6 | from datetime import datetime, timedelta, timezone 7 | from threading import Lock, Thread 8 | 9 | import fsspec 10 | import numpy as np 11 | import pandas as pd 12 | import pyproj 13 | from obspy import read_inventory 14 | from obspy.clients.fdsn import Client 15 | from sklearn.cluster import DBSCAN 16 | from tqdm import tqdm 17 | from args import parse_args 18 | from glob import glob 19 | 20 | 21 | # %% 22 | if __name__ == "__main__": 23 | 24 | args = parse_args() 25 | root_path = args.root_path 26 | region = args.region 27 | 28 | data_path = f"{region}/gamma" 29 | result_path = f"{region}/gamma" 30 | 31 | # %% 32 | # protocol = "gs" 33 | # token_json = 
f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json" 34 | # with open(token_json, "r") as fp: 35 | # token = json.load(fp) 36 | # fs = fsspec.filesystem(protocol, token=token) 37 | 38 | # %% 39 | event_csvs = sorted(glob(f"{root_path}/{data_path}/????/????.???.events.csv")) 40 | 41 | # %% 42 | events = [] 43 | picks = [] 44 | for event_csv in tqdm(event_csvs, desc="Load event csvs"): 45 | pick_csv = event_csv.replace("events.csv", "picks.csv") 46 | year, jday = event_csv.split("/")[-1].split(".")[:2] 47 | events_ = pd.read_csv(event_csv, dtype=str) 48 | picks_ = pd.read_csv(pick_csv, dtype=str) 49 | events_["year"] = year 50 | events_["jday"] = jday 51 | picks_["year"] = year 52 | picks_["jday"] = jday 53 | events.append(events_) 54 | picks.append(picks_) 55 | 56 | events = pd.concat(events, ignore_index=True) 57 | picks = pd.concat(picks, ignore_index=True) 58 | 59 | events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"] 60 | picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"] 61 | 62 | events["event_index"] = np.arange(len(events)) 63 | picks = picks.drop("event_index", axis=1) 64 | picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id", how="left") 65 | 66 | events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True) 67 | picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True) 68 | 69 | events.to_csv(f"{root_path}/{result_path}/gamma_events.csv", index=False) 70 | picks.to_csv(f"{root_path}/{result_path}/gamma_picks.csv", index=False) 71 | 72 | # %% 73 | -------------------------------------------------------------------------------- /scripts/quakeflow_job.yaml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | 3 | workdir: ./ 4 | 5 | num_nodes: 2 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: 16 | 17 | accelerators: V100:1 18 | 19 | cpus: 4+ 20 | 21 | use_spot: True 22 | # spot_recovery: none 23 | 24 | # image_id: docker:zhuwq0/quakeflow:latest 25 | 26 | envs: 27 | JOB: quakeflow 28 | NCPU: 1 29 | ROOT: /data/local 30 | REGION: demo 31 | 32 | file_mounts: 33 | 34 | /data: 35 | # source: s3://scedc-pds 36 | # source: gs://quakeflow_dataset 37 | source: gs://quakeflow_share/ 38 | mode: MOUNT 39 | 40 | /quakeflow_dataset: 41 | source: gs://quakeflow_dataset/ 42 | mode: MOUNT 43 | 44 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | ~/EQNet: ../EQNet 48 | 49 | setup: | 50 | echo "Begin setup." 
51 | sudo apt install rclone 52 | pip3 install fsspec gcsfs kfp==2.3 53 | pip3 install obspy pyproj 54 | pip3 install cartopy 55 | pip3 install h5py tqdm wandb 56 | pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 57 | mkdir ~/data && rclone mount range:/ ~/data --daemon 58 | 59 | run: | 60 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 61 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 62 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 63 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 64 | ls -al /data 65 | python set_config.py $ROOT $REGION 66 | python download_catalog.py $ROOT $REGION 67 | python download_station.py $ROOT $REGION 68 | fi 69 | torchrun \ 70 | --nproc_per_node=${nproc_per_node} \ 71 | --node_rank=${SKYPILOT_NODE_RANK} \ 72 | --nnodes=$num_nodes \ 73 | --master_addr=$master_addr \ 74 | --master_port=8008 \ 75 | download_waveform.py $ROOT $REGION 76 | torchrun \ 77 | --nproc_per_node=${nproc_per_node} \ 78 | --node_rank=${SKYPILOT_NODE_RANK} \ 79 | --nnodes=$num_nodes \ 80 | --master_addr=$master_addr \ 81 | --master_port=8008 \ 82 | run_phasenet_v2.py $ROOT $REGION 83 | -------------------------------------------------------------------------------- /scripts/run_eqnet.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from pathlib import Path 3 | import os 4 | import torch 5 | 6 | # %% 7 | # region = "Hawaii_Loa" 8 | # region = "South_Pole" 9 | # region = "Kilauea" 10 | region = "Kilauea_debug" 11 | root_path = Path(region) 12 | data_path = root_path / "obspy" 13 | result_path = root_path / "eqnet" 14 | if not result_path.exists(): 15 | result_path.mkdir() 16 | 17 | # %% 18 | mseed_path = data_path / "waveforms" 19 | mseeds = sorted(list(mseed_path.rglob("*.mseed"))) 20 | file_list = [] 21 | for f in mseeds: 22 | file_list.append(str(f).split(".mseed")[0][:-1]+"*.mseed") 23 | 24 | file_list = sorted(list(set(file_list))) 25 | 26 | # %% 27 | with open(result_path / "mseed_list.txt", "w") as fp: 28 | fp.write("\n".join(file_list)) 29 | 30 | # %% 31 | num_gpu = torch.cuda.device_count() 32 | 33 | # %% 34 | # os.system(f"torchrun --standalone --nproc_per_node 4 ../EQNet/predict.py --model phasenet --add_polarity --add_event --data_path ./ --data_list mseed_list.txt --response_xml '{root_path}/stations/*xml' --result_path ./eqnet_picks --batch_size=1 --format mseed") 35 | os.system(f"torchrun --standalone --nproc_per_node {num_gpu} ../EQNet/predict.py --model phasenet --add_polarity --add_event --data_path ./ --data_list {result_path}/mseed_list.txt --response_xml {data_path}/inventory.xml --result_path {result_path}/results --batch_size=1 --format mseed") 36 | 37 | os.system(f"cp {result_path}/results/picks_phasenet_raw.csv {result_path}/picks.csv") 38 | -------------------------------------------------------------------------------- /scripts/run_growclust_cc.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | from datetime import datetime 4 | 5 | import pandas as pd 6 | from args import parse_args 7 | from tqdm import tqdm 8 | 9 | args = parse_args() 10 | 11 | # %% 12 | root_path = args.root_path 13 | region = args.region 14 | result_path = f"{region}/growclust" 15 | if not os.path.exists(f"{root_path}/{result_path}"): 16 | os.makedirs(f"{root_path}/{result_path}") 17 | 18 | # %% 19 | # stations_json = f"{region}/results/data/stations.json" 20 | # 
stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") 21 | station_csv = f"{region}/cctorch/cctorch_stations.csv" 22 | stations = pd.read_csv(f"{root_path}/{station_csv}") 23 | stations.set_index("station_id", inplace=True) 24 | 25 | 26 | lines = [] 27 | for i, row in stations.iterrows(): 28 | # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 29 | line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" 30 | lines.append(line) 31 | 32 | with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: 33 | fp.writelines(lines) 34 | 35 | 36 | # %% 37 | # events_csv = f"{region}/results/phase_association/events.csv" 38 | # events_csv = f"{region}/adloc/ransac_events.csv" 39 | events_csv = f"{region}/cctorch/cctorch_events.csv" 40 | # event_file = f"{region}/cctorch/events.csv" 41 | events = pd.read_csv(f"{root_path}/{events_csv}") 42 | # event_df = event_df[event_df["gamma_score"] > 10] 43 | # event_index = [f"{x:06d}" for x in event_df["event_index"]] 44 | # events["time"] = pd.to_datetime(events["time"]) 45 | events["time"] = pd.to_datetime(events["event_time"]) 46 | if "magnitude" not in events.columns: 47 | events["magnitude"] = 0.0 48 | 49 | events[["year", "month", "day", "hour", "minute", "second"]] = ( 50 | events["time"] 51 | # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) 52 | .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) 53 | .apply(pd.Series) 54 | .apply(pd.to_numeric) 55 | ) 56 | 57 | lines = [] 58 | for i, row in events.iterrows(): 59 | # yr mon day hr min sec lat lon dep mag eh ez rms evid 60 | line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" 61 | lines.append(line) 62 | 63 | with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: 64 | fp.writelines(lines) 65 | 66 | # %% 67 | os.system(f"bash run_growclust_cc.sh {root_path} {region}") 68 | -------------------------------------------------------------------------------- /scripts/run_growclust_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="local" 9 | region="demo" 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/growclust" ]; then 13 | mkdir -p $root_path/$region/growclust 14 | fi 15 | 16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc 17 | cd $root_path/$region/growclust 18 | mkdir -p TT OUT 19 | 20 | if [ ! 
-d "GrowClust" ]; then 21 | git clone https://github.com/zhuwq0/GrowClust.git 22 | make -C GrowClust/SRC/ 23 | fi 24 | 25 | cat < growclust.inp 26 | **** Example GrowClust Control File ***** 27 | ******** Daniel Trugman, 2016 ********** 28 | ******************************************* 29 | * 30 | ******************************************* 31 | ************* Event list **************** 32 | ******************************************* 33 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 34 | 1 35 | * fin_evlist (event list file name) 36 | evlist.txt 37 | * 38 | ******************************************* 39 | ************ Station list ************* 40 | ******************************************* 41 | * stlist_fmt (0 = SEED channel, 1 = station name) 42 | 1 43 | * fin_stlist (station list file name) 44 | stlist.txt 45 | * 46 | ******************************************* 47 | ************* XCOR data *************** 48 | ******************************************* 49 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 50 | 1 12 51 | * fin_xcordat 52 | dt.cc 53 | * 54 | ******************************************* 55 | *** Velocity Model / Travel Time Tables *** 56 | ******************************************* 57 | * fin_vzmdl (input vz model file) 58 | vzmodel.txt 59 | * fout_vzfine (output, interpolated vz model file) 60 | TT/vzfine.txt 61 | * fout_pTT (output travel time table, P phase) 62 | TT/tt.pg 63 | * fout_sTT (output travel time table, S phase) 64 | TT/tt.sg 65 | * 66 | ****************************************** 67 | ***** Travel Time Table Parameters ****** 68 | ****************************************** 69 | * vpvs_factor rayparam_min (-1 = default) 70 | 1.732 0.0 71 | * tt_dep0 tt_dep1 tt_ddep 72 | 0. 71. 1. 73 | * tt_del0 tt_del1 tt_ddel 74 | 0. 500. 2. 75 | * 76 | ****************************************** 77 | ***** GrowClust Algorithm Parameters ***** 78 | ****************************************** 79 | * rmin delmax rmsmax 80 | 0.1 120 1.0 81 | * rpsavgmin, rmincut ngoodmin iponly 82 | 0 0.1 8 0 83 | * 84 | ****************************************** 85 | ************ Output files **************** 86 | ****************************************** 87 | * nboot nbranch_min 88 | 0 1 89 | * fout_cat (relocated catalog) 90 | OUT/out.growclust_cc_cat 91 | * fout_clust (relocated cluster file) 92 | OUT/out.growclust_cc_clust 93 | * fout_log (program log) 94 | OUT/out.growclust_cc_log 95 | * fout_boot (bootstrap distribution) 96 | OUT/out.growclust_cc_boot 97 | ****************************************** 98 | ****************************************** 99 | EOF 100 | 101 | cat < vzmodel.txt 102 | 0.0 5.30 0.00 103 | 1.0 5.65 0.00 104 | 3.0 5.93 0.00 105 | 5.0 6.20 0.00 106 | 7.0 6.20 0.00 107 | 9.0 6.20 0.00 108 | 11.0 6.20 0.00 109 | 13.0 6.20 0.00 110 | 17.0 6.20 0.00 111 | 21.0 6.20 0.00 112 | 31.00 7.50 0.00 113 | 31.10 8.11 0.00 114 | 100.0 8.11 0.00 115 | EOF 116 | 117 | ./GrowClust/SRC/growclust growclust.inp 118 | cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt 119 | cd $WORKING_DIR 120 | -------------------------------------------------------------------------------- /scripts/run_growclust_ct.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | WORKING_DIR=$PWD 3 | if [ $# -eq 2 ]; then 4 | root_path=$1 5 | region=$2 6 | else 7 | root_path="local" 8 | region="demo" 9 | fi 10 | 11 | if [ ! 
-d "$root_path/$region/growclust" ]; then 12 | mkdir -p $root_path/$region/growclust 13 | fi 14 | 15 | cd $root_path/$region/growclust 16 | mkdir -p TT OUT 17 | 18 | if [ ! -d "GrowClust" ]; then 19 | git clone https://github.com/zhuwq0/GrowClust.git 20 | make -C GrowClust/SRC/ 21 | fi 22 | 23 | cat < growclust.inp 24 | **** Example GrowClust Control File ***** 25 | ******** Daniel Trugman, 2016 ********** 26 | ******************************************* 27 | * 28 | ******************************************* 29 | ************* Event list **************** 30 | ******************************************* 31 | * evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) 32 | 1 33 | * fin_evlist (event list file name) 34 | evlist.txt 35 | * 36 | ******************************************* 37 | ************ Station list ************* 38 | ******************************************* 39 | * stlist_fmt (0 = SEED channel, 1 = station name) 40 | 1 41 | * fin_stlist (station list file name) 42 | stlist.txt 43 | * 44 | ******************************************* 45 | ************* XCOR data *************** 46 | ******************************************* 47 | * xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) 48 | 1 12 49 | * fin_xcordat 50 | dt.ct 51 | * 52 | ******************************************* 53 | *** Velocity Model / Travel Time Tables *** 54 | ******************************************* 55 | * fin_vzmdl (input vz model file) 56 | vzmodel.txt 57 | * fout_vzfine (output, interpolated vz model file) 58 | TT/vzfine.txt 59 | * fout_pTT (output travel time table, P phase) 60 | TT/tt.pg 61 | * fout_sTT (output travel time table, S phase) 62 | TT/tt.sg 63 | * 64 | ****************************************** 65 | ***** Travel Time Table Parameters ****** 66 | ****************************************** 67 | * vpvs_factor rayparam_min (-1 = default) 68 | 1.732 0.0 69 | * tt_dep0 tt_dep1 tt_ddep 70 | 0. 81. 1. 71 | * tt_del0 tt_del1 tt_ddel 72 | 0. 500. 2. 
73 | *
74 | ******************************************
75 | ***** GrowClust Algorithm Parameters *****
76 | ******************************************
77 | * rmin delmax rmsmax
78 | 0.6 120 1.0
79 | * rpsavgmin, rmincut ngoodmin iponly
80 | 0 0.6 8 0
81 | *
82 | ******************************************
83 | ************ Output files ****************
84 | ******************************************
85 | * nboot nbranch_min
86 | 0 1
87 | * fout_cat (relocated catalog)
88 | OUT/out.growclust_ct_cat
89 | * fout_clust (relocated cluster file)
90 | OUT/out.growclust_ct_clust
91 | * fout_log (program log)
92 | OUT/out.growclust_ct_log
93 | * fout_boot (bootstrap distribution)
94 | OUT/out.growclust_ct_boot
95 | ******************************************
96 | ******************************************
97 | EOF
98 | 
99 | cat <<EOF > vzmodel.txt
100 | 0.0 5.30 0.00
101 | 1.0 5.65 0.00
102 | 3.0 5.93 0.00
103 | 5.0 6.20 0.00
104 | 7.0 6.20 0.00
105 | 9.0 6.20 0.00
106 | 11.0 6.20 0.00
107 | 13.0 6.20 0.00
108 | 17.0 6.20 0.00
109 | 21.0 6.20 0.00
110 | 31.00 7.50 0.00
111 | 31.10 8.11 0.00
112 | 100.0 8.11 0.00
113 | EOF
114 | 
115 | ./GrowClust/SRC/growclust growclust.inp
116 | cp OUT/out.growclust_ct_cat growclust_ct_catalog.txt
117 | cd $WORKING_DIR
118 | 
-------------------------------------------------------------------------------- /scripts/run_hypodd_cc.py: --------------------------------------------------------------------------------
1 | # %%
2 | import json
3 | import os
4 | 
5 | import numpy as np
6 | import pandas as pd
7 | from args import parse_args
8 | 
9 | # %%
10 | args = parse_args()
11 | root_path = args.root_path
12 | region = args.region
13 | 
14 | with open(f"{root_path}/{region}/config.json", "r") as fp:
15 |     config = json.load(fp)
16 | 
17 | # %%
18 | data_path = f"{region}/cctorch"
19 | result_path = f"{region}/hypodd"
20 | if not os.path.exists(f"{root_path}/{result_path}"):
21 |     os.makedirs(f"{root_path}/{result_path}")
22 | 
23 | # %%
24 | stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv")
25 | 
26 | station_lines = {}
27 | for i, row in stations.iterrows():
28 |     station_id = row["station_id"]
29 |     network_code, station_code, comp_code, channel_code = station_id.split(".")
30 |     # tmp_code = f"{station_code}{channel_code}"
31 |     tmp_code = f"{station_code}"
32 |     station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n"
33 | 
34 | 
35 | with open(f"{root_path}/{result_path}/stations.dat", "w") as f:
36 |     for line in sorted(station_lines.values()):
37 |         f.write(line)
38 | 
39 | # %%
40 | events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv")
41 | events["time"] = pd.to_datetime(events["event_time"], format="mixed")
42 | 
43 | event_lines = []
44 | 
45 | for i, row in events.iterrows():
46 |     event_index = row["event_index"]
47 |     origin = row["time"]
48 |     magnitude = row["magnitude"]
49 |     x_err = 0.0
50 |     z_err = 0.0
51 |     time_err = 0.0
52 |     dx, dy, dz = 0.0, 0.0, 0.0
53 |     # dx = np.random.uniform(-0.01, 0.01)
54 |     # dy = np.random.uniform(-0.01, 0.01)
55 |     # dz = np.random.uniform(0, 10)
56 |     # dz = 0
57 |     event_lines.append(
58 |         f"{origin.year:4d}{origin.month:02d}{origin.day:02d} "
59 |         f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} "
60 |         # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} "
61 |         f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} "
62 |         f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n"
63 |     )
64 | 
65 | with open(f"{root_path}/{result_path}/events.dat", "w") as f:
66 |     f.writelines(event_lines)
67 | 
68 | # %%
69 | os.system(f"bash run_hypodd_cc.sh {root_path} {region}")
70 | 
-------------------------------------------------------------------------------- /scripts/run_hypodd_cc.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -x
3 | WORKING_DIR=$PWD
4 | if [ $# -eq 2 ]; then
5 | root_path=$1
6 | region=$2
7 | else
8 | root_path="local"
9 | region="demo"
10 | fi
11 | 
12 | if [ ! -d "$root_path/$region/hypodd" ]; then
13 | mkdir -p $root_path/$region/hypodd
14 | fi
15 | 
16 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc
17 | cd $root_path/$region/hypodd
18 | 
19 | if [ ! -d "HypoDD" ]; then
20 | git clone https://github.com/zhuwq0/HypoDD.git
21 | export PATH=$PATH:$PWD/HypoDD
22 | make -C HypoDD/src/
23 | fi
24 | 
25 | cat <<EOF > cc.inp
26 | * RELOC.INP:
27 | *--- input file selection
28 | * cross correlation diff times:
29 | dt.cc
30 | *
31 | *catalog P diff times:
32 | 
33 | *
34 | * event file:
35 | events.dat
36 | *
37 | * station file:
38 | stations.dat
39 | *
40 | *--- output file selection
41 | * original locations:
42 | hypodd_cc.loc
43 | * relocations:
44 | hypodd_cc.reloc
45 | * station information:
46 | hypodd.sta
47 | * residual information:
48 | hypodd.res
49 | * source parameter information:
50 | hypodd.src
51 | *
52 | *--- data type selection:
53 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat
54 | * IPHA: 1= P; 2= S; 3= P&S
55 | * DIST:max dist [km] between cluster centroid and station
56 | * IDAT IPHA DIST
57 | 1 3 120
58 | *
59 | *--- event clustering:
60 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering)
61 | * OBSCT: min # of obs/pair for network data (0= no clustering)
62 | * OBSCC OBSCT
63 | 0 0
64 | *
65 | *--- solution control:
66 | * ISTART: 1 = from single source; 2 = from network sources
67 | * ISOLV: 1 = SVD, 2=lsqr
68 | * NSET: number of sets of iteration with specifications following
69 | * ISTART ISOLV NSET
70 | 2 2 4
71 | *
72 | *--- data weighting and re-weighting:
73 | * NITER: last iteration to use the following weights
74 | * WTCCP, WTCCS: weight cross P, S
75 | * WTCTP, WTCTS: weight catalog P, S
76 | * WRCC, WRCT: residual threshold in sec for cross, catalog data
77 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs
78 | * DAMP: damping (for lsqr only)
79 | * --- CROSS DATA ----- ----CATALOG DATA ----
80 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP
81 | 4 1 1 -9 -9 -9 -9 -9 -9 70
82 | 4 1 1 6 -9 -9 -9 -9 -9 70
83 | 4 1 0.8 3 4 -9 -9 -9 -9 70
84 | 4 1 0.8 2 2 -9 -9 -9 -9 70
85 | *
86 | *--- 1D model:
87 | * NLAY: number of model layers
88 | * RATIO: vp/vs ratio
89 | * TOP: depths of top of layer (km)
90 | * VEL: layer velocities (km/s)
91 | * NLAY RATIO
92 | 12 1.73
93 | * TOP
94 | 0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10
95 | * VEL
96 | 5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11
97 | *
98 | *--- event selection:
99 | * CID: cluster to be relocated (0 = all)
100 | * ID: cuspids of event to be relocated (8 per line)
101 | * CID
102 | 0
103 | * ID
104 | EOF
105 | 
106 | ./HypoDD/src/hypoDD/hypoDD cc.inp
107 | cd $WORKING_DIR
-------------------------------------------------------------------------------- /scripts/run_phasenet_das.yaml: --------------------------------------------------------------------------------
1 | name: quakeflow
2 | 
3 | workdir: .
4 | 5 | num_nodes: 1 6 | 7 | resources: 8 | 9 | cloud: gcp 10 | 11 | region: us-west1 12 | 13 | zone: us-west1-b 14 | 15 | # instance_type: n2-highmem-16 16 | 17 | # accelerators: V100:1 18 | 19 | cpus: 8+ 20 | 21 | use_spot: True 22 | 23 | # image_id: docker:zhuwq0/quakeflow:latest 24 | 25 | envs: 26 | JOB: quakeflow 27 | NCPU: 1 28 | ROOT_PATH: /data 29 | RESULT_PATH: phasenet_das 30 | 31 | file_mounts: 32 | 33 | /data: 34 | # source: s3://scedc-pds/ 35 | # source: gs://quakeflow_dataset/ 36 | # source: gs://quakeflow_share/ 37 | source: gs://das_arcata/ 38 | mode: MOUNT 39 | 40 | /quakeflow_dataset: 41 | source: gs://quakeflow_dataset/ 42 | mode: MOUNT 43 | 44 | ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub 45 | ~/.ssh/id_rsa: ~/.ssh/id_rsa 46 | ~/.config/rclone/rclone.conf: ~/.config/rclone/rclone.conf 47 | # EQNet: ../EQNet 48 | 49 | setup: | 50 | echo "Begin setup." 51 | sudo apt install rclone 52 | pip3 install fsspec gcsfs kfp==2.3 53 | pip3 install obspy pyproj 54 | pip3 install cartopy 55 | pip3 install h5py tqdm wandb 56 | pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 57 | # pip3 install torch torchvision torchaudio 58 | # mkdir ~/data && rclone mount range:/ ~/data --daemon 59 | 60 | run: | 61 | [ -d "EQNet" ] && rm -r "EQNet" 62 | git clone https://github.com/AI4EPS/EQNet.git 63 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 64 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 65 | [[ ${SKYPILOT_NUM_GPUS_PER_NODE} -gt $NCPU ]] && nproc_per_node=${SKYPILOT_NUM_GPUS_PER_NODE} || nproc_per_node=$NCPU 66 | if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then 67 | ls -al /data 68 | python create_filelist.py ${ROOT_PATH} "" 69 | fi 70 | 71 | torchrun \ 72 | --nproc_per_node=${nproc_per_node} \ 73 | --node_rank=${SKYPILOT_NODE_RANK} \ 74 | --nnodes=$num_nodes \ 75 | --master_addr=$master_addr \ 76 | --master_port=8008 \ 77 | EQNet/predict.py --model phasenet_das --format=h5 --data_list=${ROOT_PATH}/${RESULT_PATH}/filelist.csv --result_path=${ROOT_PATH}/${RESULT_PATH} --batch_size 1 --workers 6 --folder_depth=2 --system optasense 78 | 79 | -------------------------------------------------------------------------------- /scripts/run_phasenet_v2.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | import sys 5 | from collections import defaultdict 6 | from glob import glob 7 | from typing import Dict, List 8 | 9 | import fsspec 10 | import numpy as np 11 | from args import parse_args 12 | 13 | 14 | def run_phasenet( 15 | root_path: str, 16 | region: str, 17 | config: Dict, 18 | node_rank: int = 0, 19 | num_nodes: int = 1, 20 | overwrite: bool = False, 21 | model_path: str = "../PhaseNet/", 22 | protocol: str = "file", 23 | bucket: str = "", 24 | token: Dict = None, 25 | ) -> str: 26 | 27 | # %% 28 | fs = fsspec.filesystem(protocol=protocol, token=token) 29 | 30 | # %% 31 | result_path = f"{region}/phasenet" 32 | if not os.path.exists(f"{root_path}/{result_path}"): 33 | os.makedirs(f"{root_path}/{result_path}") 34 | 35 | # %% 36 | waveform_dir = f"{region}/waveforms" 37 | # mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/??/*.mseed")) 38 | # subdir = 3 39 | mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/*.mseed")) 40 | subdir = 2 41 | 42 | # %% 43 | mseed_3c = defaultdict(list) 44 | for mseed in mseed_list: 45 | key = "/".join(mseed.replace(".mseed", "").split("/")[-subdir - 1 :]) 46 | key = key[:-1] ## remove the channel suffix 47 | 
mseed_3c[key].append(mseed) 48 | print(f"Number of mseed files: {len(mseed_3c)}") 49 | 50 | # %% 51 | if not overwrite: 52 | # processed = sorted(glob(f"{root_path}/{result_path}/picks/????/???/??/*.csv")) 53 | processed = sorted(glob(f"{root_path}/{result_path}/picks/????/???/*.csv")) 54 | processed = ["/".join(f.replace(".csv", "").split("/")[-subdir - 1 :]) for f in processed] 55 | processed = [p[:-1] for p in processed] ## remove the channel suffix 56 | print(f"Number of processed files: {len(processed)}") 57 | 58 | keys = sorted(list(set(mseed_3c.keys()) - set(processed))) 59 | print(f"Number of unprocessed files: {len(keys)}") 60 | keys = list(np.array_split(keys, num_nodes)[node_rank]) 61 | print(f"Node {node_rank:03d}/{num_nodes:03d}: processing {len(keys)} files") 62 | 63 | if len(keys) == 0: 64 | return 0 65 | 66 | mseed_3c = [",".join(sorted(mseed_3c[k])) for k in keys] 67 | 68 | # %% 69 | mseed_file = f"{root_path}/{result_path}/mseed_list_{node_rank:03d}_{num_nodes:03d}.csv" 70 | with open(mseed_file, "w") as fp: 71 | fp.write("\n".join(mseed_3c)) 72 | 73 | # %% 74 | inventory_path = f"{root_path}/{region}/obspy/inventory" 75 | 76 | # %% 77 | os.system( 78 | f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={mseed_file} --response_xml={inventory_path} --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks_{node_rank:03d}_{num_nodes:03d} --batch_size=1 --subdir_level={subdir}" 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | 84 | args = parse_args() 85 | root_path = args.root_path 86 | region = args.region 87 | num_nodes = args.num_nodes 88 | node_rank = args.node_rank 89 | 90 | with open(f"{root_path}/{region}/config.json", "r") as fp: 91 | config = json.load(fp) 92 | 93 | os.system("cd ../PhaseNet && git checkout quakeflow && git pull") 94 | run_phasenet(root_path=root_path, region=region, config=config) 95 | 96 | if num_nodes == 1: 97 | os.system(f"python merge_phasenet_picks.py --region {region}") 98 | 99 | # %% 100 | -------------------------------------------------------------------------------- /scripts/set_config.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | from typing import Dict 5 | 6 | import fsspec 7 | from args import parse_args 8 | 9 | 10 | def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: str, token: Dict) -> Dict: 11 | 12 | fs = fsspec.filesystem(protocol, token=token) 13 | if not os.path.exists(root_path): 14 | os.makedirs(root_path) 15 | data_dir = f"{region}" 16 | if not os.path.exists(f"{root_path}/{data_dir}"): 17 | os.makedirs(f"{root_path}/{data_dir}", exist_ok=True) 18 | for subfolder in [ 19 | "network", 20 | "waveforms", 21 | "picking", 22 | "association", 23 | "location", 24 | "relocation", 25 | "mechanism", 26 | ]: 27 | if not os.path.exists(f"{root_path}/{data_dir}/results/{subfolder}"): 28 | os.makedirs(f"{root_path}/{data_dir}/results/{subfolder}", exist_ok=True) 29 | 30 | config_region = {} 31 | ## default values 32 | config_region["num_nodes"] = 1 33 | ## submodules config 34 | if "obspy" in config: 35 | config_region["obspy"] = config["obspy"] 36 | if "phasenet" in config: 37 | config_region["phasenet"] = config["phasenet"] 38 | if "gamma" in config: 39 | config_region["gamma"] = config["gamma"] 40 | if "adloc" in config: 41 | config_region["adloc"] = config["adloc"] 42 | if "cctorch" in config: 43 | 
config_region["cctorch"] = config["cctorch"] 44 | if "adtomo" in config: 45 | config_region["adtomo"] = config["adtomo"] 46 | if "region" in config: 47 | if region in config["region"]: 48 | config_region.update(config["region"][region]) 49 | 50 | with open(f"{root_path}/{data_dir}/config.json", "w") as fp: 51 | json.dump(config_region, fp, indent=4) 52 | 53 | if protocol != "file": 54 | fs.put(f"{root_path}/{data_dir}/config.json", f"{bucket}/{data_dir}/config.json") 55 | print(json.dumps(config_region, indent=4)) 56 | 57 | return config_region 58 | 59 | 60 | if __name__ == "__main__": 61 | 62 | args = parse_args() 63 | root_path = args.root_path 64 | region = args.region 65 | protocol = args.protocol 66 | bucket = args.bucket 67 | token = args.token 68 | 69 | with open("config.json", "r") as fp: 70 | config = json.load(fp) 71 | 72 | set_config(root_path=root_path, region=region, config=config, protocol=protocol, bucket=bucket, token=token) 73 | -------------------------------------------------------------------------------- /scripts/submit_download_waveform.py: -------------------------------------------------------------------------------- 1 | import time 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | import sky 5 | from args import parse_args 6 | 7 | args = parse_args() 8 | ROOT_PATH = args.root_path 9 | REGION = args.region 10 | PROTOCOL = args.protocol 11 | BUCKET = args.bucket 12 | TOKEN = args.token 13 | NUM_NODES = args.num_nodes 14 | 15 | task = sky.Task( 16 | name="download_waveform", 17 | setup=""" 18 | echo "Begin setup." 19 | pip install obspy 20 | pip install pandas numpy 21 | pip install -U fsspec gcsfs s3fs 22 | echo "Setup complete." 23 | """, 24 | run=""" 25 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l` 26 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1` 27 | if [ "$SKYPILOT_NODE_RANK" == "0" ]; then 28 | ls -al /opt 29 | ls -al /data 30 | ls -al ./ 31 | fi 32 | python download_waveform_v3.py --region $REGION --bucket $BUCKET --protocol $PROTOCOL --token $TOKEN --num_nodes $NUM_NODES --node_rank $NODE_RANK 33 | """, 34 | workdir=".", 35 | num_nodes=1, 36 | envs={ 37 | "ROOT_PATH": ROOT_PATH, 38 | "REGION": REGION, 39 | "PROTOCOL": PROTOCOL, 40 | "BUCKET": BUCKET, 41 | "TOKEN": TOKEN, 42 | "NUM_NODES": NUM_NODES, 43 | "NODE_RANK": 0, 44 | }, 45 | ) 46 | 47 | task.set_file_mounts( 48 | {}, 49 | ) 50 | # task.set_storage_mounts({ 51 | # '/remote/imagenet/': sky.Storage(name='my-bucket', 52 | # source='/local/imagenet'), 53 | # }) 54 | 55 | task.set_resources( 56 | sky.Resources( 57 | cloud=sky.GCP(), 58 | region="us-west1", # GCP 59 | # region="us-west-2", # AWS 60 | accelerators=None, 61 | cpus=2, 62 | disk_tier="low", 63 | disk_size=50, # GB 64 | memory=None, 65 | use_spot=True, 66 | ), 67 | ) 68 | 69 | jobs = [] 70 | try: 71 | sky.status(refresh=True) 72 | except Exception as e: 73 | print(e) 74 | 75 | with ThreadPoolExecutor(max_workers=NUM_NODES) as executor: 76 | for NODE_RANK in range(NUM_NODES): 77 | 78 | task.update_envs({"NODE_RANK": NODE_RANK}) 79 | cluster_name = f"obspy-{NODE_RANK:02d}-{NUM_NODES:02d}-{REGION}" 80 | 81 | status = sky.status(cluster_names=[f"{cluster_name}"], refresh=True) 82 | if len(status) > 0: 83 | if status[0]["status"].value == "INIT": 84 | sky.down(f"{cluster_name}") 85 | if (not status[0]["to_down"]) and (not status[0]["status"].value == "INIT"): 86 | sky.autostop(f"{cluster_name}", idle_minutes=10, down=True) 87 | print(f"Cluster {cluster_name}/{NUM_NODES} already exists.") 88 | continue 89 | 90 | status = 
sky.status(cluster_names=[f"{cluster_name}"]) 91 | if len(status) == 0: 92 | print(f"Launching cluster {cluster_name}/{NUM_NODES}...") 93 | jobs.append( 94 | executor.submit( 95 | sky.launch, 96 | task, 97 | cluster_name=f"{cluster_name}", 98 | idle_minutes_to_autostop=10, 99 | down=True, 100 | detach_setup=False, 101 | detach_run=False, 102 | ) 103 | ) 104 | time.sleep(5) 105 | 106 | for job in jobs: 107 | print(job.result()) 108 | -------------------------------------------------------------------------------- /scripts/tests/.gitignore: -------------------------------------------------------------------------------- 1 | hypodd/ 2 | -------------------------------------------------------------------------------- /scripts/tests/prepare_data.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | import pandas as pd 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | 7 | # %% 8 | catalog_url = "https://www.sciencebase.gov/catalog/file/get/5dd715f3e4b0695797650d18?f=__disk__db%2F88%2Fa1%2Fdb88a1f6754843800f25bd63712ed438dfa7699f" 9 | os.system(f"curl -o catalog.txt {catalog_url}") 10 | 11 | # %% 12 | events = pd.read_csv( 13 | "catalog.txt", 14 | sep="\s+", 15 | comment="#", 16 | header=None, 17 | names=[ 18 | "year", 19 | "month", 20 | "day", 21 | "hour", 22 | "minute", 23 | "second", 24 | "latitude", 25 | "longitude", 26 | "depth_km", 27 | "magnitude", 28 | "event_index", 29 | ], 30 | ) 31 | events["time"] = pd.to_datetime(events[["year", "month", "day", "hour", "minute", "second"]]) 32 | # events["event_index"] = numpy.arange(len(events)) 33 | events.drop(columns=["year", "month", "day", "hour", "minute", "second"], inplace=True, errors="ignore") 34 | 35 | plt.figure(figsize=(10, 10)) 36 | plt.scatter(events["longitude"], events["latitude"], s=0.1, linewidths=0.0) 37 | plt.show() 38 | 39 | 40 | events = events[ 41 | (events["latitude"] >= 35.57) 42 | & (events["latitude"] <= 35.62) 43 | & (events["longitude"] >= -117.47) 44 | & (events["longitude"] <= -117.36) 45 | ] 46 | 47 | plt.figure(figsize=(10, 10)) 48 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 49 | plt.title(f"Number of events: {len(events)}") 50 | 51 | events = events[ 52 | (events["latitude"] >= 35.585) 53 | & (events["latitude"] <= 35.592) 54 | & (events["longitude"] >= -117.42) 55 | & (events["longitude"] <= -117.41) 56 | ] 57 | 58 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 59 | # plt.title(f"Number of events: {len(events)}") 60 | plt.show() 61 | 62 | # %% 63 | events.to_csv("events.csv", index=False, date_format="%Y-%m-%dT%H:%M:%S.%f") 64 | -------------------------------------------------------------------------------- /scripts/tests/prepare_data_quakeflow.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import os 3 | import pandas as pd 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | 7 | # %% 8 | events = pd.read_csv("../local/Ridgecrest/adloc/ransac_events_sst.csv") 9 | events["event_id"] = events["event_index"].astype(str) 10 | 11 | plt.figure(figsize=(10, 10)) 12 | plt.scatter(events["longitude"], events["latitude"], s=0.1, linewidths=0.0) 13 | plt.show() 14 | 15 | 16 | events = events[ 17 | (events["latitude"] >= 35.57) 18 | & (events["latitude"] <= 35.62) 19 | & (events["longitude"] >= -117.47) 20 | & (events["longitude"] <= -117.36) 21 | ] 22 | 23 | plt.figure(figsize=(10, 10)) 24 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 25 | plt.title(f"Number 
of events: {len(events)}") 26 | 27 | # events = events[ 28 | # (events["latitude"] >= 35.585) 29 | # & (events["latitude"] <= 35.592) 30 | # & (events["longitude"] >= -117.42) 31 | # & (events["longitude"] <= -117.41) 32 | # ] 33 | 34 | plt.scatter(events["longitude"], events["latitude"], s=0.5) 35 | # plt.title(f"Number of events: {len(events)}") 36 | plt.show() 37 | 38 | # %% 39 | events.to_csv("events.csv", index=False, date_format="%Y-%m-%dT%H:%M:%S.%f") 40 | 41 | # %% 42 | -------------------------------------------------------------------------------- /scripts/tests/run_hypodd_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | WORKING_DIR=$PWD 4 | if [ $# -eq 2 ]; then 5 | root_path=$1 6 | region=$2 7 | else 8 | root_path="." 9 | region="." 10 | fi 11 | 12 | if [ ! -d "$root_path/$region/hypodd" ]; then 13 | mkdir -p $root_path/$region/hypodd 14 | fi 15 | 16 | mv $root_path/$region/hypodd/dt.cc $root_path/$region/hypodd/dt_old.cc 17 | cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc 18 | cd $root_path/$region/hypodd 19 | 20 | if [ ! -d "HypoDD" ]; then 21 | git clone git@github.com:zhuwq0/HypoDD.git 22 | export PATH=$PATH:$PWD/HypoDD 23 | make -C HypoDD/src/ 24 | fi 25 | 26 | cat < cc.inp 27 | * RELOC.INP: 28 | *--- input file selection 29 | * cross correlation diff times: 30 | dt.cc 31 | * 32 | *catalog P diff times: 33 | 34 | * 35 | * event file: 36 | events.dat 37 | * 38 | * station file: 39 | stations.dat 40 | * 41 | *--- output file selection 42 | * original locations: 43 | hypodd_cc.loc 44 | * relocations: 45 | hypodd_cc.reloc 46 | * station information: 47 | hypodd.sta 48 | * residual information: 49 | hypodd.res 50 | * source paramater information: 51 | hypodd.src 52 | * 53 | *--- data type selection: 54 | * IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 55 | * IPHA: 1= P; 2= S; 3= P&S 56 | * DIST:max dist [km] between cluster centroid and station 57 | * IDAT IPHA DIST 58 | 1 3 120 59 | * 60 | *--- event clustering: 61 | * OBSCC: min # of obs/pair for crosstime data (0= no clustering) 62 | * OBSCT: min # of obs/pair for network data (0= no clustering) 63 | * OBSCC OBSCT 64 | 0 0 65 | * 66 | *--- solution control: 67 | * ISTART: 1 = from single source; 2 = from network sources 68 | * ISOLV: 1 = SVD, 2=lsqr 69 | * NSET: number of sets of iteration with specifications following 70 | * ISTART ISOLV NSET 71 | 2 2 4 72 | * 73 | *--- data weighting and re-weighting: 74 | * NITER: last iteration to used the following weights 75 | * WTCCP, WTCCS: weight cross P, S 76 | * WTCTP, WTCTS: weight catalog P, S 77 | * WRCC, WRCT: residual threshold in sec for cross, catalog data 78 | * WDCC, WDCT: max dist [km] between cross, catalog linked pairs 79 | * DAMP: damping (for lsqr only) 80 | * --- CROSS DATA ----- ----CATALOG DATA ---- 81 | * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP 82 | 4 1 1 -9 -9 -9 -9 -9 -9 70 83 | 4 1 1 6 -9 -9 -9 -9 -9 70 84 | 4 1 0.8 3 4 -9 -9 -9 -9 70 85 | 4 1 0.8 2 2 -9 -9 -9 -9 70 86 | * 87 | *--- 1D model: 88 | * NLAY: number of model layers 89 | * RATIO: vp/vs ratio 90 | * TOP: depths of top of layer (km) 91 | * VEL: layer velocities (km/s) 92 | * NLAY RATIO 93 | 10 1.73 94 | * TOP 95 | 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 30.0 96 | * VEL 97 | 4.74 5.01 5.35 5.71 6.07 6.17 6.27 6.34 6.39 7.80 98 | * 99 | *--- event selection: 100 | * CID: cluster to be relocated (0 = all) 101 | * ID: cuspids of event to be relocated (8 per line) 102 | * CID 103 | 0 104 
| * ID 105 | EOF 106 | 107 | ./HypoDD/src/hypoDD/hypoDD cc.inp 108 | cd $WORKING_DIR -------------------------------------------------------------------------------- /scripts/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/scripts/utils/__init__.py -------------------------------------------------------------------------------- /scripts/utils/convert_cctorch_turkey.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import pandas as pd 3 | import json 4 | from pathlib import Path 5 | from datetime import datetime 6 | import shutil 7 | import obspy 8 | from tqdm import tqdm 9 | import multiprocessing as mp 10 | 11 | # %% 12 | catalog = pd.read_csv("../EikoLoc/eikoloc_catalog.csv", parse_dates=["time"]) 13 | 14 | # %% 15 | catalog["index"] = catalog["event_index"] 16 | catalog = catalog.set_index("index") 17 | 18 | # %% 19 | picks = pd.read_csv("../EikoLoc/gamma_picks.csv", parse_dates=["phase_time"]) 20 | 21 | # %% 22 | picks["index"] = picks["event_index"] 23 | 24 | # %% 25 | picks = picks.set_index("event_index") 26 | 27 | # %% 28 | # picks[["network", "station", "location", "channel"]] = picks["station_id"].str.split(".", expand=True) 29 | # picks["phase_time"] = picks["phase_time"].dt.strftime("%Y-%m-%d %H:%M:%S.%f") 30 | 31 | # %% 32 | waveform_path = Path("../waveforms") 33 | output_path = Path("waveforms") 34 | if not output_path.exists(): 35 | output_path.mkdir() 36 | 37 | # %% 38 | def save_mseed(f, year, jday): 39 | try: 40 | meta = obspy.read(f) 41 | except: 42 | return 43 | 44 | date = datetime.strptime(f"{year}-{jday}", "%Y-%j") 45 | month, day = date.strftime("%m"), date.strftime("%d") 46 | 47 | meta = meta.merge(fill_value="latest") 48 | min_time = min([tr.stats.starttime for tr in meta]) 49 | max_time = max([tr.stats.endtime for tr in meta]) 50 | meta = meta.slice(starttime=min_time, endtime=max_time) 51 | for trace in meta: 52 | station_id = trace.get_id() 53 | network, station, location, channel = station_id.split(".") 54 | for hour in range(24): 55 | starttime = obspy.UTCDateTime(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z") 56 | endtime = obspy.UTCDateTime(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z")+3600 57 | trace_hour = trace.slice(starttime=starttime, endtime=endtime) 58 | if len(trace_hour.data) > 0: 59 | trace_hour.write(output_path / f"{year}-{jday}" / f"{hour:02d}" / f"{station_id}.mseed", format="MSEED") 60 | # except Exception as e: 61 | # print(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z") 62 | # print(obspy.UTCDateTime(f"{year}-{month}-{day}T{hour:02d}:00:00.000Z")) 63 | # print(e) 64 | # print(min_time, max_time, f"{year}-{month}-{day}T{hour:02d}") 65 | # raise 66 | print("Finish: ", output_path / f"{year}-{jday}", f) 67 | 68 | 69 | # %% 70 | for day_dir in waveform_path.iterdir(): 71 | 72 | year = datetime.fromisoformat(day_dir.name).strftime("%Y") 73 | jday = datetime.fromisoformat(day_dir.name).strftime("%j") 74 | 75 | if not (output_path / f"{year}-{jday}").exists(): 76 | (output_path / f"{year}-{jday}").mkdir() 77 | for hour in range(24): 78 | if not (output_path / f"{year}-{jday}" / f"{hour:02d}").exists(): 79 | (output_path / f"{year}-{jday}" / f"{hour:02d}").mkdir() 80 | 81 | file_list = set() 82 | mseeds = list(day_dir.rglob("*.mseed_[ENZ].mseed")) 83 | for x in mseeds: 84 | file_name = str(x) 85 | file_name = "_".join(file_name.split("_")[:-1] + ["?.mseed"]) 
86 | file_list.add(file_name) 87 | 88 | mseeds = list(day_dir.rglob("*_tdvms_?.mseed")) 89 | for x in mseeds: 90 | file_list.add(str(x)) 91 | 92 | ncpu = mp.cpu_count()//2 93 | with mp.Pool(ncpu) as p: 94 | p.starmap(save_mseed, [(f, year, jday) for f in file_list]) 95 | 96 | 97 | -------------------------------------------------------------------------------- /scripts/utils/preprocess_focal_mechanism.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import json 3 | import os 4 | import warnings 5 | from glob import glob 6 | 7 | import obspy 8 | import obspy.taup 9 | import pandas as pd 10 | from tqdm import tqdm 11 | 12 | warnings.filterwarnings("ignore") 13 | 14 | # %% 15 | os.system( 16 | "curl -L -O https://github.com/AI4EPS/EPS207_Observational_Seismology/releases/download/fm_data/fm_data.zip && unzip -q -o fm_data.zip" 17 | ) 18 | 19 | # %% 20 | data_path = "fm_data" 21 | os.system("mv fm_data/picks fm_data/picks_raw") 22 | 23 | # %% 24 | with open(f"{data_path}/stations.json", "r") as f: 25 | stations = json.load(f) 26 | 27 | stations = pd.DataFrame.from_dict(stations, orient="index") 28 | events = pd.read_csv(f"{data_path}/catalog.csv", parse_dates=["time"]) 29 | events["time"] = events["time"].dt.tz_localize(None) 30 | events.set_index("event_id", inplace=True) 31 | 32 | # %% 33 | model = obspy.taup.TauPyModel("iasp91") 34 | max_timediff = 2.0 35 | 36 | plotting = False 37 | if plotting: 38 | plt.figure(figsize=(10, 10)) 39 | for file in tqdm(list(glob(f"{data_path}/picks_raw/*.csv"))): 40 | picks = pd.read_csv(file, parse_dates=["phase_time"]) 41 | event_id = file.split("/")[-1].replace(".csv", "") 42 | evot, mag, evla, evlo, evdp, x, y, z = events.loc[ 43 | event_id, ["time", "magnitude", "latitude", "longitude", "depth_km", "x_km", "y_km", "z_km"] 44 | ].to_numpy() 45 | 46 | keep_idx = [] 47 | for i, pick in picks.iterrows(): 48 | stlo, stla = stations.loc[pick["station_id"], ["longitude", "latitude"]].to_numpy() 49 | epicdist = obspy.geodetics.gps2dist_azimuth(evla, evlo, stla, stlo)[0] / 1000 50 | prac_phase_time = (pick["phase_time"] - evot).total_seconds() 51 | 52 | phase_type = pick["phase_type"] 53 | if phase_type == "P": 54 | arrivals = model.get_travel_times_geo(max(0, evdp), evla, evlo, stla, stlo, phase_list=["p", "P"]) 55 | if plotting: 56 | plt.scatter(prac_phase_time, epicdist, color="b") 57 | else: 58 | arrivals = model.get_travel_times_geo(max(0, evdp), evla, evlo, stla, stlo, phase_list=["s", "S"]) 59 | if plotting: 60 | plt.scatter(prac_phase_time, epicdist, color="r") 61 | 62 | theo_phase_time = arrivals[0].time 63 | if abs(theo_phase_time - prac_phase_time) < max_timediff: 64 | keep_idx.append(i) 65 | else: 66 | if plotting: 67 | plt.scatter(prac_phase_time, epicdist, color="g") 68 | 69 | picks_ = picks.iloc[keep_idx] 70 | picks_["event_index"] = event_id 71 | 72 | try: 73 | picks_.to_csv(f"{data_path}/picks/{event_id}.csv", index=None) 74 | except: 75 | os.mkdir(f"{data_path}/picks") 76 | picks_.to_csv(f"{data_path}/picks/{event_id}.csv", index=None) 77 | 78 | if plotting: 79 | plt.xlabel("Time (s)") 80 | plt.ylabel("Epicentral distance (km)") 81 | plt.title(event_id) 82 | plt.show() 83 | plt.close() 84 | 85 | # %% 86 | picks = [] 87 | for file in tqdm(list(glob(f"{data_path}/picks/*.csv"))): 88 | picks.append(pd.read_csv(file)) 89 | picks = pd.concat(picks, ignore_index=True) 90 | picks.to_csv(f"{data_path}/picks.csv", index=None) 91 | 92 | 93 | # %% 94 | 
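The `preprocess_focal_mechanism.py` script above keeps a pick only when its observed travel time (pick time minus origin time) agrees with the IASP91 prediction from `obspy.taup` to within `max_timediff` seconds. A minimal, self-contained sketch of that screening step is shown below; the helper name `keep_pick` and the coordinates in the usage line are illustrative assumptions, not values taken from the script.

```python
# Sketch of the travel-time screening used in preprocess_focal_mechanism.py.
# The helper name and the example coordinates are illustrative assumptions.
import obspy.taup

model = obspy.taup.TauPyModel("iasp91")
max_timediff = 2.0  # seconds, same threshold as in the script above


def keep_pick(observed_tt, phase_type, evdp_km, evla, evlo, stla, stlo):
    """Return True if the observed travel time matches the IASP91 prediction."""
    phase_list = ["p", "P"] if phase_type == "P" else ["s", "S"]
    arrivals = model.get_travel_times_geo(
        max(0.0, evdp_km), evla, evlo, stla, stlo, phase_list=phase_list
    )
    if len(arrivals) == 0:
        return False
    return abs(arrivals[0].time - observed_tt) < max_timediff


# Example: a P pick observed 12.3 s after the origin time (made-up coordinates)
print(keep_pick(12.3, "P", 8.0, 35.7, -117.5, 35.9, -117.7))
```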
-------------------------------------------------------------------------------- /seedlink/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | WORKDIR /app 4 | 5 | ENV PYTHONUNBUFFERED=1 6 | 7 | # Create the environment: 8 | COPY env.yml /app 9 | RUN conda env create --name quakeflow --file=env.yml 10 | # Make RUN commands use the new environment: 11 | SHELL ["conda", "run", "-n", "quakeflow", "/bin/bash", "-c"] 12 | 13 | # Copy files 14 | COPY . /app 15 | 16 | # Start API server 17 | # ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "producer.py"] 18 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "producer_iris.py"] -------------------------------------------------------------------------------- /seedlink/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.7 7 | - numpy 8 | - scikit-learn 9 | - pandas 10 | - tensorflow 11 | - obspy 12 | - pip 13 | - pip: 14 | - kafka-python 15 | - pyspark==2.4.4 16 | -------------------------------------------------------------------------------- /seedlink/readme.md: -------------------------------------------------------------------------------- 1 | # Waveform Generator 2 | 3 | Fake streaming data generator. 4 | 5 | Build the docker image 6 | 7 | ``` 8 | docker build --tag quakeflow-waveform:1.0 . 9 | ``` 10 | 11 | Run the Waveform Generator 12 | 13 | ``` 14 | docker run -it quakeflow-waveform:1.0 15 | ``` -------------------------------------------------------------------------------- /skaffold.yaml: -------------------------------------------------------------------------------- 1 | # apiVersion: skaffold/v4beta2 2 | # kind: Config 3 | # metadata: 4 | # name: quakeflow 5 | # build: 6 | # artifacts: 7 | # - image: phasenet-api 8 | # context: PhaseNet 9 | # - image: gamma-api 10 | # context: GaMMA 11 | # - image: deepdenoiser-api 12 | # context: DeepDenoiser 13 | # manifests: 14 | # rawYaml: 15 | # - kubernetes/quakeflow-local.yaml 16 | 17 | 18 | apiVersion: skaffold/v2beta19 19 | kind: Config 20 | build: 21 | tagPolicy: 22 | sha256: {} 23 | # defines where to find the code at build time and where to push the resulting image 24 | artifacts: 25 | - context: quakeflow/demo/hub 26 | image: zhuwq0/quakeflow-hub 27 | - context: quakeflow/demo/data 28 | image: zhuwq0/quakeflow-data 29 | - context: quakeflow/demo/picking 30 | image: zhuwq0/picking-api 31 | - context: quakeflow/demo/association 32 | image: zhuwq0/association-api 33 | - context: quakeflow/demo/location 34 | image: zhuwq0/location-api 35 | # defines the Kubernetes manifests to deploy on each run 36 | deploy: 37 | kubectl: 38 | manifests: 39 | - quakeflow/deployment.yaml 40 | - quakeflow/service.yaml 41 | # use the cloudbuild profile to build images using Google Cloud Build 42 | profiles: 43 | - name: cloudbuild 44 | build: 45 | googleCloudBuild: {} -------------------------------------------------------------------------------- /spark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install Java and Spark 4 | RUN apt-get update 5 | RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y openjdk-11-jdk git wget tzdata 6 | RUN ln -fs /usr/share/zoneinfo/US/Pacific-New /etc/localtime && dpkg-reconfigure -f noninteractive tzdata 7 | 8 | ENV 
PATH="/root/miniconda3/bin:${PATH}" 9 | ARG PATH="/root/miniconda3/bin:${PATH}" 10 | RUN apt-get update 11 | 12 | RUN apt-get install -y wget && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN wget \ 15 | https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 16 | && mkdir /root/.conda \ 17 | && bash Miniconda3-latest-Linux-x86_64.sh -b \ 18 | && rm -f Miniconda3-latest-Linux-x86_64.sh 19 | RUN conda --version 20 | 21 | # Setup env variables 22 | ENV PYTHONUNBUFFERED=1 23 | 24 | 25 | WORKDIR /app 26 | COPY env.yml /app 27 | RUN conda env create --name quakeflow --file=env.yml 28 | SHELL ["conda", "run", "-n", "quakeflow", "/bin/bash", "-c"] 29 | 30 | 31 | COPY . /app 32 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "/app/spark_streaming.py"] 33 | # CMD /spark-2.4.7-bin-hadoop2.7/bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.4.0 /app/spark.py 34 | -------------------------------------------------------------------------------- /spark/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - numpy 8 | - pip 9 | - pip: 10 | - kafka-python 11 | - pyspark==3.1.1 12 | - requests 13 | -------------------------------------------------------------------------------- /spark/readme.md: -------------------------------------------------------------------------------- 1 | # Spark ETL Pipeline 2 | 3 | Spark streaming ETL Pipeline 4 | 5 | Build the docker image 6 | 7 | ``` 8 | docker build --tag quakeflow-spark:1.0 . 9 | ``` 10 | 11 | Run the Spark ETL Pipeline 12 | 13 | ``` 14 | docker run -it quakeflow-spark:1.0 15 | ``` 16 | 17 | Run it locally (make sure update the spark lib to 3.1.1) 18 | ``` 19 | python spark_structured_streaming.py 20 | ``` 21 | -------------------------------------------------------------------------------- /spark/requirements.txt: -------------------------------------------------------------------------------- 1 | pyspark==2.4.4 2 | numpy 3 | kafka-python 4 | requests -------------------------------------------------------------------------------- /tests/analysis/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | figures 3 | 4 | 2019.catalog 5 | Liu2020.txt 6 | Ross2019.txt 7 | Shelly2020.txt 8 | 9 | 10 | performance*.txt 11 | merged*.csv 12 | 13 | input 14 | output 15 | -------------------------------------------------------------------------------- /tests/analysis/config.json: -------------------------------------------------------------------------------- 1 | {"region": "Ridgecrest_oneweek", "center": [-117.504, 35.705], "xlim_degree": [-118.004, -117.004], "ylim_degree": [35.205, 36.205], "degree2km": 111.19492474777779, "starttime": "2019-07-04T00:00:00", "endtime": "2019-07-10T00:00:00", "networks": ["CI"], "channels": "HH*,BH*,EH*,HN*", "client": "SCEDC"} -------------------------------------------------------------------------------- /tests/check_pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: dataaccess 5 | spec: 6 | containers: 7 | - name: alpine 8 | image: alpine:latest 9 | command: ['sleep', 'infinity'] 10 | volumeMounts: 11 | - name: mypvc 12 | mountPath: /data 13 | volumes: 14 | - name: mypvc 15 | persistentVolumeClaim: 16 | # claimName: quakeflow-4ldv8-data-volume-37 17 | claimName: 
quakeflow-4ldv8-data-volume-0 -------------------------------------------------------------------------------- /tests/kafka-spark/consumer.py: -------------------------------------------------------------------------------- 1 | from kafka import KafkaConsumer 2 | from json import loads 3 | 4 | consumer = KafkaConsumer( 5 | 'testtopic', 6 | bootstrap_servers=['localhost:9092'], 7 | auto_offset_reset='earliest', 8 | enable_auto_commit=True, 9 | group_id='my-group', 10 | value_deserializer=lambda x: loads(x.decode('utf-8')) 11 | 12 | ) 13 | # client = MongoClient('localhost:27017') 14 | # collection = client.testtopic.testtopic 15 | for message in consumer: 16 | message = message.value 17 | # message['timestamp'] = message['timestamp'][0] 18 | # message['vec'] = message['vec'][0][:10] 19 | print(message) 20 | # collection.insert_one(message) 21 | # print('{} added to {}'.format(message, collection)) 22 | -------------------------------------------------------------------------------- /tests/kafka-spark/env.yml: -------------------------------------------------------------------------------- 1 | name: cs329s 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - jupyter 8 | - matplotlib 9 | - numpy 10 | - scikit-learn 11 | - pandas 12 | - tensorflow 13 | - pip 14 | - pip: 15 | - kafka-python 16 | - pyspark==2.4.4 17 | - fastapi 18 | - uvicorn 19 | - tqdm 20 | - streamlit 21 | - tweepy 22 | -------------------------------------------------------------------------------- /tests/kafka-spark/quakeflow logo design 2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4EPS/QuakeFlow/f558a63dd6afc1a7b4fd4ad89d6ca6961a7d937b/tests/kafka-spark/quakeflow logo design 2.jpg -------------------------------------------------------------------------------- /tests/kafka-spark/readme.md: -------------------------------------------------------------------------------- 1 | # Kafka & Pyspark 2 | 3 | This folder will be deprecated as we split things into individual docker containers. 4 | 5 | ## Setup 6 | 7 | 1. Install Conda Env 8 | ``` 9 | conda env create --name cs329s --file=env.yml 10 | ``` 11 | 12 | 2. Run your Zookeeper and Kafka cluster 13 | 14 | See https://kafka.apache.org/quickstart for the installation and detailed steps. 15 | 16 | ``` 17 | # Start the ZooKeeper service 18 | $ bin/zookeeper-server-start.sh config/zookeeper.properties 19 | 20 | # Start the Kafka broker service 21 | $ bin/kafka-server-start.sh config/server.properties 22 | ``` 23 | 24 | 3. Create a topic `testtopic` (just for test purpose) 25 | 26 | ``` 27 | $ bin/kafka-topics.sh --create --topic waveform_raw --bootstrap-server localhost:9092 28 | ``` 29 | 30 | 4. Setup PhaseNet and GMMA 31 | 32 | PhaseNet and GMMA are independent to this Quakeflow repo. You can clone and download 33 | both of them in a different folder. 34 | 35 | PhaseNet: https://github.com/wayneweiqiang/PhaseNet 36 | 37 | ``` 38 | $ git clone -b quakeflow https://github.com/wayneweiqiang/PhaseNet 39 | $ cd PhaseNet 40 | $ uvicorn app:app --reload --port 8000 41 | ``` 42 | 43 | Open another terminal and run 44 | 45 | GMMA: https://github.com/wayneweiqiang/GMMA 46 | 47 | ``` 48 | $ git clone -b quakeflow https://github.com/wayneweiqiang/GMMA 49 | $ cd GMMA 50 | $ uvicorn app:app --reload --port 8001 51 | ``` 52 | 53 | 5. 
Run the `producer.py` script 54 | 55 | ``` 56 | $ python producer.py 57 | ``` 58 | 59 | and you should see the script print out some timestamps every second 60 | 61 | 62 | 65 | 66 | 6. Run the `spark.py` script for testing the Spark features 67 | 68 | - `spark-submit` is pre-installed in our environment 69 | 70 | - Run the following command, and you will see the logs in `logs.txt` 71 | 72 | ``` 73 | $ spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.3.3 spark.py > logs.txt 74 | ``` 75 | 76 | 7. Check the `GMMA` API service after 30 seconds, you should see [200 OK] and some outputs about the earthquakes 77 | 78 | 79 | 80 | Go to the Spark UI portal (http://localhost:4040/) and you can see the jobs, stages and streaming statistics. 81 | 82 | 83 | 84 | Also some cool DAG Visualization about how the streaming ETL pipeline is done 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /ui/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | WORKDIR /app 4 | 5 | # Create the environment: 6 | COPY env.yml /app 7 | RUN conda env create --name quakeflow --file=env.yml 8 | # Make RUN commands use the new environment: 9 | SHELL ["conda", "run", "-n", "quakeflow", "/bin/bash", "-c"] 10 | 11 | # Copy files 12 | COPY . /app 13 | 14 | # Expose API port 15 | EXPOSE 8005 16 | 17 | ENV PYTHONUNBUFFERED=1 18 | 19 | # Start API server 20 | # ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "python", "app_plotly.py"] 21 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "quakeflow", "gunicorn", "app_plotly:server", "--reload", "-b", "0.0.0.0:8005"] 22 | -------------------------------------------------------------------------------- /ui/Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:server --workers 4 -------------------------------------------------------------------------------- /ui/assets/demo-button.css: -------------------------------------------------------------------------------- 1 | .link-button { 2 | margin-top: 10px; 3 | margin-right: 10px; 4 | vertical-align: top; 5 | color: white; 6 | } -------------------------------------------------------------------------------- /ui/assets/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "Open Sans", sans-serif; 3 | background-color: #061E44; 4 | } 5 | 6 | h6 { 7 | margin-top: 0; 8 | } 9 | 10 | .app__container { 11 | margin: 3% 5%; 12 | } 13 | 14 | .app__header { 15 | display: flex; 16 | justify-content: space-between; 17 | color: #fff; 18 | } 19 | 20 | .app__header__title { 21 | letter-spacing: 0.23rem; 22 | } 23 | 24 | .app__header__title--grey { 25 | color: #C4CDD5; 26 | } 27 | 28 | .app__menu__img { 29 | height: 50px; 30 | width: auto; 31 | } 32 | 33 | .app__content { 34 | display: flex; 35 | margin-top: 20px; 36 | } 37 | 38 | .wind__speed__container { 39 | display: flex; 40 | flex-direction: column; 41 | background-color: #082255; 42 | border-radius: 0.55rem; 43 | box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24); 44 | } 45 | 46 | .graph__title { 47 | color: #fff !important; 48 | letter-spacing: 0.3rem !important; 49 | padding: 25px 25px 0px 25px; 50 | margin-bottom: 0px !important; 51 | font-size: 1em; 52 | } 53 | 54 | .slider { 55 | padding: 15px; 56 | } 57 | 58 | .auto__checkbox { 59 | margin-right: 10px; 60 | } 61 | 62 
| .auto__label { 63 | color: #DFE3E8; 64 | } 65 | 66 | .auto__container { 67 | display: flex; 68 | justify-content: space-between; 69 | color: #DFE3E8; 70 | padding: 0px 15px; 71 | } 72 | 73 | .auto__p { 74 | margin-bottom: 0; 75 | } 76 | 77 | .graph__container { 78 | background-color: #082255; 79 | border-radius: 0.55rem; 80 | box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24); 81 | } 82 | 83 | .first { 84 | margin-bottom: 15px; 85 | } 86 | 87 | .second { 88 | margin-top: 15px; 89 | } 90 | 91 | .histogram__direction { 92 | margin-left: 15px; 93 | } 94 | 95 | #wind-direction { 96 | display: flex; 97 | justify-content: center; 98 | } 99 | 100 | @media only screen and (max-width: 600px) { 101 | .histogram__direction { 102 | margin: 15px 0px 20px 0px; 103 | } 104 | .app__content { 105 | display: block; 106 | } 107 | .app__menu__img { 108 | height: 30px; 109 | width: auto; 110 | } 111 | .app__header { 112 | display: flex; 113 | flex-direction: column; 114 | } 115 | .app__header__desc { 116 | order: 1; 117 | text-align: center; 118 | } 119 | .app__header__logo { 120 | order: 0; 121 | } 122 | .app__header__title { 123 | font-size: 1.5em; 124 | padding-top: 15px; 125 | } 126 | } -------------------------------------------------------------------------------- /ui/env.yml: -------------------------------------------------------------------------------- 1 | name: quakeflow 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - matplotlib 8 | - numpy 9 | - scikit-learn 10 | - pandas 11 | - tensorflow 12 | - pip 13 | - pip: 14 | - kafka-python 15 | - tweepy 16 | - plotly 17 | - plotly-express 18 | - dash 19 | - Pillow 20 | - geopy 21 | - kaleido 22 | - gunicorn 23 | 24 | 25 | -------------------------------------------------------------------------------- /ui/gradio/test_api.py: -------------------------------------------------------------------------------- 1 | # %% 2 | from gradio_client import Client 3 | import obspy 4 | import numpy as np 5 | import json 6 | import pandas as pd 7 | 8 | # %% 9 | 10 | waveform = obspy.read() 11 | array = np.array([x.data for x in waveform]).T 12 | 13 | # pipeline = PreTrainedPipeline() 14 | inputs = array.tolist() 15 | inputs = json.dumps(inputs) 16 | # picks = pipeline(inputs) 17 | # print(picks) 18 | 19 | # %% 20 | client = Client("ai4eps/phasenet") 21 | output, file = client.predict(["test_test.mseed"]) 22 | # %% 23 | with open(output, "r") as f: 24 | picks = json.load(f)["data"] 25 | 26 | # %% 27 | picks = pd.read_csv(file) 28 | 29 | 30 | # %% 31 | job = client.submit(["test_test.mseed", "test_test.mseed"], api_name="/predict") # This is not blocking 32 | 33 | print(job.status()) 34 | 35 | # %% 36 | output, file = job.result() 37 | 38 | -------------------------------------------------------------------------------- /ui/streamlit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | WORKDIR /app 4 | 5 | # Create the environment: 6 | COPY env.yml /app 7 | RUN conda env create --name cs329s --file=env.yml 8 | # Make RUN commands use the new environment: 9 | SHELL ["conda", "run", "-n", "cs329s", "/bin/bash", "-c"] 10 | 11 | # Copy files 12 | COPY . 
/app 13 | 14 | # Expose API port 15 | EXPOSE 8501 16 | 17 | ENV PYTHONUNBUFFERED=1 18 | 19 | # Start API server 20 | ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "cs329s", "streamlit", "run", "ui_streamlit_iris.py"] 21 | -------------------------------------------------------------------------------- /ui/streamlit/env.yml: -------------------------------------------------------------------------------- 1 | name: cs329s 2 | channels: 3 | - defaults 4 | - anaconda 5 | dependencies: 6 | - python=3.7 7 | - matplotlib 8 | - numpy 9 | - scikit-learn 10 | - pandas 11 | - tensorflow 12 | - pip 13 | - pip: 14 | - kafka-python 15 | - streamlit 16 | - tweepy 17 | - plotly 18 | - Pillow 19 | - geopy 20 | - kaleido 21 | 22 | --------------------------------------------------------------------------------
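The `ui/streamlit` image above runs `ui_streamlit_iris.py`, which is not included in this listing; its environment pins `kafka-python` and `streamlit`, which suggests a consumer-driven dashboard. A minimal sketch of such a page is given below purely as an illustration: the topic name `phasenet_picks`, the broker address, and the message schema are assumptions, not values taken from the repository.

```python
# Hypothetical sketch only: topic, broker address, and message fields are assumed.
import json

import pandas as pd
import streamlit as st
from kafka import KafkaConsumer

st.title("QuakeFlow picks (demo)")

consumer = KafkaConsumer(
    "phasenet_picks",                      # assumed topic name
    bootstrap_servers=["localhost:9092"],  # assumed broker address
    auto_offset_reset="latest",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
    consumer_timeout_ms=5000,              # stop polling after 5 s of silence
)

picks = [msg.value for msg in consumer]
if picks:
    st.dataframe(pd.DataFrame(picks))
else:
    st.write("No picks received yet.")
```

If used, a page like this would be launched the same way as the existing entrypoint, i.e. `streamlit run <script>.py` inside the container built from the Dockerfile above.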